Merge pull request #4245 from GoogleCloudPlatform/bigquery-b2
BigQuery Beta 2 Changes
diff --git a/bigquery/.coveragerc b/bigquery/.coveragerc
deleted file mode 100644
index d097511..0000000
--- a/bigquery/.coveragerc
+++ /dev/null
@@ -1,13 +0,0 @@
-[run]
-branch = True
-
-[report]
-fail_under = 100
-show_missing = True
-exclude_lines =
- # Re-enable the standard pragma
- pragma: NO COVER
- # Ignore debug-only repr
- def __repr__
- # Ignore abstract methods
- raise NotImplementedError
diff --git a/bigquery/README.rst b/bigquery/README.rst
deleted file mode 100644
index 01a1194..0000000
--- a/bigquery/README.rst
+++ /dev/null
@@ -1,117 +0,0 @@
-Python Client for Google BigQuery
-=================================
-
- Python idiomatic client for `Google BigQuery`_
-
-.. _Google BigQuery: https://cloud.google.com/bigquery/what-is-bigquery
-
-|pypi| |versions|
-
-- `Documentation`_
-
-.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html
-
-Quick Start
------------
-
-.. code-block:: console
-
- $ pip install --upgrade google-cloud-bigquery
-
-For more information on setting up your Python development environment, such as installing ``pip`` on your system, please refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform.
-
-.. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup
-
-Authentication
---------------
-
-With ``google-cloud-python`` we try to make authentication as painless as
-possible. Check out the `Authentication section`_ in our documentation to
-learn more. You may also find the `authentication document`_ shared by all
-the ``google-cloud-*`` libraries to be helpful.
-
-.. _Authentication section: https://google-cloud-python.readthedocs.io/en/latest/core/auth.html
-.. _authentication document: https://github.com/GoogleCloudPlatform/google-cloud-common/tree/master/authentication
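
If explicit credentials are needed (for example, a service-account key file) rather than the environment defaults, they can be passed to the client directly. A minimal, hedged sketch; the key-file path and project ID are placeholders:

.. code:: python

    from google.cloud import bigquery
    from google.oauth2 import service_account

    credentials = service_account.Credentials.from_service_account_file(
        '/path/to/key.json')  # placeholder path
    client = bigquery.Client(project='my-project', credentials=credentials)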
-
-Using the API
--------------
-
-Querying massive datasets can be time consuming and expensive without the
-right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_)
-solves this problem by enabling super-fast, SQL queries against
-append-mostly tables, using the processing power of Google's infrastructure.
-
-.. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery
-.. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/
-
-Create a dataset
-~~~~~~~~~~~~~~~~
-
-.. code:: python
-
- from google.cloud import bigquery
- from google.cloud.bigquery import Dataset
-
- client = bigquery.Client()
-
- dataset_ref = client.dataset('dataset_name')
- dataset = Dataset(dataset_ref)
- dataset.description = 'my dataset'
- dataset = client.create_dataset(dataset) # API request
-
-Load data from CSV
-~~~~~~~~~~~~~~~~~~
-
-.. code:: python
-
- import csv
-
- from google.cloud import bigquery
- from google.cloud.bigquery import LoadJobConfig
- from google.cloud.bigquery import SchemaField
-
- client = bigquery.Client()
-
- SCHEMA = [
- SchemaField('full_name', 'STRING', mode='required'),
- SchemaField('age', 'INTEGER', mode='required'),
- ]
- table_ref = client.dataset('dataset_name').table('table_name')
-
- load_config = LoadJobConfig()
- load_config.skip_leading_rows = 1
- load_config.schema = SCHEMA
-
- # Contents of csv_file.csv:
- # Name,Age
- # Tim,99
- with open('csv_file.csv', 'rb') as readable:
- client.load_table_from_file(
- readable, table_ref, job_config=load_config) # API request
-
-Perform a query
-~~~~~~~~~~~~~~~
-
-.. code:: python
-
- # Perform a query.
- QUERY = (
- 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
- 'WHERE state = "TX" '
- 'LIMIT 100')
- query_job = client.query(QUERY) # API request
- rows = query_job.result() # Waits for query to finish
-
- for row in rows:
- print(row.name)
-
-
-See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how
-to connect to BigQuery using this Client Library.
-
-.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html
-
-.. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg
- :target: https://pypi.org/project/google-cloud-bigquery/
-.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg
- :target: https://pypi.org/project/google-cloud-bigquery/
diff --git a/bigquery/google/cloud/bigquery/__init__.py b/bigquery/google/cloud/bigquery/__init__.py
deleted file mode 100644
index 4c3fcd7..0000000
--- a/bigquery/google/cloud/bigquery/__init__.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Google BigQuery API wrapper.
-
-The main concepts with this API are:
-
-- :class:`~google.cloud.bigquery.dataset.Dataset` represents a
- collection of tables.
-
-- :class:`~google.cloud.bigquery.table.Table` represents a single "relation".
-"""
-
-
-from pkg_resources import get_distribution
-__version__ = get_distribution('google-cloud-bigquery').version
-
-from google.cloud.bigquery._helpers import Row
-from google.cloud.bigquery._helpers import DEFAULT_RETRY
-from google.cloud.bigquery.client import Client
-from google.cloud.bigquery.dataset import AccessEntry
-from google.cloud.bigquery.dataset import Dataset
-from google.cloud.bigquery.dataset import DatasetReference
-from google.cloud.bigquery.job import CopyJobConfig
-from google.cloud.bigquery.job import ExtractJobConfig
-from google.cloud.bigquery.job import QueryJobConfig
-from google.cloud.bigquery.job import LoadJobConfig
-from google.cloud.bigquery.query import ArrayQueryParameter
-from google.cloud.bigquery.query import ScalarQueryParameter
-from google.cloud.bigquery.query import StructQueryParameter
-from google.cloud.bigquery.query import UDFResource
-from google.cloud.bigquery.schema import SchemaField
-from google.cloud.bigquery.table import Table
-from google.cloud.bigquery.table import TableReference
-from google.cloud.bigquery.external_config import ExternalConfig
-from google.cloud.bigquery.external_config import BigtableOptions
-from google.cloud.bigquery.external_config import BigtableColumnFamily
-from google.cloud.bigquery.external_config import BigtableColumn
-from google.cloud.bigquery.external_config import CSVOptions
-from google.cloud.bigquery.external_config import GoogleSheetsOptions
-
-__all__ = [
- '__version__',
- 'AccessEntry',
- 'ArrayQueryParameter',
- 'Client',
- 'Dataset',
- 'DatasetReference',
- 'CopyJobConfig',
- 'ExtractJobConfig',
- 'QueryJobConfig',
- 'Row',
- 'LoadJobConfig',
- 'ScalarQueryParameter',
- 'SchemaField',
- 'StructQueryParameter',
- 'Table',
- 'TableReference',
- 'UDFResource',
- 'DEFAULT_RETRY',
- 'ExternalConfig',
- 'BigtableOptions',
- 'BigtableColumnFamily',
- 'BigtableColumn',
- 'CSVOptions',
- 'GoogleSheetsOptions',
-]
diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py
deleted file mode 100644
index 1ba9233..0000000
--- a/bigquery/google/cloud/bigquery/_helpers.py
+++ /dev/null
@@ -1,562 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Shared helper functions for BigQuery API classes."""
-
-import base64
-import datetime
-import operator
-
-import six
-
-from google.api_core import retry
-from google.cloud._helpers import UTC
-from google.cloud._helpers import _date_from_iso8601_date
-from google.cloud._helpers import _datetime_from_microseconds
-from google.cloud._helpers import _microseconds_from_datetime
-from google.cloud._helpers import _RFC3339_NO_FRACTION
-from google.cloud._helpers import _time_from_iso8601_time_naive
-from google.cloud._helpers import _to_bytes
-
-_RFC3339_MICROS_NO_ZULU = '%Y-%m-%dT%H:%M:%S.%f'
-
-
-def _not_null(value, field):
- """Check whether 'value' should be coerced to 'field' type."""
- return value is not None or field.mode != 'NULLABLE'
-
-
-def _int_from_json(value, field):
- """Coerce 'value' to an int, if set or not nullable."""
- if _not_null(value, field):
- return int(value)
-
-
-def _float_from_json(value, field):
- """Coerce 'value' to a float, if set or not nullable."""
- if _not_null(value, field):
- return float(value)
-
-
-def _bool_from_json(value, field):
- """Coerce 'value' to a bool, if set or not nullable."""
- if _not_null(value, field):
- return value.lower() in ['t', 'true', '1']
-
-
-def _string_from_json(value, _):
- """NOOP string -> string coercion"""
- return value
-
-
-def _bytes_from_json(value, field):
- """Base64-decode value"""
- if _not_null(value, field):
- return base64.standard_b64decode(_to_bytes(value))
-
-
-def _timestamp_from_json(value, field):
- """Coerce 'value' to a datetime, if set or not nullable."""
- if _not_null(value, field):
- # value will be a float in seconds, to microsecond precision, in UTC.
- return _datetime_from_microseconds(1e6 * float(value))
-
-
-def _timestamp_query_param_from_json(value, field):
- """Coerce 'value' to a datetime, if set or not nullable.
-
- Args:
- value (str): The timestamp.
- field (.SchemaField): The field corresponding to the value.
-
- Returns:
- Optional[datetime.datetime]: The parsed datetime object from
- ``value`` if the ``field`` is not null (otherwise it is
- :data:`None`).
- """
- if _not_null(value, field):
- # Canonical formats for timestamps in BigQuery are flexible. See:
- # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type
- # The separator between the date and time can be 'T' or ' '.
- value = value.replace(' ', 'T', 1)
- # The UTC timezone may be formatted as Z or +00:00.
- value = value.replace('Z', '')
- value = value.replace('+00:00', '')
-
- if '.' in value:
- # YYYY-MM-DDTHH:MM:SS.ffffff
- return datetime.datetime.strptime(
- value, _RFC3339_MICROS_NO_ZULU).replace(tzinfo=UTC)
- else:
- # YYYY-MM-DDTHH:MM:SS
- return datetime.datetime.strptime(
- value, _RFC3339_NO_FRACTION).replace(tzinfo=UTC)
- else:
- return None
-
-
-def _datetime_from_json(value, field):
- """Coerce 'value' to a datetime, if set or not nullable.
-
- Args:
- value (str): The timestamp.
- field (.SchemaField): The field corresponding to the value.
-
- Returns:
- Optional[datetime.datetime]: The parsed datetime object from
- ``value`` if the ``field`` is not null (otherwise it is
- :data:`None`).
- """
- if _not_null(value, field):
- if '.' in value:
- # YYYY-MM-DDTHH:MM:SS.ffffff
- return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU)
- else:
- # YYYY-MM-DDTHH:MM:SS
- return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION)
- else:
- return None
-
-
-def _date_from_json(value, field):
- """Coerce 'value' to a datetime date, if set or not nullable"""
- if _not_null(value, field):
- # value will be a string, in YYYY-MM-DD form.
- return _date_from_iso8601_date(value)
-
-
-def _time_from_json(value, field):
- """Coerce 'value' to a datetime date, if set or not nullable"""
- if _not_null(value, field):
- # value will be a string, in HH:MM:SS form.
- return _time_from_iso8601_time_naive(value)
-
-
-def _record_from_json(value, field):
- """Coerce 'value' to a mapping, if set or not nullable."""
- if _not_null(value, field):
- record = {}
- record_iter = zip(field.fields, value['f'])
- for subfield, cell in record_iter:
- converter = _CELLDATA_FROM_JSON[subfield.field_type]
- if subfield.mode == 'REPEATED':
- value = [converter(item['v'], subfield) for item in cell['v']]
- else:
- value = converter(cell['v'], subfield)
- record[subfield.name] = value
- return record
-
-
-_CELLDATA_FROM_JSON = {
- 'INTEGER': _int_from_json,
- 'INT64': _int_from_json,
- 'FLOAT': _float_from_json,
- 'FLOAT64': _float_from_json,
- 'BOOLEAN': _bool_from_json,
- 'BOOL': _bool_from_json,
- 'STRING': _string_from_json,
- 'BYTES': _bytes_from_json,
- 'TIMESTAMP': _timestamp_from_json,
- 'DATETIME': _datetime_from_json,
- 'DATE': _date_from_json,
- 'TIME': _time_from_json,
- 'RECORD': _record_from_json,
-}
-
-_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON)
-_QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json
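
A hedged illustration of how these converter tables are used: the converter is looked up by the schema field's type and applied to the raw JSON cell value, and ``None`` passes through for NULLABLE fields. The ``SchemaField`` and values below are invented for the example:

.. code:: python

    from google.cloud.bigquery.schema import SchemaField

    field = SchemaField('age', 'INTEGER', mode='NULLABLE')
    convert = _CELLDATA_FROM_JSON[field.field_type]
    assert convert('42', field) == 42
    assert convert(None, field) is None  # NULLABLE, so None passes through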
-
-
-class Row(object):
- """A BigQuery row.
-
- Values can be accessed by position (index), by key like a dict,
- or as properties.
-
- :type values: tuple
- :param values: the row values
-
- :type field_to_index: dict
- :param field_to_index: a mapping from schema field names to indexes
- """
-
- # Choose unusual field names to try to avoid conflict with schema fields.
- __slots__ = ('_xxx_values', '_xxx_field_to_index')
-
- def __init__(self, values, field_to_index):
- self._xxx_values = values
- self._xxx_field_to_index = field_to_index
-
- def values(self):
- return self._xxx_values
-
- def __getattr__(self, name):
- i = self._xxx_field_to_index.get(name)
- if i is None:
- raise AttributeError('no row field "%s"' % name)
- return self._xxx_values[i]
-
- def __len__(self):
- return len(self._xxx_values)
-
- def __getitem__(self, key):
- if isinstance(key, six.string_types):
- i = self._xxx_field_to_index.get(key)
- if i is None:
- raise KeyError('no row field "%s"' % key)
- key = i
- return self._xxx_values[key]
-
- def __eq__(self, other):
- if not isinstance(other, Row):
- return NotImplemented
- return (
- self._xxx_values == other._xxx_values and
- self._xxx_field_to_index == other._xxx_field_to_index)
-
- def __ne__(self, other):
- return not self == other
-
- def __repr__(self):
- # sort field dict by value, for determinism
- items = sorted(self._xxx_field_to_index.items(),
- key=operator.itemgetter(1))
- f2i = '{' + ', '.join('%r: %d' % i for i in items) + '}'
- return 'Row({}, {})'.format(self._xxx_values, f2i)
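
A hedged example of the three access patterns described in the class docstring; the values and field mapping are invented:

.. code:: python

    row = Row(('Phred Phlyntstone', 32), {'full_name': 0, 'age': 1})
    assert row[0] == 'Phred Phlyntstone'          # by position
    assert row['age'] == 32                       # by key, like a dict
    assert row.full_name == 'Phred Phlyntstone'   # as a property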
-
-
-def _field_to_index_mapping(schema):
- """Create a mapping from schema field name to index of field."""
- return {f.name: i for i, f in enumerate(schema)}
-
-
-def _row_tuple_from_json(row, schema):
- """Convert JSON row data to row with appropriate types.
-
- Note: ``row['f']`` and ``schema`` are presumed to be of the same length.
-
- :type row: dict
- :param row: A JSON response row to be converted.
-
- :type schema: tuple
- :param schema: A tuple of
- :class:`~google.cloud.bigquery.schema.SchemaField`.
-
- :rtype: tuple
- :returns: A tuple of data converted to native types.
- """
- row_data = []
- for field, cell in zip(schema, row['f']):
- converter = _CELLDATA_FROM_JSON[field.field_type]
- if field.mode == 'REPEATED':
- row_data.append([converter(item['v'], field)
- for item in cell['v']])
- else:
- row_data.append(converter(cell['v'], field))
-
- return tuple(row_data)
-
-
-def _rows_from_json(values, schema):
- """Convert JSON row data to rows with appropriate types."""
- field_to_index = _field_to_index_mapping(schema)
- return [Row(_row_tuple_from_json(r, schema), field_to_index)
- for r in values]
-
-
-def _int_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- if isinstance(value, int):
- value = str(value)
- return value
-
-
-def _float_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- return value
-
-
-def _bool_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- if isinstance(value, bool):
- value = 'true' if value else 'false'
- return value
-
-
-def _bytes_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- if isinstance(value, bytes):
- value = base64.standard_b64encode(value).decode('ascii')
- return value
-
-
-def _timestamp_to_json_parameter(value):
- """Coerce 'value' to an JSON-compatible representation.
-
- This version returns the string representation used in query parameters.
- """
- if isinstance(value, datetime.datetime):
- if value.tzinfo not in (None, UTC):
- # Convert to UTC and remove the time zone info.
- value = value.replace(tzinfo=None) - value.utcoffset()
- value = '%s %s+00:00' % (
- value.date().isoformat(), value.time().isoformat())
- return value
-
-
-def _timestamp_to_json_row(value):
- """Coerce 'value' to an JSON-compatible representation.
-
- This version returns floating-point seconds value used in row data.
- """
- if isinstance(value, datetime.datetime):
- value = _microseconds_from_datetime(value) * 1e-6
- return value
-
-
-def _datetime_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- if isinstance(value, datetime.datetime):
- value = value.strftime(_RFC3339_MICROS_NO_ZULU)
- return value
-
-
-def _date_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- if isinstance(value, datetime.date):
- value = value.isoformat()
- return value
-
-
-def _time_to_json(value):
- """Coerce 'value' to an JSON-compatible representation."""
- if isinstance(value, datetime.time):
- value = value.isoformat()
- return value
-
-
-# Converters used for scalar values marshalled as row data.
-_SCALAR_VALUE_TO_JSON_ROW = {
- 'INTEGER': _int_to_json,
- 'INT64': _int_to_json,
- 'FLOAT': _float_to_json,
- 'FLOAT64': _float_to_json,
- 'BOOLEAN': _bool_to_json,
- 'BOOL': _bool_to_json,
- 'BYTES': _bytes_to_json,
- 'TIMESTAMP': _timestamp_to_json_row,
- 'DATETIME': _datetime_to_json,
- 'DATE': _date_to_json,
- 'TIME': _time_to_json,
-}
-
-
-# Converters used for scalar values marshalled as query parameters.
-_SCALAR_VALUE_TO_JSON_PARAM = _SCALAR_VALUE_TO_JSON_ROW.copy()
-_SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter
-
-
-def _snake_to_camel_case(value):
- """Convert snake case string to camel case."""
- words = value.split('_')
- return words[0] + ''.join(map(str.capitalize, words[1:]))
-
-
-class _ApiResourceProperty(object):
- """Base property implementation.
-
- Values will be stored on a `_properties` helper attribute of the
- property's job instance.
-
- :type name: str
- :param name: name of the property
-
- :type resource_name: str
- :param resource_name: name of the property in the resource dictionary
- """
-
- def __init__(self, name, resource_name):
- self.name = name
- self.resource_name = resource_name
-
- def __get__(self, instance, owner):
- """Descriptor protocol: accessor"""
- if instance is None:
- return self
- return instance._properties.get(self.resource_name)
-
- def _validate(self, value):
- """Subclasses override to impose validation policy."""
- pass
-
- def __set__(self, instance, value):
- """Descriptor protocol: mutator"""
- self._validate(value)
- instance._properties[self.resource_name] = value
-
- def __delete__(self, instance):
- """Descriptor protocol: deleter"""
- del instance._properties[self.resource_name]
-
-
-class _TypedApiResourceProperty(_ApiResourceProperty):
- """Property implementation: validates based on value type.
-
- :type name: str
- :param name: name of the property
-
- :type resource_name: str
- :param resource_name: name of the property in the resource dictionary
-
- :type property_type: type or sequence of types
- :param property_type: type to be validated
- """
- def __init__(self, name, resource_name, property_type):
- super(_TypedApiResourceProperty, self).__init__(
- name, resource_name)
- self.property_type = property_type
-
- def _validate(self, value):
- """Ensure that 'value' is of the appropriate type.
-
- :raises: ValueError on a type mismatch.
- """
- if value is None:
- return
- if not isinstance(value, self.property_type):
- raise ValueError('Required type: %s' % (self.property_type,))
-
-
-class _ListApiResourceProperty(_ApiResourceProperty):
- """Property implementation: validates based on value type.
-
- :type name: str
- :param name: name of the property
-
- :type resource_name: str
- :param resource_name: name of the property in the resource dictionary
-
- :type property_type: type or sequence of types
- :param property_type: type to be validated
- """
- def __init__(self, name, resource_name, property_type):
- super(_ListApiResourceProperty, self).__init__(
- name, resource_name)
- self.property_type = property_type
-
- def __get__(self, instance, owner):
- """Descriptor protocol: accessor"""
- if instance is None:
- return self
- return instance._properties.get(self.resource_name, [])
-
- def _validate(self, value):
- """Ensure that 'value' is of the appropriate type.
-
- :raises: ValueError on a type mismatch.
- """
- if value is None:
- raise ValueError((
- 'Required type: list of {}. '
- 'To unset, use del or set to empty list').format(
- self.property_type,))
- if not all(isinstance(item, self.property_type) for item in value):
- raise ValueError(
- 'Required type: list of %s' % (self.property_type,))
-
-
-class _EnumApiResourceProperty(_ApiResourceProperty):
- """Pseudo-enumeration class.
-
- :type name: str
- :param name: name of the property.
-
- :type resource_name: str
- :param resource_name: name of the property in the resource dictionary
- """
-
-
-def _item_to_row(iterator, resource):
- """Convert a JSON row to the native object.
-
- .. note::
-
- This assumes that the ``schema`` attribute has been
- added to the iterator after being created, which
- should be done by the caller.
-
- :type iterator: :class:`~google.api_core.page_iterator.Iterator`
- :param iterator: The iterator that is currently in use.
-
- :type resource: dict
- :param resource: An item to be converted to a row.
-
- :rtype: :class:`Row`
- :returns: The next row in the page.
- """
- return Row(_row_tuple_from_json(resource, iterator.schema),
- iterator._field_to_index)
-
-
-# pylint: disable=unused-argument
-def _rows_page_start(iterator, page, response):
- """Grab total rows when :class:`~google.cloud.iterator.Page` starts.
-
- :type iterator: :class:`~google.api_core.page_iterator.Iterator`
- :param iterator: The iterator that is currently in use.
-
- :type page: :class:`~google.api_core.page_iterator.Page`
- :param page: The page that was just created.
-
- :type response: dict
- :param response: The JSON API response for a page of rows in a table.
- """
- total_rows = response.get('totalRows')
- if total_rows is not None:
- total_rows = int(total_rows)
- iterator.total_rows = total_rows
-# pylint: enable=unused-argument
-
-
-def _should_retry(exc):
- """Predicate for determining when to retry.
-
- We retry if and only if the 'reason' is 'backendError'
- or 'rateLimitExceeded'.
- """
- if not hasattr(exc, 'errors'):
- return False
- if len(exc.errors) == 0:
- return False
- reason = exc.errors[0]['reason']
- return reason == 'backendError' or reason == 'rateLimitExceeded'
-
-
-DEFAULT_RETRY = retry.Retry(predicate=_should_retry)
-"""The default retry object.
-
-Any method with a ``retry`` parameter will be retried automatically,
-with reasonable defaults. To disable retry, pass ``retry=None``.
-To modify the default retry behavior, call a ``with_XXX`` method
-on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
-pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
-"""
-
-
-def _int_or_none(value):
- """Helper: deserialize int value from JSON string."""
- if isinstance(value, int):
- return value
- if value is not None:
- return int(value)
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
deleted file mode 100644
index 712b218..0000000
--- a/bigquery/google/cloud/bigquery/client.py
+++ /dev/null
@@ -1,1357 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Client for interacting with the Google BigQuery API."""
-
-from __future__ import absolute_import
-
-import collections
-import functools
-import os
-import uuid
-
-import six
-
-from google import resumable_media
-from google.resumable_media.requests import MultipartUpload
-from google.resumable_media.requests import ResumableUpload
-
-from google.api_core import page_iterator
-
-from google.cloud import exceptions
-from google.cloud.client import ClientWithProject
-from google.cloud.bigquery._http import Connection
-from google.cloud.bigquery.dataset import Dataset
-from google.cloud.bigquery.dataset import DatasetReference
-from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
-from google.cloud.bigquery.table import TableReference
-from google.cloud.bigquery.table import _row_from_mapping
-from google.cloud.bigquery.job import CopyJob
-from google.cloud.bigquery.job import ExtractJob
-from google.cloud.bigquery.job import LoadJob
-from google.cloud.bigquery.job import QueryJob, QueryJobConfig
-from google.cloud.bigquery.query import QueryResults
-from google.cloud.bigquery._helpers import _item_to_row
-from google.cloud.bigquery._helpers import _rows_page_start
-from google.cloud.bigquery._helpers import _field_to_index_mapping
-from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
-from google.cloud.bigquery._helpers import DEFAULT_RETRY
-from google.cloud.bigquery._helpers import _snake_to_camel_case
-
-
-_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB
-_MAX_MULTIPART_SIZE = 5 * 1024 * 1024
-_DEFAULT_NUM_RETRIES = 6
-_BASE_UPLOAD_TEMPLATE = (
- u'https://www.googleapis.com/upload/bigquery/v2/projects/'
- u'{project}/jobs?uploadType=')
-_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart'
-_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable'
-_GENERIC_CONTENT_TYPE = u'*/*'
-_READ_LESS_THAN_SIZE = (
- 'Size {:d} was specified but the file-like object only had '
- '{:d} bytes remaining.')
-
-
-class Project(object):
- """Wrapper for resource describing a BigQuery project.
-
- :type project_id: str
- :param project_id: Opaque ID of the project
-
- :type numeric_id: int
- :param numeric_id: Numeric ID of the project
-
- :type friendly_name: str
- :param friendly_name: Display name of the project
- """
- def __init__(self, project_id, numeric_id, friendly_name):
- self.project_id = project_id
- self.numeric_id = numeric_id
- self.friendly_name = friendly_name
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct an instance from a resource dict."""
- return cls(
- resource['id'], resource['numericId'], resource['friendlyName'])
-
-
-class Client(ClientWithProject):
- """Client to bundle configuration needed for API requests.
-
- :type project: str
- :param project: the project which the client acts on behalf of. Will be
- passed when creating a dataset / job. If not passed,
- falls back to the default inferred from the environment.
-
- :type credentials: :class:`~google.auth.credentials.Credentials`
- :param credentials: (Optional) The OAuth2 Credentials to use for this
- client. If not passed (and if no ``_http`` object is
- passed), falls back to the default inferred from the
- environment.
-
- :type _http: :class:`~requests.Session`
- :param _http: (Optional) HTTP object to make requests. Can be any object
- that defines ``request()`` with the same interface as
- :meth:`requests.Session.request`. If not passed, an
- ``_http`` object is created that is bound to the
- ``credentials`` for the current object.
- This parameter should be considered private, and could
- change in the future.
- """
-
- SCOPE = ('https://www.googleapis.com/auth/bigquery',
- 'https://www.googleapis.com/auth/cloud-platform')
- """The scopes required for authenticating as a BigQuery consumer."""
-
- def __init__(self, project=None, credentials=None, _http=None):
- super(Client, self).__init__(
- project=project, credentials=credentials, _http=_http)
- self._connection = Connection(self)
-
- def list_projects(self, max_results=None, page_token=None,
- retry=DEFAULT_RETRY):
- """List projects for the project associated with this client.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list
-
- :type max_results: int
- :param max_results: maximum number of projects to return. If not
- passed, defaults to a value set by the API.
-
- :type page_token: str
- :param page_token: opaque marker for the next "page" of projects. If
- not passed, the API will return the first page of
- projects.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns: Iterator of :class:`~google.cloud.bigquery.client.Project`
- accessible to the current client.
- """
- return page_iterator.HTTPIterator(
- client=self,
- api_request=functools.partial(self._call_api, retry),
- path='/projects',
- item_to_value=_item_to_project,
- items_key='projects',
- page_token=page_token,
- max_results=max_results)
-
- def list_datasets(self, include_all=False, filter=None, max_results=None,
- page_token=None, retry=DEFAULT_RETRY):
- """List datasets for the project associated with this client.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list
-
- :type include_all: bool
- :param include_all: True if results include hidden datasets.
-
- :type filter: str
- :param filter: an expression for filtering the results by label.
- For syntax, see
- https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter.
-
- :type max_results: int
- :param max_results: maximum number of datasets to return. If not
- passed, defaults to a value set by the API.
-
- :type page_token: str
- :param page_token: opaque marker for the next "page" of datasets. If
- not passed, the API will return the first page of
- datasets.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`.
- accessible to the current client.
- """
- extra_params = {}
- if include_all:
- extra_params['all'] = True
- if filter:
- # TODO: consider supporting a dict of label -> value for filter,
- # and converting it into a string here.
- extra_params['filter'] = filter
- path = '/projects/%s/datasets' % (self.project,)
- return page_iterator.HTTPIterator(
- client=self,
- api_request=functools.partial(self._call_api, retry),
- path=path,
- item_to_value=_item_to_dataset,
- items_key='datasets',
- page_token=page_token,
- max_results=max_results,
- extra_params=extra_params)
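
A hedged usage sketch: the returned iterator fetches pages lazily, so it can simply be looped over.

.. code:: python

    from google.cloud.bigquery import Client

    client = Client()
    for dataset in client.list_datasets():
        print(dataset.dataset_id)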
-
- def dataset(self, dataset_id, project=None):
- """Construct a reference to a dataset.
-
- :type dataset_id: str
- :param dataset_id: ID of the dataset.
-
- :type project: str
- :param project: (Optional) project ID for the dataset (defaults to
- the project of the client).
-
- :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference`
- :returns: a new ``DatasetReference`` instance
- """
- if project is None:
- project = self.project
-
- return DatasetReference(project, dataset_id)
-
- def create_dataset(self, dataset):
- """API call: create the dataset via a PUT request.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
-
- :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset`
- :param dataset: A ``Dataset`` populated with the desired initial state.
- If project is missing, it defaults to the project of
- the client.
-
- :rtype: ":class:`~google.cloud.bigquery.dataset.Dataset`"
- :returns: a new ``Dataset`` returned from the service.
- """
- path = '/projects/%s/datasets' % (dataset.project,)
- api_response = self._connection.api_request(
- method='POST', path=path, data=dataset._build_resource())
- return Dataset.from_api_repr(api_response)
-
- def create_table(self, table):
- """API call: create a table via a PUT request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
-
- :type table: :class:`~google.cloud.bigquery.table.Table`
- :param table: A ``Table`` populated with the desired initial state.
-
- :rtype: ":class:`~google.cloud.bigquery.table.Table`"
- :returns: a new ``Table`` returned from the service.
- """
- path = '/projects/%s/datasets/%s/tables' % (
- table.project, table.dataset_id)
- resource = table._build_resource(Table.all_fields)
- doomed = [field for field in resource if resource[field] is None]
- for field in doomed:
- del resource[field]
- api_response = self._connection.api_request(
- method='POST', path=path, data=resource)
- return Table.from_api_repr(api_response)
-
- def _call_api(self, retry, **kwargs):
- call = functools.partial(self._connection.api_request, **kwargs)
- if retry:
- call = retry(call)
- return call()
-
- def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY):
- """Fetch the dataset referenced by ``dataset_ref``
-
- :type dataset_ref:
- :class:`google.cloud.bigquery.dataset.DatasetReference`
- :param dataset_ref: the dataset to use.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.dataset.Dataset`
- :returns: a ``Dataset`` instance
- """
- api_response = self._call_api(retry,
- method='GET',
- path=dataset_ref.path)
- return Dataset.from_api_repr(api_response)
-
- def get_table(self, table_ref, retry=DEFAULT_RETRY):
- """Fetch the table referenced by ``table_ref``
-
- :type table_ref:
- :class:`google.cloud.bigquery.table.TableReference`
- :param table_ref: the table to use.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.table.Table`
- :returns: a ``Table`` instance
- """
- api_response = self._call_api(retry, method='GET', path=table_ref.path)
- return Table.from_api_repr(api_response)
-
- def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY):
- """Change some fields of a dataset.
-
- Use ``fields`` to specify which fields to update. At least one field
- must be provided. If a field is listed in ``fields`` and is ``None`` in
- ``dataset``, it will be deleted.
-
- If ``dataset.etag`` is not ``None``, the update will only
- succeed if the dataset on the server has the same ETag. Thus
- reading a dataset with ``get_dataset``, changing its fields,
- and then passing it to ``update_dataset`` will ensure that the changes
- will only be saved if no modifications to the dataset occurred
- since the read.
-
- :type dataset: :class:`google.cloud.bigquery.dataset.Dataset`
- :param dataset: the dataset to update.
-
- :type fields: sequence of string
- :param fields: the fields of ``dataset`` to change, spelled as the
- Dataset properties (e.g. "friendly_name").
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.dataset.Dataset`
- :returns: the modified ``Dataset`` instance
- """
- path = '/projects/%s/datasets/%s' % (dataset.project,
- dataset.dataset_id)
- partial = {}
- for f in fields:
- if not hasattr(dataset, f):
- raise ValueError('No Dataset field %s' % f)
- # All dataset attributes are trivially convertible to JSON except
- # for access entries.
- if f == 'access_entries':
- attr = dataset._build_access_resource()
- api_field = 'access'
- else:
- attr = getattr(dataset, f)
- api_field = _snake_to_camel_case(f)
- partial[api_field] = attr
- if dataset.etag is not None:
- headers = {'If-Match': dataset.etag}
- else:
- headers = None
- api_response = self._call_api(
- retry, method='PATCH', path=path, data=partial, headers=headers)
- return Dataset.from_api_repr(api_response)
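
A hedged sketch of the read-modify-write flow the docstring describes, assuming ``client`` is an existing :class:`Client` and ``'my_dataset'`` is a placeholder ID. Because the dataset read via ``get_dataset`` carries an ETag, a concurrent change on the server makes this update fail rather than be overwritten:

.. code:: python

    dataset = client.get_dataset(client.dataset('my_dataset'))
    dataset.description = 'Nightly reporting tables'
    dataset = client.update_dataset(dataset, ['description'])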
-
- def update_table(self, table, properties, retry=DEFAULT_RETRY):
- """API call: update table properties via a PUT request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update
-
- :type table:
- :class:`google.cloud.bigquery.table.Table`
- :param table: the table to update.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.table.Table`
- :returns: a ``Table`` instance
- """
- partial = table._build_resource(properties)
- if table.etag is not None:
- headers = {'If-Match': table.etag}
- else:
- headers = None
- api_response = self._call_api(
- retry,
- method='PATCH', path=table.path, data=partial, headers=headers)
- return Table.from_api_repr(api_response)
-
- def list_dataset_tables(self, dataset, max_results=None, page_token=None,
- retry=DEFAULT_RETRY):
- """List tables in the dataset.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
-
- :type dataset: One of:
- :class:`~google.cloud.bigquery.dataset.Dataset`
- :class:`~google.cloud.bigquery.dataset.DatasetReference`
- :param dataset: the dataset whose tables to list, or a reference to it.
-
- :type max_results: int
- :param max_results: (Optional) Maximum number of tables to return.
- If not passed, defaults to a value set by the API.
-
- :type page_token: str
- :param page_token: (Optional) Opaque marker for the next "page" of
- tables. If not passed, the API will return the
- first page of tables.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns: Iterator of :class:`~google.cloud.bigquery.table.Table`
- contained within the current dataset.
- """
- if not isinstance(dataset, (Dataset, DatasetReference)):
- raise TypeError('dataset must be a Dataset or a DatasetReference')
- path = '%s/tables' % dataset.path
- result = page_iterator.HTTPIterator(
- client=self,
- api_request=functools.partial(self._call_api, retry),
- path=path,
- item_to_value=_item_to_table,
- items_key='tables',
- page_token=page_token,
- max_results=max_results)
- result.dataset = dataset
- return result
-
- def delete_dataset(self, dataset, retry=DEFAULT_RETRY):
- """Delete a dataset.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete
-
- :type dataset: One of:
- :class:`~google.cloud.bigquery.dataset.Dataset`
- :class:`~google.cloud.bigquery.dataset.DatasetReference`
- :param dataset: the dataset to delete, or a reference to it.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
- """
- if not isinstance(dataset, (Dataset, DatasetReference)):
- raise TypeError('dataset must be a Dataset or a DatasetReference')
- self._call_api(retry, method='DELETE', path=dataset.path)
-
- def delete_table(self, table, retry=DEFAULT_RETRY):
- """Delete a table
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete
-
- :type table: One of:
- :class:`~google.cloud.bigquery.table.Table`
- :class:`~google.cloud.bigquery.table.TableReference`
- :param table: the table to delete, or a reference to it.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
- """
- if not isinstance(table, (Table, TableReference)):
- raise TypeError('table must be a Table or a TableReference')
- self._call_api(retry, method='DELETE', path=table.path)
-
- def _get_query_results(self, job_id, retry, project=None, timeout_ms=None):
- """Get the query results object for a query job.
-
- :type job_id: str
- :param job_id: Name of the query job.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :type project: str
- :param project:
- (Optional) project ID for the query job (defaults to the project of
- the client).
-
- :type timeout_ms: int
- :param timeout_ms:
- (Optional) number of milliseconds the API call should wait for
- the query to complete before the request times out.
-
- :rtype: :class:`google.cloud.bigquery.query.QueryResults`
- :returns: a new ``QueryResults`` instance
- """
-
- extra_params = {'maxResults': 0}
-
- if project is None:
- project = self.project
-
- if timeout_ms is not None:
- extra_params['timeoutMs'] = timeout_ms
-
- path = '/projects/{}/queries/{}'.format(project, job_id)
-
- # This call is typically made in a polling loop that checks whether the
- # job is complete (from QueryJob.done(), called ultimately from
- # QueryJob.result()). So we don't need to poll here.
- resource = self._call_api(
- retry, method='GET', path=path, query_params=extra_params)
- return QueryResults.from_api_repr(resource)
-
- def job_from_resource(self, resource):
- """Detect correct job type from resource and instantiate.
-
- :type resource: dict
- :param resource: one job resource from API response
-
- :rtype: One of:
- :class:`google.cloud.bigquery.job.LoadJob`,
- :class:`google.cloud.bigquery.job.CopyJob`,
- :class:`google.cloud.bigquery.job.ExtractJob`,
- :class:`google.cloud.bigquery.job.QueryJob`,
- :class:`google.cloud.bigquery.job.RunSyncQueryJob`
- :returns: the job instance, constructed via the resource
- """
- config = resource['configuration']
- if 'load' in config:
- return LoadJob.from_api_repr(resource, self)
- elif 'copy' in config:
- return CopyJob.from_api_repr(resource, self)
- elif 'extract' in config:
- return ExtractJob.from_api_repr(resource, self)
- elif 'query' in config:
- return QueryJob.from_api_repr(resource, self)
- raise ValueError('Cannot parse job resource')
-
- def get_job(self, job_id, project=None, retry=DEFAULT_RETRY):
- """Fetch a job for the project associated with this client.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
-
- :type job_id: str
- :param job_id: Name of the job.
-
- :type project: str
- :param project:
- project ID owning the job (defaults to the client's project)
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.cloud.bigquery.job._AsyncJob`
- :returns:
- Concrete job instance, based on the resource returned by the API.
- """
- extra_params = {'projection': 'full'}
-
- if project is None:
- project = self.project
-
- path = '/projects/{}/jobs/{}'.format(project, job_id)
-
- resource = self._call_api(
- retry, method='GET', path=path, query_params=extra_params)
-
- return self.job_from_resource(resource)
-
- def list_jobs(self, max_results=None, page_token=None, all_users=None,
- state_filter=None, retry=DEFAULT_RETRY):
- """List jobs for the project associated with this client.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list
-
- :type max_results: int
- :param max_results: maximum number of jobs to return. If not
- passed, defaults to a value set by the API.
-
- :type page_token: str
- :param page_token: opaque marker for the next "page" of jobs. If
- not passed, the API will return the first page of
- jobs.
-
- :type all_users: bool
- :param all_users: if true, include jobs owned by all users in the
- project.
-
- :type state_filter: str
- :param state_filter: if passed, include only jobs matching the given
- state. One of
-
- * ``"done"``
- * ``"pending"``
- * ``"running"``
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns: Iterable of job instances.
- """
- extra_params = {'projection': 'full'}
-
- if all_users is not None:
- extra_params['allUsers'] = all_users
-
- if state_filter is not None:
- extra_params['stateFilter'] = state_filter
-
- path = '/projects/%s/jobs' % (self.project,)
- return page_iterator.HTTPIterator(
- client=self,
- api_request=functools.partial(self._call_api, retry),
- path=path,
- item_to_value=_item_to_job,
- items_key='jobs',
- page_token=page_token,
- max_results=max_results,
- extra_params=extra_params)
-
- def load_table_from_uri(self, source_uris, destination,
- job_id=None, job_id_prefix=None,
- job_config=None, retry=DEFAULT_RETRY):
- """Starts a job for loading data into a table from CloudStorage.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load
-
- :type source_uris: One of:
- str
- sequence of string
- :param source_uris: URIs of data files to be loaded; in format
- ``gs://<bucket_name>/<object_name_or_glob>``.
-
- :type destination: :class:`google.cloud.bigquery.table.TableReference`
- :param destination: Table into which data is to be loaded.
-
- :type job_id: str
- :param job_id: (Optional) Name of the job.
-
- :type job_id_prefix: str or ``NoneType``
- :param job_id_prefix: (Optional) the user-provided prefix for a
- randomly generated job ID. This parameter will be
- ignored if a ``job_id`` is also given.
-
- :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig`
- :param job_config: (Optional) Extra configuration options for the job.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.job.LoadJob`
- :returns: a new ``LoadJob`` instance
- """
- job_id = _make_job_id(job_id, job_id_prefix)
- if isinstance(source_uris, six.string_types):
- source_uris = [source_uris]
- job = LoadJob(job_id, source_uris, destination, self, job_config)
- job.begin(retry=retry)
- return job
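
A hedged sketch of loading a CSV file already in Cloud Storage, assuming ``client`` is an existing :class:`Client`; the URI, dataset, and table IDs are placeholders:

.. code:: python

    from google.cloud.bigquery import LoadJobConfig, SchemaField

    config = LoadJobConfig()
    config.skip_leading_rows = 1
    config.schema = [
        SchemaField('full_name', 'STRING'),
        SchemaField('age', 'INTEGER'),
    ]
    job = client.load_table_from_uri(
        'gs://my-bucket/people.csv',
        client.dataset('my_dataset').table('people'),
        job_config=config)
    job.result()  # waits for the load to finish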
-
- def load_table_from_file(self, file_obj, destination,
- rewind=False,
- size=None,
- num_retries=_DEFAULT_NUM_RETRIES,
- job_id=None, job_id_prefix=None, job_config=None):
- """Upload the contents of this table from a file-like object.
-
- Like load_table_from_uri, this creates, starts and returns
- a ``LoadJob``.
-
- :type file_obj: file
- :param file_obj: A file handle opened in binary mode for reading.
-
- :type destination: :class:`google.cloud.bigquery.table.TableReference`
- :param destination: Table into which data is to be loaded.
-
- :type rewind: bool
- :param rewind: If True, seek to the beginning of the file handle before
- reading the file.
-
- :type size: int
- :param size: The number of bytes to read from the file handle.
- If size is ``None`` or large, resumable upload will be
- used. Otherwise, multipart upload will be used.
-
- :type num_retries: int
- :param num_retries: Number of upload retries. Defaults to 6.
-
- :type job_id: str
- :param job_id: (Optional) Name of the job.
-
- :type job_id_prefix: str or ``NoneType``
- :param job_id_prefix: (Optional) the user-provided prefix for a
- randomly generated job ID. This parameter will be
- ignored if a ``job_id`` is also given.
-
- :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig`
- :param job_config: (Optional) Extra configuration options for the job.
-
- :rtype: :class:`~google.cloud.bigquery.job.LoadJob`
-
- :returns: the job instance used to load the data (e.g., for
- querying status). Note that the job is already started:
- do not call ``job.begin()``.
- :raises: :class:`ValueError` if ``size`` is not passed in and can not
- be determined, or if the ``file_obj`` can be detected to be
- a file opened in text mode.
- """
- job_id = _make_job_id(job_id, job_id_prefix)
- job = LoadJob(job_id, None, destination, self, job_config)
- job_resource = job._build_resource()
- if rewind:
- file_obj.seek(0, os.SEEK_SET)
- _check_mode(file_obj)
- try:
- if size is None or size >= _MAX_MULTIPART_SIZE:
- response = self._do_resumable_upload(
- file_obj, job_resource, num_retries)
- else:
- response = self._do_multipart_upload(
- file_obj, job_resource, size, num_retries)
- except resumable_media.InvalidResponse as exc:
- raise exceptions.from_http_response(exc.response)
- return self.job_from_resource(response.json())
-
- def _do_resumable_upload(self, stream, metadata, num_retries):
- """Perform a resumable upload.
-
- :type stream: IO[bytes]
- :param stream: A bytes IO object open for reading.
-
- :type metadata: dict
- :param metadata: The metadata associated with the upload.
-
- :type num_retries: int
- :param num_retries: Number of upload retries. (Deprecated: This
- argument will be removed in a future release.)
-
- :rtype: :class:`~requests.Response`
- :returns: The "200 OK" response object returned after the final chunk
- is uploaded.
- """
- upload, transport = self._initiate_resumable_upload(
- stream, metadata, num_retries)
-
- while not upload.finished:
- response = upload.transmit_next_chunk(transport)
-
- return response
-
- def _initiate_resumable_upload(self, stream, metadata, num_retries):
- """Initiate a resumable upload.
-
- :type stream: IO[bytes]
- :param stream: A bytes IO object open for reading.
-
- :type metadata: dict
- :param metadata: The metadata associated with the upload.
-
- :type num_retries: int
- :param num_retries: Number of upload retries. (Deprecated: This
- argument will be removed in a future release.)
-
- :rtype: tuple
- :returns:
- Pair of
-
- * The :class:`~google.resumable_media.requests.ResumableUpload`
- that was created
- * The ``transport`` used to initiate the upload.
- """
- chunk_size = _DEFAULT_CHUNKSIZE
- transport = self._http
- headers = _get_upload_headers(self._connection.USER_AGENT)
- upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project)
- # TODO: modify ResumableUpload to take a retry.Retry object
- # that it can use for the initial RPC.
- upload = ResumableUpload(upload_url, chunk_size, headers=headers)
-
- if num_retries is not None:
- upload._retry_strategy = resumable_media.RetryStrategy(
- max_retries=num_retries)
-
- upload.initiate(
- transport, stream, metadata, _GENERIC_CONTENT_TYPE,
- stream_final=False)
-
- return upload, transport
-
- def _do_multipart_upload(self, stream, metadata, size, num_retries):
- """Perform a multipart upload.
-
- :type stream: IO[bytes]
- :param stream: A bytes IO object open for reading.
-
- :type metadata: dict
- :param metadata: The metadata associated with the upload.
-
- :type size: int
- :param size: The number of bytes to be uploaded (which will be read
- from ``stream``). If not provided, the upload will be
- concluded once ``stream`` is exhausted (or :data:`None`).
-
- :type num_retries: int
- :param num_retries: Number of upload retries. (Deprecated: This
- argument will be removed in a future release.)
-
- :rtype: :class:`~requests.Response`
- :returns: The "200 OK" response object returned after the multipart
- upload request.
- :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size``
- bytes remaining.
- """
- data = stream.read(size)
- if len(data) < size:
- msg = _READ_LESS_THAN_SIZE.format(size, len(data))
- raise ValueError(msg)
-
- headers = _get_upload_headers(self._connection.USER_AGENT)
-
- upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project)
- upload = MultipartUpload(upload_url, headers=headers)
-
- if num_retries is not None:
- upload._retry_strategy = resumable_media.RetryStrategy(
- max_retries=num_retries)
-
- response = upload.transmit(
- self._http, data, metadata, _GENERIC_CONTENT_TYPE)
-
- return response
-
- def copy_table(self, sources, destination, job_id=None, job_id_prefix=None,
- job_config=None, retry=DEFAULT_RETRY):
- """Start a job for copying one or more tables into another table.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy
-
- :type sources: One of:
- :class:`~google.cloud.bigquery.table.TableReference`
- sequence of
- :class:`~google.cloud.bigquery.table.TableReference`
- :param sources: Table or tables to be copied.
-
-
- :type destination: :class:`google.cloud.bigquery.table.TableReference`
- :param destination: Table into which data is to be copied.
-
- :type job_id: str
- :param job_id: (Optional) The ID of the job.
-
- :type job_id_prefix: str or ``NoneType``
- :param job_id_prefix: (Optional) the user-provided prefix for a
- randomly generated job ID. This parameter will be
- ignored if a ``job_id`` is also given.
-
- :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig`
- :param job_config: (Optional) Extra configuration options for the job.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.job.CopyJob`
- :returns: a new ``CopyJob`` instance
- """
- job_id = _make_job_id(job_id, job_id_prefix)
-
- if not isinstance(sources, collections.Sequence):
- sources = [sources]
- job = CopyJob(job_id, sources, destination, client=self,
- job_config=job_config)
- job.begin(retry=retry)
- return job
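
A hedged sketch of copying one table to another, assuming ``client`` is an existing :class:`Client`; the IDs are placeholders:

.. code:: python

    source = client.dataset('my_dataset').table('source_table')
    destination = client.dataset('my_dataset').table('destination_table')
    copy_job = client.copy_table(source, destination)
    copy_job.result()  # blocks until the copy completes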
-
- def extract_table(
- self, source, destination_uris, job_config=None, job_id=None,
- job_id_prefix=None, retry=DEFAULT_RETRY):
- """Start a job to extract a table into Cloud Storage files.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
-
- :type source: :class:`google.cloud.bigquery.table.TableReference`
- :param source: table to be extracted.
-
- :type destination_uris: One of:
- str or
- sequence of str
- :param destination_uris:
- URIs of Cloud Storage file(s) into which table data is to be
- extracted; in format ``gs://<bucket_name>/<object_name_or_glob>``.
-
- :type job_id: str
- :param job_id: (Optional) The ID of the job.
-
- :type job_id_prefix: str or ``NoneType``
- :param job_id_prefix: (Optional) the user-provided prefix for a
- randomly generated job ID. This parameter will be
- ignored if a ``job_id`` is also given.
-
- :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig`
- :param job_config: (Optional) Extra configuration options for the job.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.job.ExtractJob`
- :returns: a new ``ExtractJob`` instance
- """
- job_id = _make_job_id(job_id, job_id_prefix)
-
- if isinstance(destination_uris, six.string_types):
- destination_uris = [destination_uris]
-
- job = ExtractJob(
- job_id, source, destination_uris, client=self,
- job_config=job_config)
- job.begin(retry=retry)
- return job
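
A hedged sketch of exporting a table to Cloud Storage, assuming ``client`` is an existing :class:`Client`; the table and destination URI are placeholders:

.. code:: python

    extract_job = client.extract_table(
        client.dataset('my_dataset').table('people'),
        'gs://my-bucket/exports/people-*.csv')
    extract_job.result()  # blocks until the extract completes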
-
- def query(self, query, job_config=None, job_id=None, job_id_prefix=None,
- retry=DEFAULT_RETRY):
- """Start a job that runs a SQL query.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
-
- :type query: str
- :param query:
- SQL query to be executed. Defaults to the standard SQL dialect.
- Use the ``job_config`` parameter to change dialects.
-
- :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig`
- :param job_config: (Optional) Extra configuration options for the job.
-
- :type job_id: str
- :param job_id: (Optional) ID to use for the query job.
-
- :type job_id_prefix: str or ``NoneType``
- :param job_id_prefix: (Optional) the user-provided prefix for a
- randomly generated job ID. This parameter will be
- ignored if a ``job_id`` is also given.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`google.cloud.bigquery.job.QueryJob`
- :returns: a new ``QueryJob`` instance
- """
- job_id = _make_job_id(job_id, job_id_prefix)
- job = QueryJob(job_id, query, client=self, job_config=job_config)
- job.begin(retry=retry)
- return job
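
A hedged sketch of overriding query options with ``QueryJobConfig``, for example to run legacy SQL as the docstring mentions; ``client`` is assumed to be an existing :class:`Client` and the query text is a placeholder:

.. code:: python

    config = QueryJobConfig()
    config.use_legacy_sql = True
    query_job = client.query('SELECT 17', job_config=config)
    for row in query_job.result():
        print(list(row))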
-
- def create_rows(self, table, rows, selected_fields=None, **kwargs):
- """API call: insert table data via a POST request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
-
- :type table: One of:
- :class:`~google.cloud.bigquery.table.Table`
- :class:`~google.cloud.bigquery.table.TableReference`
- :param table: the destination table for the row data, or a reference
- to it.
-
- :type rows: One of:
- list of tuples
- list of dictionaries
- :param rows: Row data to be inserted. If a list of tuples is given,
- each tuple should contain data for each schema field on
- the current table and in the same order as the schema
- fields. If a list of dictionaries is given, the keys must
- include all required fields in the schema. Keys which do
- not correspond to a field in the schema are ignored.
-
- :type selected_fields: list of :class:`SchemaField`
- :param selected_fields:
- The fields to return. Required if ``table`` is a
- :class:`~google.cloud.bigquery.table.TableReference`.
-
- :type kwargs: dict
- :param kwargs: Keyword arguments to
- `~google.cloud.bigquery.client.Client.create_rows_json`
-
- :rtype: list of mappings
- :returns: One mapping per row with insert errors: the "index" key
- identifies the row, and the "errors" key contains a list
- of the mappings describing one or more problems with the
- row.
- :raises: ValueError if table's schema is not set
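-
- Example (a minimal sketch; ``my_dataset.my_table`` is a placeholder
- for an existing table whose schema has ``full_name`` and ``age``
- fields)::
-
-     from google.cloud import bigquery
-
-     client = bigquery.Client()
-     table_ref = client.dataset('my_dataset').table('my_table')
-     table = client.get_table(table_ref)  # fetch the current schema
-     errors = client.create_rows(
-         table, [('Phred Phlyntstone', 32), ('Wylma Phlyntstone', 29)])
-     assert errors == []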
- """
- if selected_fields is not None:
- schema = selected_fields
- elif isinstance(table, TableReference):
- raise ValueError('need selected_fields with TableReference')
- elif isinstance(table, Table):
- if len(table._schema) == 0:
- raise ValueError(_TABLE_HAS_NO_SCHEMA)
- schema = table.schema
- else:
- raise TypeError('table should be Table or TableReference')
-
- json_rows = []
-
- for index, row in enumerate(rows):
- if isinstance(row, dict):
- row = _row_from_mapping(row, schema)
- json_row = {}
-
- for field, value in zip(schema, row):
- converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
- if converter is not None: # STRING doesn't need converting
- value = converter(value)
- json_row[field.name] = value
-
- json_rows.append(json_row)
-
- return self.create_rows_json(table, json_rows, **kwargs)
-
- def create_rows_json(self, table, json_rows, row_ids=None,
- skip_invalid_rows=None, ignore_unknown_values=None,
- template_suffix=None, retry=DEFAULT_RETRY):
- """API call: insert table data via a POST request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
-
- :type table: One of:
- :class:`~google.cloud.bigquery.table.Table`
- :class:`~google.cloud.bigquery.table.TableReference`
- :param table: the destination table for the row data, or a reference
- to it.
-
- :type json_rows: list of dictionaries
- :param json_rows: Row data to be inserted. Keys must match the table
- schema fields and values must be JSON-compatible
- representations.
-
- :type row_ids: list of string
- :param row_ids: (Optional) Unique ids, one per row being inserted.
- If omitted, unique IDs are created.
-
- :type skip_invalid_rows: bool
- :param skip_invalid_rows: (Optional) Insert all valid rows of a
- request, even if invalid rows exist.
- The default value is False, which causes
- the entire request to fail if any invalid
- rows exist.
-
- :type ignore_unknown_values: bool
- :param ignore_unknown_values: (Optional) Accept rows that contain
- values that do not match the schema.
- The unknown values are ignored. Default
- is False, which treats unknown values as
- errors.
-
- :type template_suffix: str
- :param template_suffix:
- (Optional) treat ``table`` as a template table and provide a suffix.
- BigQuery will create the table ``<table> + <template_suffix>`` based
- on the schema of the template table. See
- https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: list of mappings
- :returns: One mapping per row with insert errors: the "index" key
- identifies the row, and the "errors" key contains a list
- of the mappings describing one or more problems with the
- row.
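-
- Example (a minimal sketch; the table and field names are
- placeholders for an existing table)::
-
-     from google.cloud import bigquery
-
-     client = bigquery.Client()
-     table_ref = client.dataset('my_dataset').table('my_table')
-     errors = client.create_rows_json(
-         table_ref, [{'full_name': 'Phred Phlyntstone', 'age': 32}])
-     assert errors == []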
- """
- rows_info = []
- data = {'rows': rows_info}
-
- for index, row in enumerate(json_rows):
- info = {'json': row}
- if row_ids is not None:
- info['insertId'] = row_ids[index]
- else:
- info['insertId'] = str(uuid.uuid4())
- rows_info.append(info)
-
- if skip_invalid_rows is not None:
- data['skipInvalidRows'] = skip_invalid_rows
-
- if ignore_unknown_values is not None:
- data['ignoreUnknownValues'] = ignore_unknown_values
-
- if template_suffix is not None:
- data['templateSuffix'] = template_suffix
-
- # We can always retry, because every row has an insert ID.
- response = self._call_api(
- retry,
- method='POST',
- path='%s/insertAll' % table.path,
- data=data)
- errors = []
-
- for error in response.get('insertErrors', ()):
- errors.append({'index': int(error['index']),
- 'errors': error['errors']})
-
- return errors
-
- def query_rows(self, query, job_config=None, job_id=None, timeout=None,
- retry=DEFAULT_RETRY):
- """Start a query job and wait for the results.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
-
- :type query: str
- :param query:
- SQL query to be executed. Defaults to the standard SQL dialect.
- Use the ``job_config`` parameter to change dialects.
-
- :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig`
- :param job_config: (Optional) Extra configuration options for the job.
-
- :type job_id: str
- :param job_id: (Optional) ID to use for the query job.
-
- :type timeout: float
- :param timeout:
- (Optional) How long (in seconds) to wait for the job to complete
- before raising a :class:`TimeoutError`.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns:
- Iterator of row data :class:`tuple`s. During each page, the
- iterator will have the ``total_rows`` attribute set, which counts
- the total number of rows **in the result set** (this is distinct
- from the total number of rows in the current page:
- ``iterator.page.num_items``).
-
- :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
- failed or :class:`TimeoutError` if the job did not complete in the
- given timeout.
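-
- Example (a minimal sketch; assumes default credentials)::
-
-     from google.cloud import bigquery
-
-     client = bigquery.Client()
-     for row in client.query_rows('SELECT 1 AS x'):
-         print(row)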
- """
- job = self.query(
- query, job_config=job_config, job_id=job_id, retry=retry)
- return job.result(timeout=timeout)
-
- def list_rows(self, table, selected_fields=None, max_results=None,
- page_token=None, start_index=None, retry=DEFAULT_RETRY):
- """List the rows of the table.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list
-
- .. note::
-
- This method assumes that the provided schema is up-to-date with the
- schema as defined on the back-end: if the two schemas are not
- identical, the values returned may be incomplete. To ensure that the
- local copy of the schema is up-to-date, call ``client.get_table``.
-
- :type table: One of:
- :class:`~google.cloud.bigquery.table.Table`
- :class:`~google.cloud.bigquery.table.TableReference`
- :param table: the table to list, or a reference to it.
-
- :type selected_fields: list of :class:`SchemaField`
- :param selected_fields:
- The fields to return. Required if ``table`` is a
- :class:`~google.cloud.bigquery.table.TableReference`.
-
- :type max_results: int
- :param max_results: maximum number of rows to return.
-
- :type page_token: str
- :param page_token: (Optional) Token representing a cursor into the
- table's rows.
-
- :type start_index: int
- :param start_index: (Optional) The zero-based index of the starting
- row to read.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns: Iterator of row data :class:`tuple`s. During each page, the
- iterator will have the ``total_rows`` attribute set,
- which counts the total number of rows **in the table**
- (this is distinct from the total number of rows in the
- current page: ``iterator.page.num_items``).
-
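- Example (a minimal sketch; ``my_dataset.my_table`` is a placeholder
- for an existing table)::
-
-     from google.cloud import bigquery
-
-     client = bigquery.Client()
-     table_ref = client.dataset('my_dataset').table('my_table')
-     table = client.get_table(table_ref)  # keeps the schema current
-     for row in client.list_rows(table, max_results=10):
-         print(row)
-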
- """
- if selected_fields is not None:
- schema = selected_fields
- elif isinstance(table, TableReference):
- raise ValueError('need selected_fields with TableReference')
- elif isinstance(table, Table):
- if len(table._schema) == 0:
- raise ValueError(_TABLE_HAS_NO_SCHEMA)
- schema = table.schema
- else:
- raise TypeError('table should be Table or TableReference')
-
- params = {}
- if selected_fields is not None:
- params['selectedFields'] = ','.join(
- field.name for field in selected_fields)
-
- if start_index is not None:
- params['startIndex'] = start_index
-
- iterator = page_iterator.HTTPIterator(
- client=self,
- api_request=functools.partial(self._call_api, retry),
- path='%s/data' % (table.path,),
- item_to_value=_item_to_row,
- items_key='rows',
- page_token=page_token,
- next_token='pageToken',
- max_results=max_results,
- page_start=_rows_page_start,
- extra_params=params)
- iterator.schema = schema
- iterator._field_to_index = _field_to_index_mapping(schema)
- return iterator
-
- def list_partitions(self, table, retry=DEFAULT_RETRY):
- """List the partitions in a table.
-
- :type table: One of:
- :class:`~google.cloud.bigquery.table.Table`
- :class:`~google.cloud.bigquery.table.TableReference`
- :param table: the table to list, or a reference to it.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: list
- :returns: a list of time partitions
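-
- Example (a minimal sketch; assumes ``my_dataset.my_table`` is an
- existing partitioned table)::
-
-     from google.cloud import bigquery
-
-     client = bigquery.Client()
-     table_ref = client.dataset('my_dataset').table('my_table')
-     partition_ids = client.list_partitions(table_ref)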
- """
- config = QueryJobConfig()
- config.use_legacy_sql = True # required for '$' syntax
- rows = self.query_rows(
- 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' %
- (table.project, table.dataset_id, table.table_id),
- job_config=config,
- retry=retry)
- return [row[0] for row in rows]
-
-
-# pylint: disable=unused-argument
-def _item_to_project(iterator, resource):
- """Convert a JSON project to the native object.
-
- :type iterator: :class:`~google.api_core.page_iterator.Iterator`
- :param iterator: The iterator that is currently in use.
-
- :type resource: dict
- :param resource: An item to be converted to a project.
-
- :rtype: :class:`.Project`
- :returns: The next project in the page.
- """
- return Project.from_api_repr(resource)
-# pylint: enable=unused-argument
-
-
-def _item_to_dataset(iterator, resource):
- """Convert a JSON dataset to the native object.
-
- :type iterator: :class:`~google.api_core.page_iterator.Iterator`
- :param iterator: The iterator that is currently in use.
-
- :type resource: dict
- :param resource: An item to be converted to a dataset.
-
- :rtype: :class:`.Dataset`
- :returns: The next dataset in the page.
- """
- return Dataset.from_api_repr(resource)
-
-
-def _item_to_job(iterator, resource):
- """Convert a JSON job to the native object.
-
- :type iterator: :class:`~google.api_core.page_iterator.Iterator`
- :param iterator: The iterator that is currently in use.
-
- :type resource: dict
- :param resource: An item to be converted to a job.
-
- :rtype: job instance.
- :returns: The next job in the page.
- """
- return iterator.client.job_from_resource(resource)
-
-
-def _item_to_table(iterator, resource):
- """Convert a JSON table to the native object.
-
- :type iterator: :class:`~google.api_core.page_iterator.Iterator`
- :param iterator: The iterator that is currently in use.
-
- :type resource: dict
- :param resource: An item to be converted to a table.
-
- :rtype: :class:`~google.cloud.bigquery.table.Table`
- :returns: The next table in the page.
- """
- return Table.from_api_repr(resource)
-
-
-def _make_job_id(job_id, prefix=None):
- """Construct an ID for a new job.
-
- :type job_id: str or ``NoneType``
- :param job_id: the user-provided job ID
-
- :type prefix: str or ``NoneType``
- :param prefix: (Optional) the user-provided prefix for a job ID
-
- :rtype: str
- :returns: A job ID
- """
- if job_id is not None:
- return job_id
- elif prefix is not None:
- return str(prefix) + str(uuid.uuid4())
- else:
- return str(uuid.uuid4())
-
-
-def _check_mode(stream):
- """Check that a stream was opened in read-binary mode.
-
- :type stream: IO[bytes]
- :param stream: A bytes IO object open for reading.
-
- :raises: :exc:`ValueError` if the stream has a ``mode`` attribute
- and its value is not among ``rb``, ``r+b`` or ``rb+``.
- """
- mode = getattr(stream, 'mode', None)
-
- if mode is not None and mode not in ('rb', 'r+b', 'rb+'):
- raise ValueError(
- "Cannot upload files opened in text mode: use "
- "open(filename, mode='rb') or open(filename, mode='r+b')")
-
-
-def _get_upload_headers(user_agent):
- """Get the headers for an upload request.
-
- :type user_agent: str
- :param user_agent: The user-agent for requests.
-
- :rtype: dict
- :returns: The headers to be used for the request.
- """
- return {
- 'Accept': 'application/json',
- 'Accept-Encoding': 'gzip, deflate',
- 'User-Agent': user_agent,
- 'content-type': 'application/json',
- }
diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
deleted file mode 100644
index e464fcf..0000000
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ /dev/null
@@ -1,536 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Define API Datasets."""
-
-from __future__ import absolute_import
-
-import six
-
-from google.cloud._helpers import _datetime_from_microseconds
-from google.cloud.bigquery.table import TableReference
-
-
-class AccessEntry(object):
- """Represent grant of an access role to an entity.
-
- Every entry in the access list will have exactly one of
- ``userByEmail``, ``groupByEmail``, ``domain``, ``specialGroup`` or
- ``view`` set. If anything other than ``view`` is set, the entry will
- also have a ``role`` specified. ``role`` is omitted for a ``view``,
- since views are always read-only.
-
- See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets.
-
- :type role: str
- :param role: Role granted to the entity. One of
-
- * ``'OWNER'``
- * ``'WRITER'``
- * ``'READER'``
-
- May also be ``None`` if the ``entity_type`` is ``view``.
-
- :type entity_type: str
- :param entity_type: Type of entity being granted the role. One of
- :attr:`ENTITY_TYPES`.
-
- :type entity_id: str
- :param entity_id: ID of entity being granted the role.
-
- :raises: :class:`ValueError` if the ``entity_type`` is not among
- :attr:`ENTITY_TYPES`, or if a ``view`` has ``role`` set or
- a non ``view`` **does not** have a ``role`` set.
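-
- Example (a minimal sketch; the e-mail address is a placeholder)::
-
-     entry = AccessEntry('OWNER', 'userByEmail', 'owner@example.com')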
- """
-
- ENTITY_TYPES = frozenset(['userByEmail', 'groupByEmail', 'domain',
- 'specialGroup', 'view'])
- """Allowed entity types."""
-
- def __init__(self, role, entity_type, entity_id):
- if entity_type not in self.ENTITY_TYPES:
- message = 'Entity type %r not among: %s' % (
- entity_type, ', '.join(self.ENTITY_TYPES))
- raise ValueError(message)
- if entity_type == 'view':
- if role is not None:
- raise ValueError('Role must be None for a view. Received '
- 'role: %r' % (role,))
- else:
- if role is None:
- raise ValueError('Role must be set for entity '
- 'type %r' % (entity_type,))
-
- self.role = role
- self.entity_type = entity_type
- self.entity_id = entity_id
-
- def __eq__(self, other):
- if not isinstance(other, AccessEntry):
- return NotImplemented
- return (
- self.role == other.role and
- self.entity_type == other.entity_type and
- self.entity_id == other.entity_id)
-
- def __ne__(self, other):
- return not self == other
-
- def __repr__(self):
- return '<AccessEntry: role=%s, %s=%s>' % (
- self.role, self.entity_type, self.entity_id)
-
-
-class DatasetReference(object):
- """DatasetReferences are pointers to datasets.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
-
- :type project: str
- :param project: the ID of the project
-
- :type dataset_id: str
- :param dataset_id: the ID of the dataset
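-
- Example (a minimal sketch; the project and dataset IDs are
- placeholders)::
-
-     dataset_ref = DatasetReference('my-project', 'my_dataset')
-     table_ref = dataset_ref.table('my_table')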
- """
-
- def __init__(self, project, dataset_id):
- if not isinstance(project, six.string_types):
- raise ValueError("Pass a string for project")
- if not isinstance(dataset_id, six.string_types):
- raise ValueError("Pass a string for dataset_id")
- self._project = project
- self._dataset_id = dataset_id
-
- @property
- def project(self):
- """Project ID of the dataset.
-
- :rtype: str
- :returns: the project ID.
- """
- return self._project
-
- @property
- def dataset_id(self):
- """Dataset ID.
-
- :rtype: str
- :returns: the dataset ID.
- """
- return self._dataset_id
-
- @property
- def path(self):
- """URL path for the dataset's APIs.
-
- :rtype: str
- :returns: the path based on project and dataset name.
- """
- return '/projects/%s/datasets/%s' % (self.project, self.dataset_id)
-
- def table(self, table_id):
- """Constructs a TableReference.
-
- :type table_id: str
- :param table_id: the ID of the table.
-
- :rtype: :class:`google.cloud.bigquery.table.TableReference`
- :returns: a TableReference for a table in this dataset.
- """
- return TableReference(self, table_id)
-
- @classmethod
- def from_api_repr(cls, resource):
- project = resource['projectId']
- dataset_id = resource['datasetId']
- return cls(project, dataset_id)
-
- def to_api_repr(self):
- return {
- 'projectId': self._project,
- 'datasetId': self._dataset_id,
- }
-
- def _key(self):
- """A tuple key that uniquely describes this field.
-
- Used to compute this instance's hashcode and evaluate equality.
-
- Returns:
- tuple: The contents of this :class:`DatasetReference`.
- """
- return (
- self._project,
- self._dataset_id,
- )
-
- def __eq__(self, other):
- if not isinstance(other, DatasetReference):
- return NotImplemented
- return self._key() == other._key()
-
- def __ne__(self, other):
- return not self == other
-
- def __hash__(self):
- return hash(self._key())
-
- def __repr__(self):
- return 'DatasetReference{}'.format(self._key())
-
-
-class Dataset(object):
- """Datasets are containers for tables.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
-
- :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference`
- :param dataset_ref: a pointer to a dataset
- """
-
- def __init__(self, dataset_ref):
- self._project = dataset_ref.project
- self._dataset_id = dataset_ref.dataset_id
- self._properties = {'labels': {}}
- self._access_entries = ()
-
- @property
- def project(self):
- """Project bound to the dataset.
-
- :rtype: str
- :returns: the project.
- """
- return self._project
-
- @property
- def path(self):
- """URL path for the dataset's APIs.
-
- :rtype: str
- :returns: the path based on project and dataset ID.
- """
- return '/projects/%s/datasets/%s' % (self.project, self.dataset_id)
-
- @property
- def access_entries(self):
- """Dataset's access entries.
-
- :rtype: list of :class:`AccessEntry`
- :returns: roles granted to entities for this dataset
- """
- return list(self._access_entries)
-
- @access_entries.setter
- def access_entries(self, value):
- """Update dataset's access entries
-
- :type value: list of :class:`AccessEntry`
- :param value: roles granted to entities for this dataset
-
- :raises: TypeError if 'value' is not a sequence, or ValueError if
- any item in the sequence is not an AccessEntry
- """
- if not all(isinstance(field, AccessEntry) for field in value):
- raise ValueError('Values must be AccessEntry instances')
- self._access_entries = tuple(value)
-
- @property
- def created(self):
- """Datetime at which the dataset was created.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the creation time (None until set from the server).
- """
- creation_time = self._properties.get('creationTime')
- if creation_time is not None:
- # creation_time will be in milliseconds.
- return _datetime_from_microseconds(1000.0 * creation_time)
-
- @property
- def dataset_id(self):
- """Dataset ID.
-
- :rtype: str
- :returns: the dataset ID.
- """
- return self._dataset_id
-
- @property
- def full_dataset_id(self):
- """ID for the dataset resource, in the form "project_id:dataset_id".
-
- :rtype: str, or ``NoneType``
- :returns: the ID (None until set from the server).
- """
- return self._properties.get('id')
-
- @property
- def etag(self):
- """ETag for the dataset resource.
-
- :rtype: str, or ``NoneType``
- :returns: the ETag (None until set from the server).
- """
- return self._properties.get('etag')
-
- @property
- def modified(self):
- """Datetime at which the dataset was last modified.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the modification time (None until set from the server).
- """
- modified_time = self._properties.get('lastModifiedTime')
- if modified_time is not None:
- # modified_time will be in milliseconds.
- return _datetime_from_microseconds(1000.0 * modified_time)
-
- @property
- def self_link(self):
- """URL for the dataset resource.
-
- :rtype: str, or ``NoneType``
- :returns: the URL (None until set from the server).
- """
- return self._properties.get('selfLink')
-
- @property
- def default_table_expiration_ms(self):
- """Default expiration time for tables in the dataset.
-
- :rtype: int, or ``NoneType``
- :returns: The time in milliseconds, or None (the default).
- """
- return self._properties.get('defaultTableExpirationMs')
-
- @default_table_expiration_ms.setter
- def default_table_expiration_ms(self, value):
- """Update default expiration time for tables in the dataset.
-
- :type value: int
- :param value: (Optional) new default time, in milliseconds
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.integer_types) and value is not None:
- raise ValueError("Pass an integer, or None")
- self._properties['defaultTableExpirationMs'] = value
-
- @property
- def description(self):
- """Description of the dataset.
-
- :rtype: str, or ``NoneType``
- :returns: The description as set by the user, or None (the default).
- """
- return self._properties.get('description')
-
- @description.setter
- def description(self, value):
- """Update description of the dataset.
-
- :type value: str
- :param value: (Optional) new description
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types) and value is not None:
- raise ValueError("Pass a string, or None")
- self._properties['description'] = value
-
- @property
- def friendly_name(self):
- """Title of the dataset.
-
- :rtype: str, or ``NoneType``
- :returns: The name as set by the user, or None (the default).
- """
- return self._properties.get('friendlyName')
-
- @friendly_name.setter
- def friendly_name(self, value):
- """Update title of the dataset.
-
- :type value: str
- :param value: (Optional) new title
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types) and value is not None:
- raise ValueError("Pass a string, or None")
- self._properties['friendlyName'] = value
-
- @property
- def location(self):
- """Location in which the dataset is hosted.
-
- :rtype: str, or ``NoneType``
- :returns: The location as set by the user, or None (the default).
- """
- return self._properties.get('location')
-
- @location.setter
- def location(self, value):
- """Update location in which the dataset is hosted.
-
- :type value: str
- :param value: (Optional) new location
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types) and value is not None:
- raise ValueError("Pass a string, or None")
- self._properties['location'] = value
-
- @property
- def labels(self):
- """Labels for the dataset.
-
- This property always returns a dict. To change a dataset's labels,
- modify the dict, then call ``Client.update_dataset``. To delete a
- label, set its value to ``None`` before updating.
-
- :rtype: dict, {str -> str}
- :returns: A dict of the dataset's labels.
- """
- return self._properties['labels']
-
- @labels.setter
- def labels(self, value):
- """Update labels for the dataset.
-
- :type value: dict, {str -> str}
- :param value: new labels
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, dict):
- raise ValueError("Pass a dict")
- self._properties['labels'] = value
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a dataset given its API representation
-
- :type resource: dict
- :param resource: dataset resource representation returned from the API
-
- :rtype: :class:`google.cloud.bigquery.dataset.Dataset`
- :returns: Dataset parsed from ``resource``.
- """
- dsr = resource.get('datasetReference')
- if dsr is None or 'datasetId' not in dsr:
- raise KeyError('Resource lacks required identity information: '
- '["datasetReference"]["datasetId"]')
- dataset_id = dsr['datasetId']
- dataset = cls(DatasetReference(dsr['projectId'], dataset_id))
- dataset._set_properties(resource)
- return dataset
-
- @staticmethod
- def _parse_access_entries(access):
- """Parse a resource fragment into a set of access entries.
-
- ``role`` augments the entity type and is present **unless** the
- entity type is ``view``.
-
- :type access: list of mappings
- :param access: each mapping represents a single access entry.
-
- :rtype: list of :class:`AccessEntry`
- :returns: a list of parsed entries.
- :raises: :class:`ValueError` if an entry in ``access`` has more keys
- than ``role`` and one additional key.
- """
- result = []
- for entry in access:
- entry = entry.copy()
- role = entry.pop('role', None)
- entity_type, entity_id = entry.popitem()
- if len(entry) != 0:
- raise ValueError('Entry has unexpected keys remaining.', entry)
- result.append(
- AccessEntry(role, entity_type, entity_id))
- return result
-
- def _set_properties(self, api_response):
- """Update properties from resource in body of ``api_response``
-
- :type api_response: dict
- :param api_response: response returned from an API call.
- """
- self._properties.clear()
- cleaned = api_response.copy()
- access = cleaned.pop('access', ())
- self.access_entries = self._parse_access_entries(access)
- if 'creationTime' in cleaned:
- cleaned['creationTime'] = float(cleaned['creationTime'])
- if 'lastModifiedTime' in cleaned:
- cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime'])
- if 'defaultTableExpirationMs' in cleaned:
- cleaned['defaultTableExpirationMs'] = int(
- cleaned['defaultTableExpirationMs'])
- if 'labels' not in cleaned:
- cleaned['labels'] = {}
- self._properties.update(cleaned)
-
- def _build_access_resource(self):
- """Generate a resource fragment for dataset's access entries."""
- result = []
- for entry in self.access_entries:
- info = {entry.entity_type: entry.entity_id}
- if entry.role is not None:
- info['role'] = entry.role
- result.append(info)
- return result
-
- def _build_resource(self):
- """Generate a resource for ``create`` or ``update``."""
- resource = {
- 'datasetReference': {
- 'projectId': self.project, 'datasetId': self.dataset_id},
- }
- if self.default_table_expiration_ms is not None:
- value = self.default_table_expiration_ms
- resource['defaultTableExpirationMs'] = value
-
- if self.description is not None:
- resource['description'] = self.description
-
- if self.friendly_name is not None:
- resource['friendlyName'] = self.friendly_name
-
- if self.location is not None:
- resource['location'] = self.location
-
- if len(self.access_entries) > 0:
- resource['access'] = self._build_access_resource()
-
- resource['labels'] = self.labels # labels is never None
-
- return resource
-
- def table(self, table_id):
- """Constructs a TableReference.
-
- :type table_id: str
- :param table_id: the ID of the table.
-
- :rtype: :class:`google.cloud.bigquery.table.TableReference`
- :returns: a TableReference for a table in this dataset.
- """
- return TableReference(self, table_id)
diff --git a/bigquery/google/cloud/bigquery/dbapi/_helpers.py b/bigquery/google/cloud/bigquery/dbapi/_helpers.py
deleted file mode 100644
index a2cee9c..0000000
--- a/bigquery/google/cloud/bigquery/dbapi/_helpers.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import collections
-import datetime
-import numbers
-
-import six
-
-from google.cloud import bigquery
-from google.cloud.bigquery.dbapi import exceptions
-
-
-def scalar_to_query_parameter(value, name=None):
- """Convert a scalar value into a query parameter.
-
- :type value: any
- :param value: A scalar value to convert into a query parameter.
-
- :type name: str
- :param name: (Optional) Name of the query parameter.
-
- :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter`
- :returns:
- A query parameter corresponding with the type and value of the plain
- Python object.
- :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError`
- if the type cannot be determined.
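-
- Example (illustrative, following the type mapping implemented
- below)::
-
-     param = scalar_to_query_parameter(42, name='answer')
-     # -> a ScalarQueryParameter named 'answer' with type 'INT64'
-     #    and value 42.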
- """
- parameter_type = None
-
- if isinstance(value, bool):
- parameter_type = 'BOOL'
- elif isinstance(value, numbers.Integral):
- parameter_type = 'INT64'
- elif isinstance(value, numbers.Real):
- parameter_type = 'FLOAT64'
- elif isinstance(value, six.text_type):
- parameter_type = 'STRING'
- elif isinstance(value, six.binary_type):
- parameter_type = 'BYTES'
- elif isinstance(value, datetime.datetime):
- parameter_type = 'DATETIME' if value.tzinfo is None else 'TIMESTAMP'
- elif isinstance(value, datetime.date):
- parameter_type = 'DATE'
- elif isinstance(value, datetime.time):
- parameter_type = 'TIME'
- else:
- raise exceptions.ProgrammingError(
- 'encountered parameter {} with value {} of unexpected type'.format(
- name, value))
- return bigquery.ScalarQueryParameter(name, parameter_type, value)
-
-
-def to_query_parameters_list(parameters):
- """Converts a sequence of parameter values into query parameters.
-
- :type parameters: Sequence[Any]
- :param parameters: Sequence of query parameter values.
-
- :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter]
- :returns: A list of query parameters.
- """
- return [scalar_to_query_parameter(value) for value in parameters]
-
-
-def to_query_parameters_dict(parameters):
- """Converts a dictionary of parameter values into query parameters.
-
- :type parameters: Mapping[str, Any]
- :param parameters: Dictionary of query parameter values.
-
- :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter]
- :returns: A list of named query parameters.
- """
- return [
- scalar_to_query_parameter(value, name=name)
- for name, value
- in six.iteritems(parameters)]
-
-
-def to_query_parameters(parameters):
- """Converts DB-API parameter values into query parameters.
-
- :type parameters: Mapping[str, Any] or Sequence[Any]
- :param parameters: A dictionary or sequence of query parameter values.
-
- :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter]
- :returns: A list of query parameters.
- """
- if parameters is None:
- return []
-
- if isinstance(parameters, collections.Mapping):
- return to_query_parameters_dict(parameters)
-
- return to_query_parameters_list(parameters)
diff --git a/bigquery/google/cloud/bigquery/dbapi/cursor.py b/bigquery/google/cloud/bigquery/dbapi/cursor.py
deleted file mode 100644
index 914d2e0..0000000
--- a/bigquery/google/cloud/bigquery/dbapi/cursor.py
+++ /dev/null
@@ -1,340 +0,0 @@
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Cursor for the Google BigQuery DB-API."""
-
-import collections
-
-import six
-
-from google.cloud.bigquery import job
-from google.cloud.bigquery.dbapi import _helpers
-from google.cloud.bigquery.dbapi import exceptions
-import google.cloud.exceptions
-
-# Per PEP 249: A 7-item sequence containing information describing one result
-# column. The first two items (name and type_code) are mandatory, the other
-# five are optional and are set to None if no meaningful values can be
-# provided.
-Column = collections.namedtuple(
- 'Column',
- [
- 'name', 'type_code', 'display_size', 'internal_size', 'precision',
- 'scale', 'null_ok',
- ])
-
-
-class Cursor(object):
- """DB-API Cursor to Google BigQuery.
-
- :type connection: :class:`~google.cloud.bigquery.dbapi.Connection`
- :param connection: A DB-API connection to Google BigQuery.
- """
- def __init__(self, connection):
- self.connection = connection
- self.description = None
- # Per PEP 249: The attribute is -1 in case no .execute*() has been
- # performed on the cursor or the rowcount of the last operation
- # cannot be determined by the interface.
- self.rowcount = -1
- # Per PEP 249: The arraysize attribute defaults to 1, meaning to fetch
- # a single row at a time.
- self.arraysize = 1
- self._query_data = None
- self._query_job = None
-
- def close(self):
- """No-op."""
-
- def _set_description(self, schema):
- """Set description from schema.
-
- :type schema: Sequence[google.cloud.bigquery.schema.SchemaField]
- :param schema: A description of fields in the schema.
- """
- if schema is None:
- self.description = None
- return
-
- self.description = tuple([
- Column(
- name=field.name,
- type_code=field.field_type,
- display_size=None,
- internal_size=None,
- precision=None,
- scale=None,
- null_ok=field.is_nullable)
- for field in schema])
-
- def _set_rowcount(self, query_results):
- """Set the rowcount from query results.
-
- Normally, this sets rowcount to the number of rows returned by the
- query, but if it was a DML statement, it sets rowcount to the number
- of modified rows.
-
- :type query_results:
- :class:`~google.cloud.bigquery.query.QueryResults`
- :param query_results: results of a query
- """
- total_rows = 0
- num_dml_affected_rows = query_results.num_dml_affected_rows
-
- if (query_results.total_rows is not None
- and query_results.total_rows > 0):
- total_rows = query_results.total_rows
- if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
- total_rows = num_dml_affected_rows
- self.rowcount = total_rows
-
- def execute(self, operation, parameters=None, job_id=None):
- """Prepare and execute a database operation.
-
- .. note::
- When setting query parameters, values which are "text"
- (``unicode`` in Python2, ``str`` in Python3) will use
- the 'STRING' BigQuery type. Values which are "bytes" (``str`` in
- Python2, ``bytes`` in Python3) will use the 'BYTES' type.
-
- A `~datetime.datetime` parameter without timezone information uses
- the 'DATETIME' BigQuery type (example: Global Pi Day Celebration
- March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with
- timezone information uses the 'TIMESTAMP' BigQuery type (example:
- a wedding on April 29, 2011 at 11am, British Summer Time).
-
- For more information about BigQuery data types, see:
- https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
-
- ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not
- yet supported. See:
- https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524
-
- :type operation: str
- :param operation: A Google BigQuery query string.
-
- :type parameters: Mapping[str, Any] or Sequence[Any]
- :param parameters:
- (Optional) dictionary or sequence of parameter values.
-
- :type job_id: str
- :param job_id: (Optional) The job_id to use. If not set, a job ID
- is generated at random.
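-
- Example (a minimal sketch; assumes ``cursor`` was obtained from a
- DB-API connection to BigQuery)::
-
-     cursor.execute(
-         'SELECT %(greeting)s AS greeting',
-         {'greeting': 'Hello, BigQuery!'})
-     print(cursor.fetchone())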
- """
- self._query_data = None
- self._query_job = None
- client = self.connection._client
-
- # The DB-API uses the pyformat formatting, since the way BigQuery does
- # query parameters was not one of the standard options. Convert both
- # the query and the parameters to the format expected by the client
- # libraries.
- formatted_operation = _format_operation(
- operation, parameters=parameters)
- query_parameters = _helpers.to_query_parameters(parameters)
-
- config = job.QueryJobConfig()
- config.query_parameters = query_parameters
- config.use_legacy_sql = False
- self._query_job = client.query(
- formatted_operation, job_config=config, job_id=job_id)
-
- # Wait for the query to finish.
- try:
- self._query_job.result()
- except google.cloud.exceptions.GoogleCloudError:
- raise exceptions.DatabaseError(self._query_job.errors)
-
- query_results = self._query_job.query_results()
- self._set_rowcount(query_results)
- self._set_description(query_results.schema)
-
- def executemany(self, operation, seq_of_parameters):
- """Prepare and execute a database operation multiple times.
-
- :type operation: str
- :param operation: A Google BigQuery query string.
-
- :type seq_of_parameters: Sequence[Mapping[str, Any] or Sequence[Any]]
- :param seq_of_parameters: Sequence of many sets of parameter values.
- """
- for parameters in seq_of_parameters:
- self.execute(operation, parameters)
-
- def _try_fetch(self, size=None):
- """Try to start fetching data, if not yet started.
-
- Mutates self to indicate that iteration has started.
- """
- if self._query_job is None:
- raise exceptions.InterfaceError(
- 'No query results: execute() must be called before fetch.')
-
- is_dml = (
- self._query_job.statement_type
- and self._query_job.statement_type.upper() != 'SELECT')
- if is_dml:
- self._query_data = iter([])
- return
-
- if self._query_data is None:
- client = self.connection._client
- # TODO(tswast): pass in page size to list_rows based on arraysize
- rows_iter = client.list_rows(
- self._query_job.destination,
- selected_fields=self._query_job.query_results().schema)
- self._query_data = iter(rows_iter)
-
- def fetchone(self):
- """Fetch a single row from the results of the last ``execute*()`` call.
-
- :rtype: tuple
- :returns:
- A tuple representing a row or ``None`` if no more data is
- available.
- :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError`
- if called before ``execute()``.
- """
- self._try_fetch()
- try:
- return six.next(self._query_data)
- except StopIteration:
- return None
-
- def fetchmany(self, size=None):
- """Fetch multiple results from the last ``execute*()`` call.
-
- .. note::
- The size parameter is not used for the request/response size.
- Set the ``arraysize`` attribute before calling ``execute()`` to
- set the batch size.
-
- :type size: int
- :param size:
- (Optional) Maximum number of rows to return. Defaults to the
- ``arraysize`` property value.
-
- :rtype: List[tuple]
- :returns: A list of rows.
- :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError`
- if called before ``execute()``.
- """
- if size is None:
- size = self.arraysize
-
- self._try_fetch(size=size)
- rows = []
-
- for row in self._query_data:
- rows.append(row)
- if len(rows) >= size:
- break
-
- return rows
-
- def fetchall(self):
- """Fetch all remaining results from the last ``execute*()`` call.
-
- :rtype: List[tuple]
- :returns: A list of all the rows in the results.
- :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError`
- if called before ``execute()``.
- """
- self._try_fetch()
- return list(self._query_data)
-
- def setinputsizes(self, sizes):
- """No-op."""
-
- def setoutputsize(self, size, column=None):
- """No-op."""
-
-
-def _format_operation_list(operation, parameters):
- """Formats parameters in operation in the way BigQuery expects.
-
- The input operation will be a query like ``SELECT %s`` and the output
- will be a query like ``SELECT ?``.
-
- :type operation: str
- :param operation: A Google BigQuery query string.
-
- :type parameters: Sequence[Any]
- :param parameters: Sequence of parameter values.
-
- :rtype: str
- :returns: A formatted query string.
- :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError`
- if a parameter used in the operation is not found in the
- ``parameters`` argument.
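-
- Example (illustrative; follows the substitution described above)::
-
-     >>> _format_operation_list('SELECT %s, %s', ('a', 'b'))
-     'SELECT ?, ?'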
- """
- formatted_params = ['?' for _ in parameters]
-
- try:
- return operation % tuple(formatted_params)
- except TypeError as exc:
- raise exceptions.ProgrammingError(exc)
-
-
-def _format_operation_dict(operation, parameters):
- """Formats parameters in operation in the way BigQuery expects.
-
- The input operation will be a query like ``SELECT %(namedparam)s`` and
- the output will be a query like ``SELECT @namedparam``.
-
- :type operation: str
- :param operation: A Google BigQuery query string.
-
- :type parameters: Mapping[str, Any]
- :param parameters: Dictionary of parameter values.
-
- :rtype: str
- :returns: A formatted query string.
- :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError`
- if a parameter used in the operation is not found in the
- ``parameters`` argument.
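-
- Example (illustrative; follows the substitution described above)::
-
-     >>> _format_operation_dict('SELECT %(name)s', {'name': 'x'})
-     'SELECT @`name`'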
- """
- formatted_params = {}
- for name in parameters:
- escaped_name = name.replace('`', r'\`')
- formatted_params[name] = '@`{}`'.format(escaped_name)
-
- try:
- return operation % formatted_params
- except KeyError as exc:
- raise exceptions.ProgrammingError(exc)
-
-
-def _format_operation(operation, parameters=None):
- """Formats parameters in operation in way BigQuery expects.
-
- :type: str
- :param operation: A Google BigQuery query string.
-
- :type: Mapping[str, Any] or Sequence[Any]
- :param parameters: Optional parameter values.
-
- :rtype: str
- :returns: A formatted query string.
- :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError`
- if a parameter used in the operation is not found in the
- ``parameters`` argument.
- """
- if parameters is None:
- return operation
-
- if isinstance(parameters, collections.Mapping):
- return _format_operation_dict(operation, parameters)
-
- return _format_operation_list(operation, parameters)
diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py
deleted file mode 100644
index e356022..0000000
--- a/bigquery/google/cloud/bigquery/external_config.py
+++ /dev/null
@@ -1,492 +0,0 @@
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Define classes that describe external data sources.
-
- These are used for both Table.externalDataConfiguration and
- Job.configuration.query.tableDefinitions.
-"""
-
-from __future__ import absolute_import
-
-import base64
-import copy
-
-import six
-
-from google.cloud.bigquery._helpers import _to_bytes
-from google.cloud.bigquery._helpers import _bytes_to_json
-from google.cloud.bigquery._helpers import _TypedApiResourceProperty
-from google.cloud.bigquery._helpers import _ListApiResourceProperty
-from google.cloud.bigquery._helpers import _int_or_none
-from google.cloud.bigquery.schema import SchemaField
-from google.cloud.bigquery.table import _build_schema_resource
-from google.cloud.bigquery.table import _parse_schema_resource
-
-
-class BigtableColumn(object):
- """Options for a Bigtable column."""
-
- def __init__(self):
- self._properties = {}
-
- encoding = _TypedApiResourceProperty(
- 'encoding', 'encoding', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.encoding
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding
- """
-
- field_name = _TypedApiResourceProperty(
- 'field_name', 'fieldName', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.field_name
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.field_name
- """
-
- only_read_latest = _TypedApiResourceProperty(
- 'only_read_latest', 'onlyReadLatest', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.only_read_latest
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.only_read_latest
- """
-
- qualifier_encoded = _TypedApiResourceProperty(
- 'qualifier_encoded', 'qualifierEncoded', six.binary_type)
- """The qualifier encoded in binary. The type is ``str`` (Python 2.x) or
- ``bytes`` (Python 3.x). The module will handle base64 encoding for you.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_encoded
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_encoded
- """
-
- qualifier_string = _TypedApiResourceProperty(
- 'qualifier_string', 'qualifierString', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_string
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_string
- """
-
- type_ = _TypedApiResourceProperty('type_', 'type', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.type
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type
- """
-
- def to_api_repr(self):
- """Build an API representation of this object.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- qe = config.get('qualifierEncoded')
- if qe is not None:
- config['qualifierEncoded'] = _bytes_to_json(qe)
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a BigtableColumn given its API representation
-
- :type resource: dict
- :param resource:
- A column in the same representation as is returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls()
- config._properties = copy.deepcopy(resource)
- qe = resource.get('qualifierEncoded')
- if qe:
- config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe))
- return config
-
-
-class BigtableColumnFamily(object):
- """Options for a Bigtable column family."""
-
- def __init__(self):
- self._properties = {}
-
- encoding = _TypedApiResourceProperty(
- 'encoding', 'encoding', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding
- """
-
- family_id = _TypedApiResourceProperty(
- 'family_id', 'familyId', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId
- """
-
- only_read_latest = _TypedApiResourceProperty(
- 'only_read_latest', 'onlyReadLatest', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest
- """
-
- type_ = _TypedApiResourceProperty('type_', 'type', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type
- """
-
- columns = _ListApiResourceProperty(
- 'columns', 'columns', BigtableColumn)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns
- """
-
- def to_api_repr(self):
- """Build an API representation of this object.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- config['columns'] = [c.to_api_repr() for c in config['columns']]
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a BigtableColumnFamily given its
- API representation
-
- :type resource: dict
- :param resource:
- A column family in the same representation as is returned
- from the API.
-
- :rtype:
- :class:`google.cloud.bigquery.external_config.BigtableColumnFamily`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls()
- config._properties = copy.deepcopy(resource)
- config.columns = [BigtableColumn.from_api_repr(c)
- for c in resource['columns']]
- return config
-
-
-class BigtableOptions(object):
- """Options that describe how to treat Bigtable tables
- as BigQuery tables."""
-
- _SOURCE_FORMAT = 'BIGTABLE'
- _RESOURCE_NAME = 'bigtableOptions'
-
- def __init__(self):
- self._properties = {}
-
- ignore_unspecified_column_families = _TypedApiResourceProperty(
- 'ignore_unspecified_column_families',
- 'ignoreUnspecifiedColumnFamilies', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies
- """
-
- read_rowkey_as_string = _TypedApiResourceProperty(
- 'read_rowkey_as_string', 'readRowkeyAsString', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString
- """
-
- column_families = _ListApiResourceProperty(
- 'column_families', 'columnFamilies', BigtableColumnFamily)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies
- """
-
- def to_api_repr(self):
- """Build an API representation of this object.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- config['columnFamilies'] = [cf.to_api_repr()
- for cf in config['columnFamilies']]
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a BigtableOptions given its API representation
-
- :type resource: dict
- :param resource:
- A BigtableOptions in the same representation as is returned
- from the API.
-
- :rtype: :class:`google.cloud.bigquery.external_config.BigtableOptions`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls()
- config._properties = copy.deepcopy(resource)
- config.column_families = [BigtableColumnFamily.from_api_repr(cf)
- for cf in resource['columnFamilies']]
- return config
-
-
-class CSVOptions(object):
- """Options that describe how to treat CSV files as BigQuery tables."""
-
- _SOURCE_FORMAT = 'CSV'
- _RESOURCE_NAME = 'csvOptions'
-
- def __init__(self):
- self._properties = {}
-
- allow_jagged_rows = _TypedApiResourceProperty(
- 'allow_jagged_rows', 'allowJaggedRows', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows
- """
-
- allow_quoted_newlines = _TypedApiResourceProperty(
- 'allow_quoted_newlines', 'allowQuotedNewlines', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines
- """
-
- encoding = _TypedApiResourceProperty(
- 'encoding', 'encoding', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding
- """
-
- field_delimiter = _TypedApiResourceProperty(
- 'field_delimiter', 'fieldDelimiter', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter
- """
-
- quote_character = _TypedApiResourceProperty(
- 'quote_character', 'quote', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote
- """
-
- skip_leading_rows = _TypedApiResourceProperty(
- 'skip_leading_rows', 'skipLeadingRows', six.integer_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows
- """
-
- def to_api_repr(self):
- """Build an API representation of this object.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- slr = config.pop('skipLeadingRows', None)
- if slr is not None:
- config['skipLeadingRows'] = str(slr)
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a CSVOptions given its API representation
-
- :type resource: dict
- :param resource:
- A CSVOptions in the same representation as is
- returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions`
- :returns: Configuration parsed from ``resource``.
- """
- slr = resource.get('skipLeadingRows')
- config = cls()
- config._properties = copy.deepcopy(resource)
- config.skip_leading_rows = _int_or_none(slr)
- return config
-
-
-class GoogleSheetsOptions(object):
- """Options that describe how to treat Google Sheets as BigQuery tables."""
-
- _SOURCE_FORMAT = 'GOOGLE_SHEETS'
- _RESOURCE_NAME = 'googleSheetsOptions'
-
- def __init__(self):
- self._properties = {}
-
- skip_leading_rows = _TypedApiResourceProperty(
- 'skip_leading_rows', 'skipLeadingRows', six.integer_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows
- """
-
- def to_api_repr(self):
- """Build an API representation of this object.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- slr = config.pop('skipLeadingRows', None)
- if slr is not None:
- config['skipLeadingRows'] = str(slr)
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a GoogleSheetsOptions given its API representation
-
- :type resource: dict
- :param resource:
- A GoogleSheetsOptions in the same representation as is
- returned from the API.
-
- :rtype:
- :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions`
- :returns: Configuration parsed from ``resource``.
- """
- slr = resource.get('skipLeadingRows')
- config = cls()
- config._properties = copy.deepcopy(resource)
- config.skip_leading_rows = _int_or_none(slr)
- return config
-
-
-_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions)
-
-
-class ExternalConfig(object):
- """Description of an external data source.
-
- :type source_format: str
- :param source_format: the format of the external data. See
- the ``source_format`` property on this class.
- """
-
- def __init__(self, source_format):
- self._properties = {'sourceFormat': source_format}
- self._options = None
- for optcls in _OPTION_CLASSES:
- if source_format == optcls._SOURCE_FORMAT:
- self._options = optcls()
- break
-
- @property
- def source_format(self):
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat
- """
- return self._properties['sourceFormat']
-
- @property
- def options(self):
- """Source-specific options."""
- return self._options
-
- autodetect = _TypedApiResourceProperty(
- 'autodetect', 'autodetect', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect
- """
-
- compression = _TypedApiResourceProperty(
- 'compression', 'compression', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression
- """
-
- ignore_unknown_values = _TypedApiResourceProperty(
- 'ignore_unknown_values', 'ignoreUnknownValues', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues
- """
-
- max_bad_records = _TypedApiResourceProperty(
- 'max_bad_records', 'maxBadRecords', six.integer_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords
- """
-
- source_uris = _ListApiResourceProperty(
- 'source_uris', 'sourceUris', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris
- """
-
- schema = _ListApiResourceProperty('schema', 'schema', SchemaField)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema
- """
-
- def to_api_repr(self):
- """Build an API representation of this object.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- if self.schema:
- config['schema'] = {'fields': _build_schema_resource(self.schema)}
- if self.options is not None:
- r = self.options.to_api_repr()
- if r != {}:
- config[self.options._RESOURCE_NAME] = r
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a CSVOptions given its API representation
-
- :type resource: dict
- :param resource:
- An extract job configuration in the same representation as is
- returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls(resource['sourceFormat'])
- schema = resource.get('schema')
- for optcls in _OPTION_CLASSES:
- opts = resource.get(optcls._RESOURCE_NAME)
- if opts is not None:
- config._options = optcls.from_api_repr(opts)
- break
- config._properties = copy.deepcopy(resource)
- if schema:
- config.schema = _parse_schema_resource(schema)
- return config
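The ``ExternalConfig`` constructor above picks the option class whose ``_SOURCE_FORMAT`` matches the requested source format, and ``to_api_repr`` nests that object's representation under its ``_RESOURCE_NAME``. A hedged, standalone sketch of that dispatch; the stand-in classes below only mimic the real option classes and nothing here imports the library:

.. code:: python

    # Standalone sketch of the source-format dispatch used by ExternalConfig.
    class _CSVOptions(object):
        _SOURCE_FORMAT = 'CSV'
        _RESOURCE_NAME = 'csvOptions'

    class _SheetsOptions(object):
        _SOURCE_FORMAT = 'GOOGLE_SHEETS'
        _RESOURCE_NAME = 'googleSheetsOptions'

    _OPTION_CLASSES = (_CSVOptions, _SheetsOptions)

    def options_for(source_format):
        for optcls in _OPTION_CLASSES:
            if source_format == optcls._SOURCE_FORMAT:
                return optcls()
        return None  # formats such as NEWLINE_DELIMITED_JSON have no extra options

    assert options_for('GOOGLE_SHEETS')._RESOURCE_NAME == 'googleSheetsOptions'
    assert options_for('NEWLINE_DELIMITED_JSON') is None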
diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
deleted file mode 100644
index 5c7ffd3..0000000
--- a/bigquery/google/cloud/bigquery/job.py
+++ /dev/null
@@ -1,2085 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Define API Jobs."""
-
-import copy
-import threading
-
-import six
-from six.moves import http_client
-
-import google.api_core.future.polling
-from google.cloud import exceptions
-from google.cloud.exceptions import NotFound
-from google.cloud._helpers import _datetime_from_microseconds
-from google.cloud.bigquery.dataset import DatasetReference
-from google.cloud.bigquery.external_config import ExternalConfig
-from google.cloud.bigquery.query import _AbstractQueryParameter
-from google.cloud.bigquery.query import _query_param_from_api_repr
-from google.cloud.bigquery.query import ArrayQueryParameter
-from google.cloud.bigquery.query import ScalarQueryParameter
-from google.cloud.bigquery.query import StructQueryParameter
-from google.cloud.bigquery.query import UDFResource
-from google.cloud.bigquery.schema import SchemaField
-from google.cloud.bigquery.table import TableReference
-from google.cloud.bigquery.table import _build_schema_resource
-from google.cloud.bigquery.table import _parse_schema_resource
-from google.cloud.bigquery._helpers import _EnumApiResourceProperty
-from google.cloud.bigquery._helpers import _ListApiResourceProperty
-from google.cloud.bigquery._helpers import _TypedApiResourceProperty
-from google.cloud.bigquery._helpers import DEFAULT_RETRY
-from google.cloud.bigquery._helpers import _int_or_none
-
-_DONE_STATE = 'DONE'
-_STOPPED_REASON = 'stopped'
-_TIMEOUT_BUFFER_SECS = 0.1
-
-_ERROR_REASON_TO_EXCEPTION = {
- 'accessDenied': http_client.FORBIDDEN,
- 'backendError': http_client.INTERNAL_SERVER_ERROR,
- 'billingNotEnabled': http_client.FORBIDDEN,
- 'billingTierLimitExceeded': http_client.BAD_REQUEST,
- 'blocked': http_client.FORBIDDEN,
- 'duplicate': http_client.CONFLICT,
- 'internalError': http_client.INTERNAL_SERVER_ERROR,
- 'invalid': http_client.BAD_REQUEST,
- 'invalidQuery': http_client.BAD_REQUEST,
- 'notFound': http_client.NOT_FOUND,
- 'notImplemented': http_client.NOT_IMPLEMENTED,
- 'quotaExceeded': http_client.FORBIDDEN,
- 'rateLimitExceeded': http_client.FORBIDDEN,
- 'resourceInUse': http_client.BAD_REQUEST,
- 'resourcesExceeded': http_client.BAD_REQUEST,
- 'responseTooLarge': http_client.FORBIDDEN,
- 'stopped': http_client.OK,
- 'tableUnavailable': http_client.BAD_REQUEST,
-}
-
-
-def _error_result_to_exception(error_result):
- """Maps BigQuery error reasons to an exception.
-
- The reasons and their matching HTTP status codes are documented on
- the `troubleshooting errors`_ page.
-
- .. _troubleshooting errors: https://cloud.google.com/bigquery\
- /troubleshooting-errors
-
- :type error_result: Mapping[str, str]
- :param error_result: The error result from BigQuery.
-
- :rtype: google.cloud.exceptions.GoogleCloudError
- :returns: The mapped exception.
- """
- reason = error_result.get('reason')
- status_code = _ERROR_REASON_TO_EXCEPTION.get(
- reason, http_client.INTERNAL_SERVER_ERROR)
- return exceptions.from_http_status(
- status_code, error_result.get('message', ''), errors=[error_result])
-
-
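The helper above looks the error ``reason`` up in ``_ERROR_REASON_TO_EXCEPTION`` and defaults to a 500 when the reason is unknown. A small sketch of just that lookup (the exception construction through ``google.cloud.exceptions`` is omitted; the mapping below is a subset of the table above):

.. code:: python

    from six.moves import http_client

    # Subset of the reason -> HTTP status mapping defined above; unknown
    # reasons fall back to 500, mirroring _error_result_to_exception().
    _REASON_TO_STATUS = {
        'notFound': http_client.NOT_FOUND,
        'invalidQuery': http_client.BAD_REQUEST,
        'rateLimitExceeded': http_client.FORBIDDEN,
    }

    def status_for(error_result):
        return _REASON_TO_STATUS.get(
            error_result.get('reason'), http_client.INTERNAL_SERVER_ERROR)

    assert status_for({'reason': 'notFound'}) == 404
    assert status_for({'reason': 'somethingNew'}) == 500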
-class Compression(_EnumApiResourceProperty):
- """Pseudo-enum for ``compression`` properties."""
- GZIP = 'GZIP'
- NONE = 'NONE'
-
-
-class CreateDisposition(_EnumApiResourceProperty):
- """Pseudo-enum for ``create_disposition`` properties."""
- CREATE_IF_NEEDED = 'CREATE_IF_NEEDED'
- CREATE_NEVER = 'CREATE_NEVER'
-
-
-class DestinationFormat(_EnumApiResourceProperty):
- """Pseudo-enum for ``destination_format`` properties."""
- CSV = 'CSV'
- NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON'
- AVRO = 'AVRO'
-
-
-class Encoding(_EnumApiResourceProperty):
- """Pseudo-enum for ``encoding`` properties."""
- UTF_8 = 'UTF-8'
- ISO_8859_1 = 'ISO-8859-1'
-
-
-class QueryPriority(_EnumApiResourceProperty):
- """Pseudo-enum for ``QueryJob.priority`` property."""
- INTERACTIVE = 'INTERACTIVE'
- BATCH = 'BATCH'
-
-
-class SourceFormat(_EnumApiResourceProperty):
- """Pseudo-enum for ``source_format`` properties."""
- CSV = 'CSV'
- DATASTORE_BACKUP = 'DATASTORE_BACKUP'
- NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON'
- AVRO = 'AVRO'
-
-
-class WriteDisposition(_EnumApiResourceProperty):
- """Pseudo-enum for ``write_disposition`` properties."""
- WRITE_APPEND = 'WRITE_APPEND'
- WRITE_TRUNCATE = 'WRITE_TRUNCATE'
- WRITE_EMPTY = 'WRITE_EMPTY'
-
-
-class AutoDetectSchema(_TypedApiResourceProperty):
- """Property for ``autodetect`` properties.
-
- :raises ValueError: on ``set`` operation if ``instance.schema``
- is already defined.
- """
- def __set__(self, instance, value):
- self._validate(value)
- instance._properties[self.resource_name] = value
-
-
-class _AsyncJob(google.api_core.future.polling.PollingFuture):
- """Base class for asynchronous jobs.
-
- :type job_id: str
- :param job_id: the job's ID in the project associated with the client.
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: A client which holds credentials and project configuration.
- """
- def __init__(self, job_id, client):
- super(_AsyncJob, self).__init__()
- self.job_id = job_id
- self._client = client
- self._properties = {}
- self._result_set = False
- self._completion_lock = threading.Lock()
-
- @property
- def project(self):
- """Project bound to the job.
-
- :rtype: str
- :returns: the project (derived from the client).
- """
- return self._client.project
-
- def _require_client(self, client):
- """Check client or verify over-ride.
-
- :type client: :class:`~google.cloud.bigquery.client.Client` or
- ``NoneType``
- :param client: the client to use. If not passed, falls back to the
- ``client`` stored on the current dataset.
-
- :rtype: :class:`google.cloud.bigquery.client.Client`
- :returns: The client passed in or the currently bound client.
- """
- if client is None:
- client = self._client
- return client
-
- @property
- def job_type(self):
- """Type of job
-
- :rtype: str
- :returns: one of 'load', 'copy', 'extract', 'query'
- """
- return self._JOB_TYPE
-
- @property
- def path(self):
- """URL path for the job's APIs.
-
- :rtype: str
- :returns: the path based on project and job ID.
- """
- return '/projects/%s/jobs/%s' % (self.project, self.job_id)
-
- @property
- def etag(self):
- """ETag for the job resource.
-
- :rtype: str, or ``NoneType``
- :returns: the ETag (None until set from the server).
- """
- return self._properties.get('etag')
-
- @property
- def self_link(self):
- """URL for the job resource.
-
- :rtype: str, or ``NoneType``
- :returns: the URL (None until set from the server).
- """
- return self._properties.get('selfLink')
-
- @property
- def user_email(self):
- """E-mail address of user who submitted the job.
-
- :rtype: str, or ``NoneType``
- :returns: the e-mail address (None until set from the server).
- """
- return self._properties.get('user_email')
-
- @property
- def created(self):
- """Datetime at which the job was created.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the creation time (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- millis = statistics.get('creationTime')
- if millis is not None:
- return _datetime_from_microseconds(millis * 1000.0)
-
- @property
- def started(self):
- """Datetime at which the job was started.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the start time (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- millis = statistics.get('startTime')
- if millis is not None:
- return _datetime_from_microseconds(millis * 1000.0)
-
- @property
- def ended(self):
- """Datetime at which the job finished.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the end time (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- millis = statistics.get('endTime')
- if millis is not None:
- return _datetime_from_microseconds(millis * 1000.0)
-
- def _job_statistics(self):
- """Helper for job-type specific statistics-based properties."""
- statistics = self._properties.get('statistics', {})
- return statistics.get(self._JOB_TYPE, {})
-
- @property
- def error_result(self):
- """Error information about the job as a whole.
-
- :rtype: mapping, or ``NoneType``
- :returns: the error information (None until set from the server).
- """
- status = self._properties.get('status')
- if status is not None:
- return status.get('errorResult')
-
- @property
- def errors(self):
- """Information about individual errors generated by the job.
-
- :rtype: list of mappings, or ``NoneType``
- :returns: the error information (None until set from the server).
- """
- status = self._properties.get('status')
- if status is not None:
- return status.get('errors')
-
- @property
- def state(self):
- """Status of the job.
-
- :rtype: str, or ``NoneType``
- :returns: the state (None until set from the server).
- """
- status = self._properties.get('status')
- if status is not None:
- return status.get('state')
-
- def _scrub_local_properties(self, cleaned):
- """Helper: handle subclass properties in cleaned."""
- pass
-
- def _copy_configuration_properties(self, configuration):
- """Helper: assign subclass configuration properties in cleaned."""
- raise NotImplementedError("Abstract")
-
- def _set_properties(self, api_response):
- """Update properties from resource in body of ``api_response``
-
- :type api_response: dict
- :param api_response: response returned from an API call
- """
- cleaned = api_response.copy()
- self._scrub_local_properties(cleaned)
-
- statistics = cleaned.get('statistics', {})
- if 'creationTime' in statistics:
- statistics['creationTime'] = float(statistics['creationTime'])
- if 'startTime' in statistics:
- statistics['startTime'] = float(statistics['startTime'])
- if 'endTime' in statistics:
- statistics['endTime'] = float(statistics['endTime'])
-
- self._properties.clear()
- self._properties.update(cleaned)
- configuration = cleaned['configuration'][self._JOB_TYPE]
- self._copy_configuration_properties(configuration)
-
- # For Future interface
- self._set_future_result()
-
- @classmethod
- def _get_resource_config(cls, resource):
- """Helper for :meth:`from_api_repr`
-
- :type resource: dict
- :param resource: resource for the job
-
- :rtype: tuple
- :returns: tuple (string, dict), where the first element is the
- job ID and the second contains job-specific configuration.
- :raises: :class:`KeyError` if the resource has no identifier, or
- is missing the appropriate configuration.
- """
- if ('jobReference' not in resource or
- 'jobId' not in resource['jobReference']):
- raise KeyError('Resource lacks required identity information: '
- '["jobReference"]["jobId"]')
- job_id = resource['jobReference']['jobId']
- if ('configuration' not in resource or
- cls._JOB_TYPE not in resource['configuration']):
- raise KeyError('Resource lacks required configuration: '
- '["configuration"]["%s"]' % cls._JOB_TYPE)
- config = resource['configuration'][cls._JOB_TYPE]
- return job_id, config
-
- def begin(self, client=None, retry=DEFAULT_RETRY):
- """API call: begin the job via a POST request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
-
- :type client: :class:`~google.cloud.bigquery.client.Client` or
- ``NoneType``
- :param client: the client to use. If not passed, falls back to the
- ``client`` stored on the current dataset.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :raises: :exc:`ValueError` if the job has already begun.
- """
- if self.state is not None:
- raise ValueError("Job already begun.")
-
- client = self._require_client(client)
- path = '/projects/%s/jobs' % (self.project,)
-
- # jobs.insert is idempotent because we ensure that every new
- # job has an ID.
- api_response = client._call_api(
- retry,
- method='POST', path=path, data=self._build_resource())
- self._set_properties(api_response)
-
- def exists(self, client=None, retry=DEFAULT_RETRY):
- """API call: test for the existence of the job via a GET request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
-
- :type client: :class:`~google.cloud.bigquery.client.Client` or
- ``NoneType``
- :param client: the client to use. If not passed, falls back to the
- ``client`` stored on the current dataset.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: bool
- :returns: Boolean indicating existence of the job.
- """
- client = self._require_client(client)
-
- try:
- client._call_api(retry,
- method='GET', path=self.path,
- query_params={'fields': 'id'})
- except NotFound:
- return False
- else:
- return True
-
- def reload(self, client=None, retry=DEFAULT_RETRY):
- """API call: refresh job properties via a GET request.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
-
- :type client: :class:`~google.cloud.bigquery.client.Client` or
- ``NoneType``
- :param client: the client to use. If not passed, falls back to the
- ``client`` stored on the current dataset.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
- """
- client = self._require_client(client)
-
- api_response = client._call_api(retry, method='GET', path=self.path)
- self._set_properties(api_response)
-
- def cancel(self, client=None):
- """API call: cancel job via a POST request
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel
-
- :type client: :class:`~google.cloud.bigquery.client.Client` or
- ``NoneType``
- :param client: the client to use. If not passed, falls back to the
- ``client`` stored on the current dataset.
-
- :rtype: bool
- :returns: Boolean indicating that the cancel request was sent.
- """
- client = self._require_client(client)
-
- api_response = client._connection.api_request(
- method='POST', path='%s/cancel' % (self.path,))
- self._set_properties(api_response['job'])
- # The Future interface requires that we return True if the *attempt*
- # to cancel was successful.
- return True
-
- # The following methods implement the PollingFuture interface. Note that
- # the methods above are from the pre-Future interface and are left for
- # compatibility. The only "overloaded" method is :meth:`cancel`, which
- # satisfies both interfaces.
-
- def _set_future_result(self):
- """Set the result or exception from the job if it is complete."""
- # This must be done in a lock to prevent the polling thread
- # and main thread from both executing the completion logic
- # at the same time.
- with self._completion_lock:
- # If the operation isn't complete or if the result has already been
- # set, do not call set_result/set_exception again.
- # Note: self._result_set is set to True in set_result and
- # set_exception, in case those methods are invoked directly.
- if self.state != _DONE_STATE or self._result_set:
- return
-
- if self.error_result is not None:
- exception = _error_result_to_exception(self.error_result)
- self.set_exception(exception)
- else:
- self.set_result(self)
-
- def done(self, retry=DEFAULT_RETRY):
- """Refresh the job and checks if it is complete.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: bool
- :returns: True if the job is complete, False otherwise.
- """
- # Do not refresh if the state is already done, as the job will not
- # change once complete.
- if self.state != _DONE_STATE:
- self.reload(retry=retry)
- return self.state == _DONE_STATE
-
- def result(self, timeout=None):
- """Start the job and wait for it to complete and get the result.
-
- :type timeout: int
- :param timeout: How long to wait for job to complete before raising
- a :class:`TimeoutError`.
-
- :rtype: _AsyncJob
- :returns: This instance.
-
- :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
- failed or :class:`TimeoutError` if the job did not complete in the
- given timeout.
- """
- if self.state is None:
- self.begin()
- # TODO: modify PollingFuture so it can pass a retry argument to done().
- return super(_AsyncJob, self).result(timeout=timeout)
-
- def cancelled(self):
- """Check if the job has been cancelled.
-
- The API does not expose a direct "cancelled" flag, so this reports
- True only when the job's error result carries the 'stopped' reason.
- This method exists to satisfy the interface for
- :class:`google.api_core.future.Future`.
-
- :rtype: bool
- :returns: True if the job's error result indicates it was stopped.
- """
- return (self.error_result is not None
- and self.error_result.get('reason') == _STOPPED_REASON)
-
-
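``_set_future_result`` above resolves the future at most once: it bails out unless the job state is ``DONE`` and no result has been recorded yet, then chooses between ``set_exception`` and ``set_result`` based on ``error_result``. A stdlib-only sketch of that decision, with the locking and the real future plumbing stripped out (the ``resolve`` helper and dict-shaped ``job`` are illustrative only):

.. code:: python

    # Sketch of the completion logic in _AsyncJob._set_future_result();
    # 'job' is a plain dict standing in for the job's properties.

    _DONE_STATE = 'DONE'

    def resolve(job, result_set):
        status = job.get('status') or {}
        if status.get('state') != _DONE_STATE or result_set:
            return None  # nothing to do yet, or already resolved
        error = status.get('errorResult')
        if error is not None:
            return ('exception', error)   # would call set_exception(...)
        return ('result', job)            # would call set_result(job)

    assert resolve({'status': {'state': 'RUNNING'}}, False) is None
    assert resolve({'status': {'state': 'DONE'}}, False)[0] == 'result'
    assert resolve(
        {'status': {'state': 'DONE', 'errorResult': {'reason': 'invalid'}}},
        False)[0] == 'exception'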
-class LoadJobConfig(object):
- """Configuration options for load jobs.
-
- All properties in this class are optional. Values which are ``None``
- use the server defaults.
- """
-
- def __init__(self):
- self._properties = {}
- self._schema = ()
-
- allow_jagged_rows = _TypedApiResourceProperty(
- 'allow_jagged_rows', 'allowJaggedRows', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows
- """
-
- allow_quoted_newlines = _TypedApiResourceProperty(
- 'allow_quoted_newlines', 'allowQuotedNewlines', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines
- """
-
- autodetect = AutoDetectSchema('autodetect', 'autodetect', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect
- """
-
- create_disposition = CreateDisposition('create_disposition',
- 'createDisposition')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition
- """
-
- encoding = Encoding('encoding', 'encoding')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding
- """
-
- field_delimiter = _TypedApiResourceProperty(
- 'field_delimiter', 'fieldDelimiter', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter
- """
-
- ignore_unknown_values = _TypedApiResourceProperty(
- 'ignore_unknown_values', 'ignoreUnknownValues', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues
- """
-
- max_bad_records = _TypedApiResourceProperty(
- 'max_bad_records', 'maxBadRecords', six.integer_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords
- """
-
- null_marker = _TypedApiResourceProperty(
- 'null_marker', 'nullMarker', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker
- """
-
- quote_character = _TypedApiResourceProperty(
- 'quote_character', 'quote', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote
- """
-
- skip_leading_rows = _TypedApiResourceProperty(
- 'skip_leading_rows', 'skipLeadingRows', six.integer_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows
- """
-
- source_format = SourceFormat('source_format', 'sourceFormat')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat
- """
-
- write_disposition = WriteDisposition('write_disposition',
- 'writeDisposition')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition
- """
-
- @property
- def schema(self):
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
- """
- return list(self._schema)
-
- @schema.setter
- def schema(self, value):
- if not all(isinstance(field, SchemaField) for field in value):
- raise ValueError('Schema items must be fields')
- self._schema = tuple(value)
-
- def to_api_repr(self):
- """Build an API representation of the load job config.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- config = copy.deepcopy(self._properties)
- if len(self.schema) > 0:
- config['schema'] = {'fields': _build_schema_resource(self.schema)}
- # skipLeadingRows is a string because it's defined as an int64, which
- # can't be represented as a JSON number.
- slr = config.get('skipLeadingRows')
- if slr is not None:
- config['skipLeadingRows'] = str(slr)
- return config
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a job configuration given its API representation
-
- :type resource: dict
- :param resource:
- A load job configuration in the same representation as is
- returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.job.LoadJobConfig`
- :returns: Configuration parsed from ``resource``.
- """
- schema = resource.pop('schema', {'fields': ()})
- slr = resource.pop('skipLeadingRows', None)
- config = cls()
- config._properties = copy.deepcopy(resource)
- config.schema = _parse_schema_resource(schema)
- config.skip_leading_rows = _int_or_none(slr)
- return config
-
-
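The ``LoadJobConfig.schema`` setter above freezes the field list into a tuple and rejects anything that is not a ``SchemaField``; ``to_api_repr`` then nests the built field resources under ``schema.fields``. A standalone sketch of the setter's contract, with a hypothetical ``FakeField`` standing in for ``SchemaField``:

.. code:: python

    class FakeField(object):
        """Illustrative stand-in for SchemaField (name and type only)."""
        def __init__(self, name, field_type):
            self.name, self.field_type = name, field_type

    class Config(object):
        def __init__(self):
            self._schema = ()

        @property
        def schema(self):
            return list(self._schema)

        @schema.setter
        def schema(self, value):
            # Mirrors LoadJobConfig.schema: only field objects are accepted.
            if not all(isinstance(field, FakeField) for field in value):
                raise ValueError('Schema items must be fields')
            self._schema = tuple(value)

    cfg = Config()
    cfg.schema = [FakeField('word', 'STRING'), FakeField('count', 'INTEGER')]
    assert len(cfg.schema) == 2
    try:
        cfg.schema = ['not-a-field']
    except ValueError:
        pass  # non-field items are rejected, as in the class above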
-class LoadJob(_AsyncJob):
- """Asynchronous job for loading data into a table.
-
- Can load from Google Cloud Storage URIs or from a file.
-
- :type job_id: str
- :param job_id: the job's ID
-
- :type source_uris: sequence of string or ``NoneType``
- :param source_uris:
- URIs of one or more data files to be loaded. See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris
- for supported URI formats. Pass None for jobs that load from a file.
-
- :type destination: :class:`google.cloud.bigquery.table.TableReference`
- :param destination: reference to table into which data is to be loaded.
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: A client which holds credentials and project configuration
- for the dataset (which requires a project).
- """
-
- _JOB_TYPE = 'load'
-
- def __init__(self, job_id, source_uris, destination, client,
- job_config=None):
- super(LoadJob, self).__init__(job_id, client)
-
- if job_config is None:
- job_config = LoadJobConfig()
-
- self.source_uris = source_uris
- self.destination = destination
- self._configuration = job_config
-
- @property
- def allow_jagged_rows(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`.
- """
- return self._configuration.allow_jagged_rows
-
- @property
- def allow_quoted_newlines(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`.
- """
- return self._configuration.allow_quoted_newlines
-
- @property
- def autodetect(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.autodetect`.
- """
- return self._configuration.autodetect
-
- @property
- def create_disposition(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.create_disposition`.
- """
- return self._configuration.create_disposition
-
- @property
- def encoding(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.encoding`.
- """
- return self._configuration.encoding
-
- @property
- def field_delimiter(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.field_delimiter`.
- """
- return self._configuration.field_delimiter
-
- @property
- def ignore_unknown_values(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`.
- """
- return self._configuration.ignore_unknown_values
-
- @property
- def max_bad_records(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.max_bad_records`.
- """
- return self._configuration.max_bad_records
-
- @property
- def null_marker(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.null_marker`.
- """
- return self._configuration.null_marker
-
- @property
- def quote_character(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.quote_character`.
- """
- return self._configuration.quote_character
-
- @property
- def skip_leading_rows(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`.
- """
- return self._configuration.skip_leading_rows
-
- @property
- def source_format(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.source_format`.
- """
- return self._configuration.source_format
-
- @property
- def write_disposition(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.write_disposition`.
- """
- return self._configuration.write_disposition
-
- @property
- def schema(self):
- """See
- :class:`~google.cloud.bigquery.job.LoadJobConfig.schema`.
- """
- return self._configuration.schema
-
- @property
- def input_file_bytes(self):
- """Count of bytes loaded from source files.
-
- :rtype: int, or ``NoneType``
- :returns: the count (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- return int(statistics['load']['inputFileBytes'])
-
- @property
- def input_files(self):
- """Count of source files.
-
- :rtype: int, or ``NoneType``
- :returns: the count (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- return int(statistics['load']['inputFiles'])
-
- @property
- def output_bytes(self):
- """Count of bytes saved to destination table.
-
- :rtype: int, or ``NoneType``
- :returns: the count (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- return int(statistics['load']['outputBytes'])
-
- @property
- def output_rows(self):
- """Count of rows saved to destination table.
-
- :rtype: int, or ``NoneType``
- :returns: the count (None until set from the server).
- """
- statistics = self._properties.get('statistics')
- if statistics is not None:
- return int(statistics['load']['outputRows'])
-
- def _build_resource(self):
- """Generate a resource for :meth:`begin`."""
- configuration = self._configuration.to_api_repr()
- if self.source_uris is not None:
- configuration['sourceUris'] = self.source_uris
- configuration['destinationTable'] = self.destination.to_api_repr()
-
- return {
- 'jobReference': {
- 'projectId': self.project,
- 'jobId': self.job_id,
- },
- 'configuration': {
- self._JOB_TYPE: configuration,
- },
- }
-
- def _copy_configuration_properties(self, configuration):
- """Helper: assign subclass configuration properties in cleaned."""
- self._configuration._properties = copy.deepcopy(configuration)
-
- @classmethod
- def from_api_repr(cls, resource, client):
- """Factory: construct a job given its API representation
-
- .. note::
-
- This method assumes that the project found in the resource matches
- the client's project.
-
- :type resource: dict
- :param resource: dataset job representation returned from the API
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: Client which holds credentials and project
- configuration for the dataset.
-
- :rtype: :class:`google.cloud.bigquery.job.LoadJob`
- :returns: Job parsed from ``resource``.
- """
- job_id, config_resource = cls._get_resource_config(resource)
- config = LoadJobConfig.from_api_repr(config_resource)
- dest_config = config_resource['destinationTable']
- ds_ref = DatasetReference(dest_config['projectId'],
- dest_config['datasetId'],)
- destination = TableReference(ds_ref, dest_config['tableId'])
- # sourceUris will be absent if this is a file upload.
- source_uris = config_resource.get('sourceUris')
- job = cls(job_id, source_uris, destination, client, config)
- job._set_properties(resource)
- return job
-
-
-class CopyJobConfig(object):
- """Configuration options for copy jobs.
-
- All properties in this class are optional. Values which are ``None``
- use the server defaults.
- """
-
- def __init__(self):
- self._properties = {}
-
- create_disposition = CreateDisposition('create_disposition',
- 'createDisposition')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition
- """
-
- write_disposition = WriteDisposition('write_disposition',
- 'writeDisposition')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition
- """
-
- def to_api_repr(self):
- """Build an API representation of the copy job config.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- return copy.deepcopy(self._properties)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a job configuration given its API representation
-
- :type resource: dict
- :param resource:
- A copy job configuration in the same representation as is
- returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.job.CopyJobConfig`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls()
- config._properties = copy.deepcopy(resource)
- return config
-
-
-class CopyJob(_AsyncJob):
- """Asynchronous job: copy data into a table from other tables.
-
- :type job_id: str
- :param job_id: the job's ID, within the project belonging to ``client``.
-
- :type sources: list of :class:`google.cloud.bigquery.table.TableReference`
- :param sources: Tables from which data is to be copied.
-
- :type destination: :class:`google.cloud.bigquery.table.TableReference`
- :param destination: Table into which data is to be copied.
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: A client which holds credentials and project configuration
- for the dataset (which requires a project).
-
- :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig`
- :param job_config:
- (Optional) Extra configuration options for the copy job.
- """
- _JOB_TYPE = 'copy'
-
- def __init__(self, job_id, sources, destination, client, job_config=None):
- super(CopyJob, self).__init__(job_id, client)
-
- if job_config is None:
- job_config = CopyJobConfig()
-
- self.destination = destination
- self.sources = sources
- self._configuration = job_config
-
- @property
- def create_disposition(self):
- """See
- :class:`~google.cloud.bigquery.job.CopyJobConfig.create_disposition`.
- """
- return self._configuration.create_disposition
-
- @property
- def write_disposition(self):
- """See
- :class:`~google.cloud.bigquery.job.CopyJobConfig.write_disposition`.
- """
- return self._configuration.write_disposition
-
- def _build_resource(self):
- """Generate a resource for :meth:`begin`."""
-
- source_refs = [{
- 'projectId': table.project,
- 'datasetId': table.dataset_id,
- 'tableId': table.table_id,
- } for table in self.sources]
-
- configuration = self._configuration.to_api_repr()
- configuration['sourceTables'] = source_refs
- configuration['destinationTable'] = {
- 'projectId': self.destination.project,
- 'datasetId': self.destination.dataset_id,
- 'tableId': self.destination.table_id,
- }
-
- return {
- 'jobReference': {
- 'projectId': self.project,
- 'jobId': self.job_id,
- },
- 'configuration': {
- self._JOB_TYPE: configuration,
- },
- }
-
- def _copy_configuration_properties(self, configuration):
- """Helper: assign subclass configuration properties in cleaned."""
- self._configuration._properties = copy.deepcopy(configuration)
-
- @classmethod
- def from_api_repr(cls, resource, client):
- """Factory: construct a job given its API representation
-
- .. note::
-
- This method assumes that the project found in the resource matches
- the client's project.
-
- :type resource: dict
- :param resource: dataset job representation returned from the API
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: Client which holds credentials and project
- configuration for the dataset.
-
- :rtype: :class:`google.cloud.bigquery.job.CopyJob`
- :returns: Job parsed from ``resource``.
- """
- job_id, config_resource = cls._get_resource_config(resource)
- config = CopyJobConfig.from_api_repr(config_resource)
- destination = TableReference.from_api_repr(
- config_resource['destinationTable'])
- sources = []
- source_configs = config_resource.get('sourceTables')
- if source_configs is None:
- single = config_resource.get('sourceTable')
- if single is None:
- raise KeyError(
- "Resource missing 'sourceTables' / 'sourceTable'")
- source_configs = [single]
- for source_config in source_configs:
- table_ref = TableReference.from_api_repr(source_config)
- sources.append(table_ref)
- job = cls(
- job_id, sources, destination, client=client, job_config=config)
- job._set_properties(resource)
- return job
-
-
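``CopyJob._build_resource`` above wraps the flat copy configuration in ``sourceTables`` / ``destinationTable`` references and the ``jobReference`` envelope before posting to ``jobs.insert``. A dict-only sketch of the resulting resource shape; the project, dataset, and table names below are placeholders, and plain tuples stand in for ``TableReference`` objects:

.. code:: python

    def copy_job_resource(project, job_id, sources, destination, config):
        # sources/destination are (project, dataset, table) tuples here.
        configuration = dict(config)
        configuration['sourceTables'] = [
            {'projectId': p, 'datasetId': d, 'tableId': t}
            for (p, d, t) in sources
        ]
        p, d, t = destination
        configuration['destinationTable'] = {
            'projectId': p, 'datasetId': d, 'tableId': t}
        return {
            'jobReference': {'projectId': project, 'jobId': job_id},
            'configuration': {'copy': configuration},
        }

    resource = copy_job_resource(
        'my-project', 'copy-1',
        [('my-project', 'src_dataset', 'src_table')],
        ('my-project', 'dst_dataset', 'dst_table'),
        {'writeDisposition': 'WRITE_TRUNCATE'},
    )
    copy_cfg = resource['configuration']['copy']
    assert copy_cfg['sourceTables'][0]['tableId'] == 'src_table'
    assert copy_cfg['destinationTable']['tableId'] == 'dst_table'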
-class ExtractJobConfig(object):
- """Configuration options for extract jobs.
-
- All properties in this class are optional. Values which are ``None``
- use the server defaults.
- """
-
- def __init__(self):
- self._properties = {}
-
- compression = Compression('compression', 'compression')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression
- """
-
- destination_format = DestinationFormat(
- 'destination_format', 'destinationFormat')
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat
- """
-
- field_delimiter = _TypedApiResourceProperty(
- 'field_delimiter', 'fieldDelimiter', six.string_types)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter
- """
-
- print_header = _TypedApiResourceProperty(
- 'print_header', 'printHeader', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader
- """
-
- def to_api_repr(self):
- """Build an API representation of the extract job config.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- return copy.deepcopy(self._properties)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a job configuration given its API representation
-
- :type resource: dict
- :param resource:
- An extract job configuration in the same representation as is
- returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls()
- config._properties = copy.deepcopy(resource)
- return config
-
-
-class ExtractJob(_AsyncJob):
- """Asynchronous job: extract data from a table into Cloud Storage.
-
- :type job_id: str
- :param job_id: the job's ID
-
- :type source: :class:`google.cloud.bigquery.table.TableReference`
- :param source: Table from which data is to be extracted.
-
- :type destination_uris: list of string
- :param destination_uris:
- URIs describing where the extracted data will be written in Cloud
- Storage, using the format ``gs://<bucket_name>/<object_name_or_glob>``.
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client:
- A client which holds credentials and project configuration.
-
- :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig`
- :param job_config:
- (Optional) Extra configuration options for the extract job.
- """
- _JOB_TYPE = 'extract'
-
- def __init__(
- self, job_id, source, destination_uris, client, job_config=None):
- super(ExtractJob, self).__init__(job_id, client)
-
- if job_config is None:
- job_config = ExtractJobConfig()
-
- self.source = source
- self.destination_uris = destination_uris
- self._configuration = job_config
-
- @property
- def compression(self):
- """See
- :class:`~google.cloud.bigquery.job.ExtractJobConfig.compression`.
- """
- return self._configuration.compression
-
- @property
- def destination_format(self):
- """See
- :class:`~google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
- """
- return self._configuration.destination_format
-
- @property
- def field_delimiter(self):
- """See
- :class:`~google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
- """
- return self._configuration.field_delimiter
-
- @property
- def print_header(self):
- """See
- :class:`~google.cloud.bigquery.job.ExtractJobConfig.print_header`.
- """
- return self._configuration.print_header
-
- @property
- def destination_uri_file_counts(self):
- """Return file counts from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts
-
- :rtype: int or None
- :returns: number of files written to the destination URI(s), or None
- if the job is not yet complete.
- """
- result = self._job_statistics().get('destinationUriFileCounts')
- if result is not None:
- result = int(result)
- return result
-
- def _build_resource(self):
- """Generate a resource for :meth:`begin`."""
-
- source_ref = {
- 'projectId': self.source.project,
- 'datasetId': self.source.dataset_id,
- 'tableId': self.source.table_id,
- }
-
- configuration = self._configuration.to_api_repr()
- configuration['sourceTable'] = source_ref
- configuration['destinationUris'] = self.destination_uris
-
- resource = {
- 'jobReference': {
- 'projectId': self.project,
- 'jobId': self.job_id,
- },
- 'configuration': {
- self._JOB_TYPE: configuration,
- },
- }
-
- return resource
-
- def _copy_configuration_properties(self, configuration):
- """Helper: assign subclass configuration properties in cleaned."""
- self._configuration._properties = copy.deepcopy(configuration)
-
- @classmethod
- def from_api_repr(cls, resource, client):
- """Factory: construct a job given its API representation
-
- .. note::
-
- This method assumes that the project found in the resource matches
- the client's project.
-
- :type resource: dict
- :param resource: dataset job representation returned from the API
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: Client which holds credentials and project
- configuration for the dataset.
-
- :rtype: :class:`google.cloud.bigquery.job.ExtractJob`
- :returns: Job parsed from ``resource``.
- """
- job_id, config_resource = cls._get_resource_config(resource)
- config = ExtractJobConfig.from_api_repr(config_resource)
- source_config = config_resource['sourceTable']
- dataset = DatasetReference(
- source_config['projectId'], source_config['datasetId'])
- source = dataset.table(source_config['tableId'])
- destination_uris = config_resource['destinationUris']
-
- job = cls(
- job_id, source, destination_uris, client=client, job_config=config)
- job._set_properties(resource)
- return job
-
-
-def _from_api_repr_query_parameters(resource):
- return [
- _query_param_from_api_repr(mapping)
- for mapping in resource
- ]
-
-
-def _to_api_repr_query_parameters(value):
- return [
- query_parameter.to_api_repr()
- for query_parameter in value
- ]
-
-
-def _from_api_repr_udf_resources(resource):
- udf_resources = []
- for udf_mapping in resource:
- for udf_type, udf_value in udf_mapping.items():
- udf_resources.append(UDFResource(udf_type, udf_value))
- return udf_resources
-
-
-def _to_api_repr_udf_resources(value):
- return [
- {udf_resource.udf_type: udf_resource.value}
- for udf_resource in value
- ]
-
-
-def _from_api_repr_table_defs(resource):
- return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()}
-
-
-def _to_api_repr_table_defs(value):
- return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}
-
-
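The converters above translate between wire and Python shapes: each UDF resource becomes a single-entry ``{udf_type: value}`` mapping, and table definitions become a name-keyed dict of external configs. A stdlib-only sketch of the UDF direction, with a named tuple standing in for ``UDFResource`` and an illustrative ``resourceUri`` entry:

.. code:: python

    import collections

    FakeUDF = collections.namedtuple('FakeUDF', ['udf_type', 'value'])

    def to_api_repr_udfs(udfs):
        # One single-key mapping per UDF resource, as in
        # _to_api_repr_udf_resources().
        return [{u.udf_type: u.value} for u in udfs]

    def from_api_repr_udfs(resource):
        # Flatten the mappings back into (udf_type, value) pairs, as in
        # _from_api_repr_udf_resources().
        out = []
        for mapping in resource:
            for udf_type, value in mapping.items():
                out.append(FakeUDF(udf_type, value))
        return out

    wire = to_api_repr_udfs([FakeUDF('resourceUri', 'gs://bucket/lib.js')])
    assert wire == [{'resourceUri': 'gs://bucket/lib.js'}]
    assert from_api_repr_udfs(wire)[0].value == 'gs://bucket/lib.js'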
-class QueryJobConfig(object):
- """Configuration options for query jobs.
-
- All properties in this class are optional. Values which are ``None``
- use the server defaults.
- """
-
- _QUERY_PARAMETERS_KEY = 'queryParameters'
- _UDF_RESOURCES_KEY = 'userDefinedFunctionResources'
-
- def __init__(self):
- self._properties = {}
-
- def to_api_repr(self):
- """Build an API representation of the copy job config.
-
- :rtype: dict
- :returns: A dictionary in the format used by the BigQuery API.
- """
- resource = copy.deepcopy(self._properties)
-
- # Query parameters have an additional property associated with them
- # to indicate if the query is using named or positional parameters.
- query_parameters = resource.get(self._QUERY_PARAMETERS_KEY)
- if query_parameters:
- if query_parameters[0].name is None:
- resource['parameterMode'] = 'POSITIONAL'
- else:
- resource['parameterMode'] = 'NAMED'
-
- for prop, convert in self._NESTED_PROPERTIES.items():
- _, to_resource = convert
- nested_resource = resource.get(prop)
- if nested_resource is not None:
- resource[prop] = to_resource(nested_resource)
-
- return resource
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a job configuration given its API representation
-
- :type resource: dict
- :param resource:
- A query job configuration in the same representation as is
- returned from the API.
-
- :rtype: :class:`google.cloud.bigquery.job.QueryJobConfig`
- :returns: Configuration parsed from ``resource``.
- """
- config = cls()
- config._properties = copy.deepcopy(resource)
-
- for prop, convert in cls._NESTED_PROPERTIES.items():
- from_resource, _ = convert
- nested_resource = resource.get(prop)
- if nested_resource is not None:
- config._properties[prop] = from_resource(nested_resource)
-
- return config
-
- allow_large_results = _TypedApiResourceProperty(
- 'allow_large_results', 'allowLargeResults', bool)
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults
- """
-
- create_disposition = CreateDisposition(
- 'create_disposition', 'createDisposition')
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition
- """
-
- default_dataset = _TypedApiResourceProperty(
- 'default_dataset', 'defaultDataset', DatasetReference)
- """See
- https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset
- """
-
- destination = _TypedApiResourceProperty(
- 'destination', 'destinationTable', TableReference)
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable
- """
-
- dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool)
- """See
- https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun
- """
-
- flatten_results = _TypedApiResourceProperty(
- 'flatten_results', 'flattenResults', bool)
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults
- """
-
- maximum_billing_tier = _TypedApiResourceProperty(
- 'maximum_billing_tier', 'maximumBillingTier', int)
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier
- """
-
- maximum_bytes_billed = _TypedApiResourceProperty(
- 'maximum_bytes_billed', 'maximumBytesBilled', int)
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled
- """
-
- priority = QueryPriority('priority', 'priority')
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority
- """
-
- query_parameters = _ListApiResourceProperty(
- 'query_parameters', _QUERY_PARAMETERS_KEY, _AbstractQueryParameter)
- """
- A list of
- :class:`google.cloud.bigquery.query.ArrayQueryParameter`,
- :class:`google.cloud.bigquery.query.ScalarQueryParameter`, or
- :class:`google.cloud.bigquery.query.StructQueryParameter`
- (empty by default)
-
- See:
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters
- """
-
- udf_resources = _ListApiResourceProperty(
- 'udf_resources', _UDF_RESOURCES_KEY, UDFResource)
- """
- A list of :class:`google.cloud.bigquery.query.UDFResource` (empty
- by default)
-
- See:
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources
- """
-
- use_legacy_sql = _TypedApiResourceProperty(
- 'use_legacy_sql', 'useLegacySql', bool)
- """See
- https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql
- """
-
- use_query_cache = _TypedApiResourceProperty(
- 'use_query_cache', 'useQueryCache', bool)
- """See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache
- """
-
- write_disposition = WriteDisposition(
- 'write_disposition', 'writeDisposition')
- """See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition
- """
-
- table_definitions = _TypedApiResourceProperty(
- 'table_definitions', 'tableDefinitions', dict)
- """
- Definitions for external tables. A dictionary from table names (strings)
- to :class:`google.cloud.bigquery.external_config.ExternalConfig`.
-
- See
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions
- """
-
- _maximum_billing_tier = None
- _maximum_bytes_billed = None
-
- _NESTED_PROPERTIES = {
- 'defaultDataset': (
- DatasetReference.from_api_repr, DatasetReference.to_api_repr),
- 'destinationTable': (
- TableReference.from_api_repr, TableReference.to_api_repr),
- 'maximumBytesBilled': (int, str),
- 'tableDefinitions': (_from_api_repr_table_defs,
- _to_api_repr_table_defs),
- _QUERY_PARAMETERS_KEY: (
- _from_api_repr_query_parameters, _to_api_repr_query_parameters),
- _UDF_RESOURCES_KEY: (
- _from_api_repr_udf_resources, _to_api_repr_udf_resources),
- }
-
-
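``QueryJobConfig.to_api_repr`` above derives ``parameterMode`` from the first query parameter: positional parameters carry no name, named ones do. A standalone sketch of that rule, with a named tuple standing in for the query-parameter classes:

.. code:: python

    import collections

    # Minimal stand-in for a query parameter: only the name matters here.
    FakeParam = collections.namedtuple('FakeParam', ['name', 'value'])

    def parameter_mode(query_parameters):
        # Mirrors the check in QueryJobConfig.to_api_repr(): positional
        # parameters are created with name=None.
        if not query_parameters:
            return None
        return 'POSITIONAL' if query_parameters[0].name is None else 'NAMED'

    assert parameter_mode([FakeParam(None, 100)]) == 'POSITIONAL'
    assert parameter_mode([FakeParam('min_count', 100)]) == 'NAMED'
    assert parameter_mode([]) is None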
-class QueryJob(_AsyncJob):
- """Asynchronous job: query tables.
-
- :type job_id: str
- :param job_id: the job's ID, within the project belonging to ``client``.
-
- :type query: str
- :param query: SQL query string
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: A client which holds credentials and project configuration
- for the dataset (which requires a project).
-
- :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig`
- :param job_config:
- (Optional) Extra configuration options for the query job.
- """
- _JOB_TYPE = 'query'
- _UDF_KEY = 'userDefinedFunctionResources'
-
- def __init__(self, job_id, query, client, job_config=None):
- super(QueryJob, self).__init__(job_id, client)
-
- if job_config is None:
- job_config = QueryJobConfig()
- if job_config.use_legacy_sql is None:
- job_config.use_legacy_sql = False
-
- self.query = query
- self._configuration = job_config
- self._query_results = None
- self._done_timeout = None
-
- @property
- def allow_large_results(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.allow_large_results`.
- """
- return self._configuration.allow_large_results
-
- @property
- def create_disposition(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.create_disposition`.
- """
- return self._configuration.create_disposition
-
- @property
- def default_dataset(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.default_dataset`.
- """
- return self._configuration.default_dataset
-
- @property
- def destination(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.destination`.
- """
- return self._configuration.destination
-
- @property
- def dry_run(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`.
- """
- return self._configuration.dry_run
-
- @property
- def flatten_results(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.flatten_results`.
- """
- return self._configuration.flatten_results
-
- @property
- def priority(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.priority`.
- """
- return self._configuration.priority
-
- @property
- def query_parameters(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.query_parameters`.
- """
- return self._configuration.query_parameters
-
- @property
- def udf_resources(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.udf_resources`.
- """
- return self._configuration.udf_resources
-
- @property
- def use_legacy_sql(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`.
- """
- return self._configuration.use_legacy_sql
-
- @property
- def use_query_cache(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.use_query_cache`.
- """
- return self._configuration.use_query_cache
-
- @property
- def write_disposition(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.write_disposition`.
- """
- return self._configuration.write_disposition
-
- @property
- def maximum_billing_tier(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`.
- """
- return self._configuration.maximum_billing_tier
-
- @property
- def maximum_bytes_billed(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`.
- """
- return self._configuration.maximum_bytes_billed
-
- @property
- def table_definitions(self):
- """See
- :class:`~google.cloud.bigquery.job.QueryJobConfig.table_definitions`.
- """
- return self._configuration.table_definitions
-
- def _build_resource(self):
- """Generate a resource for :meth:`begin`."""
- configuration = self._configuration.to_api_repr()
-
- resource = {
- 'jobReference': {
- 'projectId': self.project,
- 'jobId': self.job_id,
- },
- 'configuration': {
- self._JOB_TYPE: configuration,
- },
- }
-
- # The dryRun property only applies to query jobs, but it is defined at
- # a level higher up. We need to remove it from the query config.
- if 'dryRun' in configuration:
- dry_run = configuration['dryRun']
- del configuration['dryRun']
- resource['configuration']['dryRun'] = dry_run
-
- configuration['query'] = self.query
-
- return resource
-
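As the comment above notes, ``dryRun`` lives one level up in the job configuration rather than inside the ``query`` block, so ``_build_resource`` moves it out before posting. A dict-only sketch of that relocation (the ``hoist_dry_run`` helper is illustrative, not part of the module):

.. code:: python

    # Sketch of the dryRun hoisting done in QueryJob._build_resource().
    def hoist_dry_run(query_config):
        configuration = dict(query_config)
        resource = {'configuration': {'query': configuration}}
        if 'dryRun' in configuration:
            # dryRun belongs to configuration, not configuration.query.
            resource['configuration']['dryRun'] = configuration.pop('dryRun')
        return resource

    resource = hoist_dry_run({'useLegacySql': False, 'dryRun': True})
    assert resource['configuration']['dryRun'] is True
    assert 'dryRun' not in resource['configuration']['query']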
- def _scrub_local_properties(self, cleaned):
- """Helper: handle subclass properties in cleaned.
-
- .. note::
-
- This method assumes that the project found in the resource matches
- the client's project.
- """
- configuration = cleaned['configuration']['query']
- self.query = configuration['query']
-
- # The dryRun property only applies to query jobs, but it is defined at
- # a level higher up. We need to copy it to the query config.
- self._configuration.dry_run = cleaned['configuration'].get('dryRun')
-
- def _copy_configuration_properties(self, configuration):
- """Helper: assign subclass configuration properties in cleaned."""
- # The dryRun property only applies to query jobs, but it is defined at
- # a level higher up. We need to copy it to the query config.
- # It should already be correctly set by the _scrub_local_properties()
- # method.
- dry_run = self.dry_run
- self._configuration = QueryJobConfig.from_api_repr(configuration)
- self._configuration.dry_run = dry_run
-
- @classmethod
- def from_api_repr(cls, resource, client):
- """Factory: construct a job given its API representation
-
- :type resource: dict
- :param resource: dataset job representation returned from the API
-
- :type client: :class:`google.cloud.bigquery.client.Client`
- :param client: Client which holds credentials and project
- configuration for the dataset.
-
- :rtype: :class:`google.cloud.bigquery.job.QueryJob`
- :returns: Job parsed from ``resource``.
- """
- job_id, config = cls._get_resource_config(resource)
- query = config['query']
- job = cls(job_id, query, client=client)
- job._set_properties(resource)
- return job
-
- @property
- def query_plan(self):
- """Return query plan from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan
-
- :rtype: list of :class:`QueryPlanEntry`
- :returns: entries describing each stage of the query plan, or an
- empty list if the query has not yet completed.
- """
- plan_entries = self._job_statistics().get('queryPlan', ())
- return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries]
-
- @property
- def total_bytes_processed(self):
- """Return total bytes processed from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed
-
- :rtype: int or None
- :returns: total bytes processed by the job, or None if job is not
- yet complete.
- """
- result = self._job_statistics().get('totalBytesProcessed')
- if result is not None:
- result = int(result)
- return result
-
- @property
- def total_bytes_billed(self):
- """Return total bytes billed from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled
-
- :rtype: int or None
- :returns: total bytes billed for the job, or None if job is not
- yet complete.
- """
- result = self._job_statistics().get('totalBytesBilled')
- if result is not None:
- result = int(result)
- return result
-
- @property
- def billing_tier(self):
- """Return billing tier from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier
-
- :rtype: int or None
- :returns: billing tier used by the job, or None if job is not
- yet complete.
- """
- return self._job_statistics().get('billingTier')
-
- @property
- def cache_hit(self):
- """Return billing tier from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit
-
- :rtype: bool or None
- :returns: whether the query results were returned from cache, or None
- if job is not yet complete.
- """
- return self._job_statistics().get('cacheHit')
-
- @property
- def num_dml_affected_rows(self):
- """Return total bytes billed from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows
-
- :rtype: int or None
- :returns: number of DML rows affected by the job, or None if job is not
- yet complete.
- """
- result = self._job_statistics().get('numDmlAffectedRows')
- if result is not None:
- result = int(result)
- return result
-
- @property
- def statement_type(self):
- """Return statement type from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType
-
- :rtype: str or None
- :returns: type of statement used by the job, or None if job is not
- yet complete.
- """
- return self._job_statistics().get('statementType')
-
- @property
- def referenced_tables(self):
- """Return referenced tables from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables
-
- :rtype: list of :class:`~google.cloud.bigquery.table.TableReference`
- :returns: references to tables used by the query, or an empty list
- if the query has not yet completed.
- """
- tables = []
- datasets_by_project_name = {}
-
- for table in self._job_statistics().get('referencedTables', ()):
-
- t_project = table['projectId']
-
- ds_id = table['datasetId']
- t_dataset = datasets_by_project_name.get((t_project, ds_id))
- if t_dataset is None:
- t_dataset = DatasetReference(t_project, ds_id)
- datasets_by_project_name[(t_project, ds_id)] = t_dataset
-
- t_name = table['tableId']
- tables.append(t_dataset.table(t_name))
-
- return tables
-
- @property
- def undeclared_query_paramters(self):
- """Return undeclared query parameters from job statistics, if present.
-
- See:
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParameters
-
- :rtype:
- list of
- :class:`~google.cloud.bigquery.query.ArrayQueryParameter`,
- :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, or
- :class:`~google.cloud.bigquery.query.StructQueryParameter`
- :returns: undeclared parameters, or an empty list if the query has
- not yet completed.
- """
- parameters = []
- undeclared = self._job_statistics().get('undeclaredQueryParameters', ())
-
- for parameter in undeclared:
- p_type = parameter['parameterType']
-
- if 'arrayType' in p_type:
- klass = ArrayQueryParameter
- elif 'structTypes' in p_type:
- klass = StructQueryParameter
- else:
- klass = ScalarQueryParameter
-
- parameters.append(klass.from_api_repr(parameter))
-
- return parameters
-
- def query_results(self, retry=DEFAULT_RETRY):
- """Construct a QueryResults instance, bound to this job.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the RPC.
-
- :rtype: :class:`~google.cloud.bigquery.query.QueryResults`
- :returns: results instance
- """
- if not self._query_results:
- self._query_results = self._client._get_query_results(
- self.job_id, retry, project=self.project)
- return self._query_results
-
- def done(self, retry=DEFAULT_RETRY):
- """Refresh the job and checks if it is complete.
-
- :rtype: bool
- :returns: True if the job is complete, False otherwise.
- """
- # Since the API to getQueryResults can hang up to the timeout value
- # (default of 10 seconds), set the timeout parameter to ensure that
- # the timeout from the futures API is respected. See:
- # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135
- timeout_ms = None
- if self._done_timeout is not None:
- # Subtract a buffer for context switching, network latency, etc.
- timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS
- timeout = max(min(timeout, 10), 0)
- self._done_timeout -= timeout
- self._done_timeout = max(0, self._done_timeout)
- timeout_ms = int(timeout * 1000)
-
- # Do not refresh if the state is already done, as the job will not
- # change once complete.
- if self.state != _DONE_STATE:
- self._query_results = self._client._get_query_results(
- self.job_id, retry,
- project=self.project, timeout_ms=timeout_ms)
-
- # Only reload the job once we know the query is complete.
- # This will ensure that fields such as the destination table are
- # correctly populated.
- if self._query_results.complete:
- self.reload(retry=retry)
-
- return self.state == _DONE_STATE
-
- def _blocking_poll(self, timeout=None):
- self._done_timeout = timeout
- super(QueryJob, self)._blocking_poll(timeout=timeout)
-
- def result(self, timeout=None, retry=DEFAULT_RETRY):
- """Start the job and wait for it to complete and get the result.
-
- :type timeout: float
- :param timeout:
- How long to wait for job to complete before raising a
- :class:`TimeoutError`.
-
- :type retry: :class:`google.api_core.retry.Retry`
- :param retry: (Optional) How to retry the call that retrieves rows.
-
- :rtype: :class:`~google.api_core.page_iterator.Iterator`
- :returns:
- Iterator of row data :class:`tuple`s. During each page, the
- iterator will have the ``total_rows`` attribute set, which counts
- the total number of rows **in the result set** (this is distinct
- from the total number of rows in the current page:
- ``iterator.page.num_items``).
-
- :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
- failed or :class:`TimeoutError` if the job did not complete in the
- given timeout.
- """
- super(QueryJob, self).result(timeout=timeout)
- # Return an iterator instead of returning the job.
- schema = self.query_results().schema
- dest_table = self.destination
- return self._client.list_rows(dest_table, selected_fields=schema,
- retry=retry)
-
-
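The docstrings above describe ``result()`` returning a row iterator over the destination table; a minimal sketch of that usage follows. It assumes a ``QueryJob`` named ``query_job`` obtained elsewhere (for example from a client), so it is illustrative rather than self-contained.

.. code:: python

    # `query_job` is assumed to exist; result() blocks until the job is done
    # (or raises TimeoutError after `timeout` seconds).
    iterator = query_job.result(timeout=60)
    for row in iterator:
        print(row)

    # After pages are fetched, the iterator exposes the size of the whole
    # result set, which is distinct from the size of the current page.
    print(iterator.total_rows)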
-class QueryPlanEntryStep(object):
- """Map a single step in a query plan entry.
-
- :type kind: str
- :param kind: step type
-
- :type substeps: list of str
- :param substeps: names of substeps
- """
- def __init__(self, kind, substeps):
- self.kind = kind
- self.substeps = list(substeps)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct instance from the JSON repr.
-
- :type resource: dict
- :param resource: JSON representation of the entry
-
- :rtype: :class:`QueryPlanEntryStep`
- :return: new instance built from the resource
- """
- return cls(
- kind=resource.get('kind'),
- substeps=resource.get('substeps', ()),
- )
-
- def __eq__(self, other):
- if not isinstance(other, self.__class__):
- return NotImplemented
- return self.kind == other.kind and self.substeps == other.substeps
-
-
-class QueryPlanEntry(object):
- """Map a single entry in a query plan.
-
- :type name: str
- :param name: name of the entry
-
- :type entry_id: int
- :param entry_id: ID of the entry
-
- :type wait_ratio_avg: float
- :param wait_ratio_avg: average wait ratio
-
- :type wait_ratio_max: float
- :param wait_ratio_max: maximum wait ratio
-
- :type read_ratio_avg: float
- :param read_ratio_avg: average read ratio
-
- :type read_ratio_max: float
- :param read_ratio_max: maximum read ratio
-
- :type compute_ratio_avg: float
- :param compute_ratio_avg: average compute ratio
-
- :type compute_ratio_max: float
- :param compute_ratio_max: maximum compute ratio
-
- :type write_ratio_avg: float
- :param write_ratio_avg: average write ratio
-
- :type write_ratio_max: float
- :param write_ratio_max: maximum write ratio
-
- :type records_read: int
- :param records_read: number of records read
-
- :type records_written: int
- :param records_written: number of records written
-
- :type status: str
- :param status: entry status
-
- :type steps: List(QueryPlanEntryStep)
- :param steps: steps in the entry
- """
- def __init__(self,
- name,
- entry_id,
- wait_ratio_avg,
- wait_ratio_max,
- read_ratio_avg,
- read_ratio_max,
- compute_ratio_avg,
- compute_ratio_max,
- write_ratio_avg,
- write_ratio_max,
- records_read,
- records_written,
- status,
- steps):
- self.name = name
- self.entry_id = entry_id
- self.wait_ratio_avg = wait_ratio_avg
- self.wait_ratio_max = wait_ratio_max
- self.read_ratio_avg = read_ratio_avg
- self.read_ratio_max = read_ratio_max
- self.compute_ratio_avg = compute_ratio_avg
- self.compute_ratio_max = compute_ratio_max
- self.write_ratio_avg = write_ratio_avg
- self.write_ratio_max = write_ratio_max
- self.records_read = records_read
- self.records_written = records_written
- self.status = status
- self.steps = steps
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct instance from the JSON repr.
-
- :type resource: dict
- :param resource: JSON representation of the entry
-
- :rtype: :class:`QueryPlanEntry`
- :return: new instance built from the resource
- """
- records_read = resource.get('recordsRead')
- if records_read is not None:
- records_read = int(records_read)
-
- records_written = resource.get('recordsWritten')
- if records_written is not None:
- records_written = int(records_written)
-
- return cls(
- name=resource.get('name'),
- entry_id=resource.get('id'),
- wait_ratio_avg=resource.get('waitRatioAvg'),
- wait_ratio_max=resource.get('waitRatioMax'),
- read_ratio_avg=resource.get('readRatioAvg'),
- read_ratio_max=resource.get('readRatioMax'),
- compute_ratio_avg=resource.get('computeRatioAvg'),
- compute_ratio_max=resource.get('computeRatioMax'),
- write_ratio_avg=resource.get('writeRatioAvg'),
- write_ratio_max=resource.get('writeRatioMax'),
- records_read=records_read,
- records_written=records_written,
- status=resource.get('status'),
- steps=[QueryPlanEntryStep.from_api_repr(step)
- for step in resource.get('steps', ())],
- )
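For orientation, a hedged sketch of the factory above: it parses a hand-built stage resource shaped like the ``statistics.query.queryPlan`` entries in a job resource; every value here is invented for illustration.

.. code:: python

    from google.cloud.bigquery.job import QueryPlanEntry

    stage = {
        'name': 'Stage 1',
        'id': '1',
        'waitRatioAvg': 0.1, 'waitRatioMax': 0.2,
        'readRatioAvg': 0.3, 'readRatioMax': 0.4,
        'computeRatioAvg': 0.5, 'computeRatioMax': 0.6,
        'writeRatioAvg': 0.7, 'writeRatioMax': 0.8,
        'recordsRead': '100',       # counters arrive as strings
        'recordsWritten': '50',
        'status': 'COMPLETE',
        'steps': [{'kind': 'READ', 'substeps': ['$1:word', 'FROM my_table']}],
    }

    entry = QueryPlanEntry.from_api_repr(stage)
    assert entry.records_read == 100        # coerced to int by the factory
    assert entry.steps[0].kind == 'READ'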
diff --git a/bigquery/google/cloud/bigquery/query.py b/bigquery/google/cloud/bigquery/query.py
deleted file mode 100644
index 9577fa5..0000000
--- a/bigquery/google/cloud/bigquery/query.py
+++ /dev/null
@@ -1,633 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""BigQuery query processing."""
-
-from collections import OrderedDict
-import copy
-
-from google.cloud.bigquery.table import _parse_schema_resource
-from google.cloud.bigquery._helpers import _rows_from_json
-from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON
-from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM
-
-
-class UDFResource(object):
- """Describe a single user-defined function (UDF) resource.
-
- :type udf_type: str
- :param udf_type: the type of the resource ('inlineCode' or 'resourceUri')
-
- :type value: str
- :param value: the inline code or resource URI.
-
- See
- https://cloud.google.com/bigquery/user-defined-functions#api
- """
- def __init__(self, udf_type, value):
- self.udf_type = udf_type
- self.value = value
-
- def __eq__(self, other):
- if not isinstance(other, UDFResource):
- return NotImplemented
- return(
- self.udf_type == other.udf_type and
- self.value == other.value)
-
- def __ne__(self, other):
- return not self == other
-
-
-class _AbstractQueryParameter(object):
- """Base class for named / positional query parameters.
- """
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct parameter from JSON resource.
-
- :type resource: dict
- :param resource: JSON mapping of parameter
-
- :rtype: :class:`ScalarQueryParameter`
- """
- raise NotImplementedError
-
- def to_api_repr(self):
- """Construct JSON API representation for the parameter.
-
- :rtype: dict
- """
- raise NotImplementedError
-
-
-class ScalarQueryParameter(_AbstractQueryParameter):
- """Named / positional query parameters for scalar values.
-
- :type name: str or None
- :param name: Parameter name, used via ``@foo`` syntax. If None, the
- parameter can only be addressed via position (``?``).
-
- :type type_: str
- :param type_: name of parameter type. One of 'STRING', 'INT64',
- 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'.
-
- :type value: str, int, float, bool, :class:`datetime.datetime`, or
- :class:`datetime.date`.
- :param value: the scalar parameter value.
- """
- def __init__(self, name, type_, value):
- self.name = name
- self.type_ = type_
- self.value = value
-
- @classmethod
- def positional(cls, type_, value):
- """Factory for positional paramater.
-
- :type type_: str
- :param type_:
- name of parameter type. One of 'STRING', 'INT64',
- 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'.
-
- :type value: str, int, float, bool, :class:`datetime.datetime`, or
- :class:`datetime.date`.
- :param value: the scalar parameter value.
-
- :rtype: :class:`ScalarQueryParameter`
- :returns: instance without name
- """
- return cls(None, type_, value)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct parameter from JSON resource.
-
- :type resource: dict
- :param resource: JSON mapping of parameter
-
- :rtype: :class:`ScalarQueryParameter`
- :returns: instance
- """
- name = resource.get('name')
- type_ = resource['parameterType']['type']
- value = resource['parameterValue']['value']
- converted = _QUERY_PARAMS_FROM_JSON[type_](value, None)
- return cls(name, type_, converted)
-
- def to_api_repr(self):
- """Construct JSON API representation for the parameter.
-
- :rtype: dict
- :returns: JSON mapping
- """
- value = self.value
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
- if converter is not None:
- value = converter(value)
- resource = {
- 'parameterType': {
- 'type': self.type_,
- },
- 'parameterValue': {
- 'value': value,
- },
- }
- if self.name is not None:
- resource['name'] = self.name
- return resource
-
- def _key(self):
- """A tuple key that uniquely describes this field.
-
- Used to compute this instance's hashcode and evaluate equality.
-
- Returns:
- tuple: The contents of this :class:`ScalarQueryParameter`.
- """
- return (
- self.name,
- self.type_.upper(),
- self.value,
- )
-
- def __eq__(self, other):
- if not isinstance(other, ScalarQueryParameter):
- return NotImplemented
- return self._key() == other._key()
-
- def __ne__(self, other):
- return not self == other
-
- def __repr__(self):
- return 'ScalarQueryParameter{}'.format(self._key())
-
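A short sketch of the named and positional forms described above; the parameter name and values are arbitrary, and the import assumes the module shown in this file.

.. code:: python

    from google.cloud.bigquery.query import ScalarQueryParameter

    # Named parameter, referenced as @min_age in the query text.
    param = ScalarQueryParameter('min_age', 'INT64', 18)
    resource = param.to_api_repr()
    # `resource` carries 'name', a 'parameterType' of {'type': 'INT64'},
    # and a 'parameterValue' holding the converted value.

    # Positional parameters (referenced as ?) simply omit the name.
    positional = ScalarQueryParameter.positional('STRING', 'hello')
    assert positional.name is None
    assert 'name' not in positional.to_api_repr()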
-
-class ArrayQueryParameter(_AbstractQueryParameter):
- """Named / positional query parameters for array values.
-
- :type name: str or None
- :param name: Parameter name, used via ``@foo`` syntax. If None, the
- parameter can only be addressed via position (``?``).
-
- :type array_type: str
- :param array_type:
- name of type of array elements. One of `'STRING'`, `'INT64'`,
- `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
-
- :type values: list of appropriate scalar type.
- :param values: the parameter array values.
- """
- def __init__(self, name, array_type, values):
- self.name = name
- self.array_type = array_type
- self.values = values
-
- @classmethod
- def positional(cls, array_type, values):
- """Factory for positional parameters.
-
- :type array_type: str
- :param array_type:
- name of type of array elements. One of `'STRING'`, `'INT64'`,
- `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
-
- :type values: list of appropriate scalar type
- :param values: the parameter array values.
-
- :rtype: :class:`ArrayQueryParameter`
- :returns: instance without name
- """
- return cls(None, array_type, values)
-
- @classmethod
- def _from_api_repr_struct(cls, resource):
- name = resource.get('name')
- converted = []
- # We need to flatten the array to use the StructQueryParameter
- # parse code.
- resource_template = {
- # The arrayType includes all the types of the fields of the STRUCT
- 'parameterType': resource['parameterType']['arrayType']
- }
- for array_value in resource['parameterValue']['arrayValues']:
- struct_resource = copy.deepcopy(resource_template)
- struct_resource['parameterValue'] = array_value
- struct_value = StructQueryParameter.from_api_repr(struct_resource)
- converted.append(struct_value)
- return cls(name, 'STRUCT', converted)
-
- @classmethod
- def _from_api_repr_scalar(cls, resource):
- name = resource.get('name')
- array_type = resource['parameterType']['arrayType']['type']
- values = [
- value['value']
- for value
- in resource['parameterValue']['arrayValues']]
- converted = [
- _QUERY_PARAMS_FROM_JSON[array_type](value, None)
- for value in values
- ]
- return cls(name, array_type, converted)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct parameter from JSON resource.
-
- :type resource: dict
- :param resource: JSON mapping of parameter
-
- :rtype: :class:`ArrayQueryParameter`
- :returns: instance
- """
- array_type = resource['parameterType']['arrayType']['type']
- if array_type == 'STRUCT':
- return cls._from_api_repr_struct(resource)
- return cls._from_api_repr_scalar(resource)
-
- def to_api_repr(self):
- """Construct JSON API representation for the parameter.
-
- :rtype: dict
- :returns: JSON mapping
- """
- values = self.values
- if self.array_type == 'RECORD' or self.array_type == 'STRUCT':
- reprs = [value.to_api_repr() for value in values]
- a_type = reprs[0]['parameterType']
- a_values = [repr_['parameterValue'] for repr_ in reprs]
- else:
- a_type = {'type': self.array_type}
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type)
- if converter is not None:
- values = [converter(value) for value in values]
- a_values = [{'value': value} for value in values]
- resource = {
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': a_type,
- },
- 'parameterValue': {
- 'arrayValues': a_values,
- },
- }
- if self.name is not None:
- resource['name'] = self.name
- return resource
-
- def _key(self):
- """A tuple key that uniquely describes this field.
-
- Used to compute this instance's hashcode and evaluate equality.
-
- Returns:
- tuple: The contents of this :class:`ArrayQueryParameter`.
- """
- return (
- self.name,
- self.array_type.upper(),
- self.values,
- )
-
- def __eq__(self, other):
- if not isinstance(other, ArrayQueryParameter):
- return NotImplemented
- return self._key() == other._key()
-
- def __ne__(self, other):
- return not self == other
-
- def __repr__(self):
- return 'ArrayQueryParameter{}'.format(self._key())
-
-
-class StructQueryParameter(_AbstractQueryParameter):
- """Named / positional query parameters for struct values.
-
- :type name: str or None
- :param name: Parameter name, used via ``@foo`` syntax. If None, the
- parameter can only be addressed via position (``?``).
-
- :type sub_params: tuple of :class:`ScalarQueryParameter`
- :param sub_params: the sub-parameters for the struct
- """
- def __init__(self, name, *sub_params):
- self.name = name
- types = self.struct_types = OrderedDict()
- values = self.struct_values = {}
- for sub in sub_params:
- if isinstance(sub, self.__class__):
- types[sub.name] = 'STRUCT'
- values[sub.name] = sub
- elif isinstance(sub, ArrayQueryParameter):
- types[sub.name] = 'ARRAY'
- values[sub.name] = sub
- else:
- types[sub.name] = sub.type_
- values[sub.name] = sub.value
-
- @classmethod
- def positional(cls, *sub_params):
- """Factory for positional parameters.
-
- :type sub_params: tuple of :class:`ScalarQueryParameter`
- :param sub_params: the sub-parameters for the struct
-
- :rtype: :class:`StructQueryParameter`
- :returns: instance without name
- """
- return cls(None, *sub_params)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct parameter from JSON resource.
-
- :type resource: dict
- :param resource: JSON mapping of parameter
-
- :rtype: :class:`StructQueryParameter`
- :returns: instance
- """
- name = resource.get('name')
- instance = cls(name)
- type_resources = {}
- types = instance.struct_types
- for item in resource['parameterType']['structTypes']:
- types[item['name']] = item['type']['type']
- type_resources[item['name']] = item['type']
- struct_values = resource['parameterValue']['structValues']
- for key, value in struct_values.items():
- type_ = types[key]
- converted = None
- if type_ == 'STRUCT':
- struct_resource = {
- 'name': key,
- 'parameterType': type_resources[key],
- 'parameterValue': value,
- }
- converted = StructQueryParameter.from_api_repr(struct_resource)
- elif type_ == 'ARRAY':
- struct_resource = {
- 'name': key,
- 'parameterType': type_resources[key],
- 'parameterValue': value,
- }
- converted = ArrayQueryParameter.from_api_repr(struct_resource)
- else:
- value = value['value']
- converted = _QUERY_PARAMS_FROM_JSON[type_](value, None)
- instance.struct_values[key] = converted
- return instance
-
- def to_api_repr(self):
- """Construct JSON API representation for the parameter.
-
- :rtype: dict
- :returns: JSON mapping
- """
- s_types = {}
- values = {}
- for name, value in self.struct_values.items():
- type_ = self.struct_types[name]
- if type_ in ('STRUCT', 'ARRAY'):
- repr_ = value.to_api_repr()
- s_types[name] = {'name': name, 'type': repr_['parameterType']}
- values[name] = repr_['parameterValue']
- else:
- s_types[name] = {'name': name, 'type': {'type': type_}}
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
- if converter is not None:
- value = converter(value)
- values[name] = {'value': value}
-
- resource = {
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [s_types[key] for key in self.struct_types],
- },
- 'parameterValue': {
- 'structValues': values,
- },
- }
- if self.name is not None:
- resource['name'] = self.name
- return resource
-
- def _key(self):
- """A tuple key that uniquely describes this field.
-
- Used to compute this instance's hashcode and evaluate equality.
-
- Returns:
- tuple: The contents of this :class:`StructQueryParameter`.
- """
- return (
- self.name,
- self.struct_types,
- self.struct_values,
- )
-
- def __eq__(self, other):
- if not isinstance(other, StructQueryParameter):
- return NotImplemented
- return self._key() == other._key()
-
- def __ne__(self, other):
- return not self == other
-
- def __repr__(self):
- return 'StructQueryParameter{}'.format(self._key())
-
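The three parameter classes compose; a hedged sketch of an ARRAY of STRUCT values built from the classes above, with made-up field names.

.. code:: python

    from google.cloud.bigquery.query import (
        ArrayQueryParameter, ScalarQueryParameter, StructQueryParameter)

    point = StructQueryParameter.positional(
        ScalarQueryParameter('x', 'INT64', 1),
        ScalarQueryParameter('y', 'INT64', 2))
    points = ArrayQueryParameter('points', 'STRUCT', [point])

    resource = points.to_api_repr()
    assert resource['parameterType']['type'] == 'ARRAY'
    assert resource['parameterType']['arrayType']['type'] == 'STRUCT'
    assert len(resource['parameterValue']['arrayValues']) == 1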
-
-class QueryResults(object):
- """Results of a query.
-
- See:
- https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults
- """
-
- def __init__(self, properties):
- self._properties = {}
- self._set_properties(properties)
-
- @classmethod
- def from_api_repr(cls, api_response):
- return cls(api_response)
-
- @property
- def project(self):
- """Project bound to the query job.
-
- :rtype: str
- :returns: the project that the query job is associated with.
- """
- return self._properties.get('jobReference', {}).get('projectId')
-
- @property
- def cache_hit(self):
- """Query results served from cache.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#cacheHit
-
- :rtype: bool or ``NoneType``
- :returns: True if the query results were served from cache (None
- until set by the server).
- """
- return self._properties.get('cacheHit')
-
- @property
- def complete(self):
- """Server completed query.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobComplete
-
- :rtype: bool or ``NoneType``
- :returns: True if the query completed on the server (None
- until set by the server).
- """
- return self._properties.get('jobComplete')
-
- @property
- def errors(self):
- """Errors generated by the query.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#errors
-
- :rtype: list of mapping, or ``NoneType``
- :returns: Mappings describing errors generated on the server (None
- until set by the server).
- """
- return self._properties.get('errors')
-
- @property
- def job_id(self):
- """Job ID of the query job these results are from.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference
-
- :rtype: string
- :returns: Job ID of the query job.
- """
- return self._properties.get('jobReference', {}).get('jobId')
-
- @property
- def page_token(self):
- """Token for fetching next bach of results.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#pageToken
-
- :rtype: str, or ``NoneType``
- :returns: Token generated on the server (None until set by the server).
- """
- return self._properties.get('pageToken')
-
- @property
- def total_rows(self):
- """Total number of rows returned by the query.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalRows
-
- :rtype: int, or ``NoneType``
- :returns: Count generated on the server (None until set by the server).
- """
- total_rows = self._properties.get('totalRows')
- if total_rows is not None:
- return int(total_rows)
-
- @property
- def total_bytes_processed(self):
- """Total number of bytes processed by the query.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalBytesProcessed
-
- :rtype: int, or ``NoneType``
- :returns: Count generated on the server (None until set by the server).
- """
- total_bytes_processed = self._properties.get('totalBytesProcessed')
- if total_bytes_processed is not None:
- return int(total_bytes_processed)
-
- @property
- def num_dml_affected_rows(self):
- """Total number of rows affected by a DML query.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#numDmlAffectedRows
-
- :rtype: int, or ``NoneType``
- :returns: Count generated on the server (None until set by the server).
- """
- num_dml_affected_rows = self._properties.get('numDmlAffectedRows')
- if num_dml_affected_rows is not None:
- return int(num_dml_affected_rows)
-
- @property
- def rows(self):
- """Query results.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows
-
- :rtype: list of :class:`~google.cloud.bigquery.Row`
- :returns: rows of query results (an empty list until set by the server).
- """
- return _rows_from_json(self._properties.get('rows', ()), self.schema)
-
- @property
- def schema(self):
- """Schema for query results.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#schema
-
- :rtype: list of :class:`SchemaField`, or ``NoneType``
- :returns: fields describing the schema (None until set by the server).
- """
- return _parse_schema_resource(self._properties.get('schema', {}))
-
- def _set_properties(self, api_response):
- """Update properties from resource in body of ``api_response``
-
- :type api_response: dict
- :param api_response: response returned from an API call
- """
- job_id_present = (
- 'jobReference' in api_response
- and 'jobId' in api_response['jobReference']
- and 'projectId' in api_response['jobReference'])
- if not job_id_present:
- raise ValueError('QueryResults requires a job reference')
-
- self._properties.clear()
- self._properties.update(copy.deepcopy(api_response))
-
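A minimal sketch of the class above: ``QueryResults`` only requires a complete job reference, and most properties stay ``None`` until the server reports them; the IDs are placeholders.

.. code:: python

    from google.cloud.bigquery.query import QueryResults

    response = {
        'jobReference': {'projectId': 'my-project', 'jobId': 'job-123'},
        'jobComplete': False,
    }
    results = QueryResults.from_api_repr(response)
    assert results.complete is False
    assert results.job_id == 'job-123'
    assert results.total_rows is None   # not reported yet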
-
-def _query_param_from_api_repr(resource):
- """Helper: construct concrete query parameter from JSON resource."""
- qp_type = resource['parameterType']
- if 'arrayType' in qp_type:
- klass = ArrayQueryParameter
- elif 'structTypes' in qp_type:
- klass = StructQueryParameter
- else:
- klass = ScalarQueryParameter
- return klass.from_api_repr(resource)
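The helper above dispatches on the shape of ``parameterType``; a sketch with a hand-built ARRAY resource (all values invented).

.. code:: python

    from google.cloud.bigquery.query import (
        ArrayQueryParameter, _query_param_from_api_repr)

    resource = {
        'name': 'tags',
        'parameterType': {'type': 'ARRAY', 'arrayType': {'type': 'STRING'}},
        'parameterValue': {'arrayValues': [{'value': 'a'}, {'value': 'b'}]},
    }
    param = _query_param_from_api_repr(resource)
    assert isinstance(param, ArrayQueryParameter)   # 'arrayType' was present
    assert param.name == 'tags'
    assert param.array_type == 'STRING'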
diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py
deleted file mode 100644
index 1aa9527..0000000
--- a/bigquery/google/cloud/bigquery/schema.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Schemas for BigQuery tables / queries."""
-
-
-class SchemaField(object):
- """Describe a single field within a table schema.
-
- :type name: str
- :param name: the name of the field.
-
- :type field_type: str
- :param field_type: the type of the field (one of 'STRING', 'INTEGER',
- 'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD').
-
- :type mode: str
- :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED',
- or 'REPEATED').
-
- :type description: str
- :param description: optional description for the field.
-
- :type fields: tuple of :class:`SchemaField`
- :param fields: subfields (requires ``field_type`` of 'RECORD').
- """
- def __init__(self, name, field_type, mode='NULLABLE',
- description=None, fields=()):
- self._name = name
- self._field_type = field_type
- self._mode = mode
- self._description = description
- self._fields = tuple(fields)
-
- @classmethod
- def from_api_repr(cls, api_repr):
- """Return a ``SchemaField`` object deserialized from a dictionary.
-
- Args:
- api_repr (Mapping[str, str]): The serialized representation
- of the SchemaField, such as what is output by
- :meth:`to_api_repr`.
-
- Returns:
- SchemaField: The ``SchemaField`` object.
- """
- return cls(
- field_type=api_repr['type'].upper(),
- fields=[cls.from_api_repr(f) for f in api_repr.get('fields', ())],
- mode=api_repr['mode'].upper(),
- name=api_repr['name'],
- )
-
- @property
- def name(self):
- """str: The name of the field."""
- return self._name
-
- @property
- def field_type(self):
- """str: The type of the field.
-
- Will be one of 'STRING', 'INTEGER', 'FLOAT', 'BOOLEAN',
- 'TIMESTAMP' or 'RECORD'.
- """
- return self._field_type
-
- @property
- def mode(self):
- """str: The mode of the field.
-
- Will be one of 'NULLABLE', 'REQUIRED', or 'REPEATED'.
- """
- return self._mode
-
- @property
- def is_nullable(self):
- """Check whether 'mode' is 'nullable'."""
- return self._mode == 'NULLABLE'
-
- @property
- def description(self):
- """Optional[str]: Description for the field."""
- return self._description
-
- @property
- def fields(self):
- """tuple: Subfields contained in this field.
-
- If ``field_type`` is not 'RECORD', this property must be
- empty / unset.
- """
- return self._fields
-
- def to_api_repr(self):
- """Return a dictionary representing this schema field.
-
- Returns:
- dict: A dictionary representing the SchemaField in a serialized
- form.
- """
- # Put together the basic representation. See http://bit.ly/2hOAT5u.
- answer = {
- 'mode': self.mode.lower(),
- 'name': self.name,
- 'type': self.field_type.lower(),
- }
-
- # If this is a RECORD type, then sub-fields are also included,
- # add this to the serialized representation.
- if self.field_type.upper() == 'RECORD':
- answer['fields'] = [f.to_api_repr() for f in self.fields]
-
- # Done; return the serialized dictionary.
- return answer
-
- def _key(self):
- """A tuple key that uniquely describes this field.
-
- Used to compute this instance's hashcode and evaluate equality.
-
- Returns:
- tuple: The contents of this :class:`SchemaField`.
- """
- return (
- self._name,
- self._field_type.lower(),
- self._mode,
- self._description,
- self._fields,
- )
-
- def __eq__(self, other):
- if not isinstance(other, SchemaField):
- return NotImplemented
- return self._key() == other._key()
-
- def __ne__(self, other):
- return not self == other
-
- def __hash__(self):
- return hash(self._key())
-
- def __repr__(self):
- return 'SchemaField{}'.format(self._key())
-
-
-def _parse_schema_resource(info):
- """Parse a resource fragment into a schema field.
-
- :type info: mapping
- :param info: should contain a "fields" key to be parsed
-
- :rtype: list of :class:`SchemaField`, or ``NoneType``
- :returns: a list of parsed fields, or ``None`` if no "fields" key is
- present in ``info``.
- """
- if 'fields' not in info:
- return ()
-
- schema = []
- for r_field in info['fields']:
- name = r_field['name']
- field_type = r_field['type']
- mode = r_field.get('mode', 'NULLABLE')
- description = r_field.get('description')
- sub_fields = _parse_schema_resource(r_field)
- schema.append(
- SchemaField(name, field_type, mode, description, sub_fields))
- return schema
-
-
-def _build_schema_resource(fields):
- """Generate a resource fragment for a schema.
-
- :type fields: sequence of :class:`SchemaField`
- :param fields: schema to be dumped
-
- :rtype: mapping
- :returns: a mapping describing the schema of the supplied fields.
- """
- infos = []
- for field in fields:
- info = {'name': field.name,
- 'type': field.field_type,
- 'mode': field.mode}
- if field.description is not None:
- info['description'] = field.description
- if field.fields:
- info['fields'] = _build_schema_resource(field.fields)
- infos.append(info)
- return infos
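A hedged sketch of the serialization round trip implemented above, using a nested RECORD field with illustrative names.

.. code:: python

    from google.cloud.bigquery.schema import SchemaField

    address = SchemaField(
        'address', 'RECORD', mode='REPEATED',
        fields=[SchemaField('city', 'STRING'),
                SchemaField('zip', 'STRING')])

    resource = address.to_api_repr()
    # {'name': 'address', 'type': 'record', 'mode': 'repeated',
    #  'fields': [{'name': 'city', 'type': 'string', 'mode': 'nullable'},
    #             {'name': 'zip', 'type': 'string', 'mode': 'nullable'}]}

    assert SchemaField.from_api_repr(resource) == address  # compares _key()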
diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py
deleted file mode 100644
index 2b9dea0..0000000
--- a/bigquery/google/cloud/bigquery/table.py
+++ /dev/null
@@ -1,760 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Define API Datasets."""
-
-from __future__ import absolute_import
-
-import datetime
-
-import six
-
-from google.cloud._helpers import _datetime_from_microseconds
-from google.cloud._helpers import _millis_from_datetime
-from google.cloud.bigquery._helpers import _snake_to_camel_case
-from google.cloud.bigquery.schema import SchemaField
-from google.cloud.bigquery.schema import _build_schema_resource
-from google.cloud.bigquery.schema import _parse_schema_resource
-from google.cloud.bigquery.external_config import ExternalConfig
-
-
-_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'"
-_MARKER = object()
-
-
-class TableReference(object):
- """TableReferences are pointers to tables.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
-
- :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference`
- :param dataset_ref: a pointer to the dataset
-
- :type table_id: str
- :param table_id: the ID of the table
- """
-
- def __init__(self, dataset_ref, table_id):
- self._project = dataset_ref.project
- self._dataset_id = dataset_ref.dataset_id
- self._table_id = table_id
-
- @property
- def project(self):
- """Project bound to the table.
-
- :rtype: str
- :returns: the project (derived from the dataset reference).
- """
- return self._project
-
- @property
- def dataset_id(self):
- """ID of dataset containing the table.
-
- :rtype: str
- :returns: the ID (derived from the dataset reference).
- """
- return self._dataset_id
-
- @property
- def table_id(self):
- """Table ID.
-
- :rtype: str
- :returns: the table ID.
- """
- return self._table_id
-
- @property
- def path(self):
- """URL path for the table's APIs.
-
- :rtype: str
- :returns: the path based on project, dataset and table IDs.
- """
- return '/projects/%s/datasets/%s/tables/%s' % (
- self._project, self._dataset_id, self._table_id)
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a table reference given its API representation
-
- :type resource: dict
- :param resource: table reference representation returned from the API
-
- :rtype: :class:`google.cloud.bigquery.table.TableReference`
- :returns: Table reference parsed from ``resource``.
- """
- from google.cloud.bigquery.dataset import DatasetReference
-
- project = resource['projectId']
- dataset_id = resource['datasetId']
- table_id = resource['tableId']
- return cls(DatasetReference(project, dataset_id), table_id)
-
- def to_api_repr(self):
- """Construct the API resource representation of this table reference.
-
- :rtype: dict
- :returns: Table reference as represented as an API resource
- """
- return {
- 'projectId': self._project,
- 'datasetId': self._dataset_id,
- 'tableId': self._table_id,
- }
-
- def _key(self):
- """A tuple key that uniquely describes this field.
-
- Used to compute this instance's hashcode and evaluate equality.
-
- Returns:
- tuple: The contents of this :class:`TableReference`.
- """
- return (
- self._project,
- self._dataset_id,
- self._table_id,
- )
-
- def __eq__(self, other):
- if not isinstance(other, TableReference):
- return NotImplemented
- return self._key() == other._key()
-
- def __ne__(self, other):
- return not self == other
-
- def __hash__(self):
- return hash(self._key())
-
- def __repr__(self):
- return 'TableReference{}'.format(self._key())
-
-
-class Table(object):
- """Tables represent a set of rows whose values correspond to a schema.
-
- See
- https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
-
- :type table_ref: :class:`google.cloud.bigquery.table.TableReference`
- :param table_ref: a pointer to a table
-
- :type schema: list of :class:`SchemaField`
- :param schema: The table's schema
- """
-
- _schema = None
-
- all_fields = [
- 'description', 'friendly_name', 'expires', 'location',
- 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema',
- 'external_data_configuration', 'labels',
- ]
-
- def __init__(self, table_ref, schema=()):
- self._project = table_ref.project
- self._table_id = table_ref.table_id
- self._dataset_id = table_ref.dataset_id
- self._external_config = None
- self._properties = {'labels': {}}
- # Let the @property do validation.
- self.schema = schema
-
- @property
- def project(self):
- """Project bound to the table.
-
- :rtype: str
- :returns: the project (derived from the dataset).
- """
- return self._project
-
- @property
- def dataset_id(self):
- """ID of dataset containing the table.
-
- :rtype: str
- :returns: the ID (derived from the dataset).
- """
- return self._dataset_id
-
- @property
- def table_id(self):
- """ID of the table.
-
- :rtype: str
- :returns: the table ID.
- """
- return self._table_id
-
- @property
- def path(self):
- """URL path for the table's APIs.
-
- :rtype: str
- :returns: the path based on project, dataset and table IDs.
- """
- return '/projects/%s/datasets/%s/tables/%s' % (
- self._project, self._dataset_id, self._table_id)
-
- @property
- def schema(self):
- """Table's schema.
-
- :rtype: list of :class:`SchemaField`
- :returns: fields describing the schema
- """
- return list(self._schema)
-
- @schema.setter
- def schema(self, value):
- """Update table's schema
-
- :type value: list of :class:`SchemaField`
- :param value: fields describing the schema
-
- :raises: TypeError if 'value' is not a sequence, or ValueError if
- any item in the sequence is not a SchemaField
- """
- if value is None:
- self._schema = ()
- elif not all(isinstance(field, SchemaField) for field in value):
- raise ValueError('Schema items must be fields')
- else:
- self._schema = tuple(value)
-
- @property
- def labels(self):
- """Labels for the table.
-
- This property always returns a dict. To change a table's labels,
- modify the dict, then call ``Client.update_table``. To delete a
- label, set its value to ``None`` before updating.
-
- :rtype: dict, {str -> str}
- :returns: A dict of the table's labels.
- """
- return self._properties['labels']
-
- @labels.setter
- def labels(self, value):
- """Update labels for the table.
-
- :type value: dict, {str -> str}
- :param value: new labels
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, dict):
- raise ValueError("Pass a dict")
- self._properties['labels'] = value
-
- @property
- def created(self):
- """Datetime at which the table was created.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the creation time (None until set from the server).
- """
- creation_time = self._properties.get('creationTime')
- if creation_time is not None:
- # creation_time will be in milliseconds.
- return _datetime_from_microseconds(1000.0 * creation_time)
-
- @property
- def etag(self):
- """ETag for the table resource.
-
- :rtype: str, or ``NoneType``
- :returns: the ETag (None until set from the server).
- """
- return self._properties.get('etag')
-
- @property
- def modified(self):
- """Datetime at which the table was last modified.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the modification time (None until set from the server).
- """
- modified_time = self._properties.get('lastModifiedTime')
- if modified_time is not None:
- # modified_time will be in milliseconds.
- return _datetime_from_microseconds(1000.0 * modified_time)
-
- @property
- def num_bytes(self):
- """The size of the table in bytes.
-
- :rtype: int, or ``NoneType``
- :returns: the byte count (None until set from the server).
- """
- num_bytes_as_str = self._properties.get('numBytes')
- if num_bytes_as_str is not None:
- return int(num_bytes_as_str)
-
- @property
- def num_rows(self):
- """The number of rows in the table.
-
- :rtype: int, or ``NoneType``
- :returns: the row count (None until set from the server).
- """
- num_rows_as_str = self._properties.get('numRows')
- if num_rows_as_str is not None:
- return int(num_rows_as_str)
-
- @property
- def self_link(self):
- """URL for the table resource.
-
- :rtype: str, or ``NoneType``
- :returns: the URL (None until set from the server).
- """
- return self._properties.get('selfLink')
-
- @property
- def full_table_id(self):
- """ID for the table, in the form ``project_id:dataset_id:table_id``.
-
- :rtype: str, or ``NoneType``
- :returns: the full ID (None until set from the server).
- """
- return self._properties.get('id')
-
- @property
- def table_type(self):
- """The type of the table.
-
- Possible values are "TABLE", "VIEW", or "EXTERNAL".
-
- :rtype: str, or ``NoneType``
- :returns: the table type (None until set from the server).
- """
- return self._properties.get('type')
-
- @property
- def partitioning_type(self):
- """Time partitioning of the table.
- :rtype: str, or ``NoneType``
- :returns: Returns type if the table is partitioned, None otherwise.
- """
- return self._properties.get('timePartitioning', {}).get('type')
-
- @partitioning_type.setter
- def partitioning_type(self, value):
- """Update the partitioning type of the table
-
- :type value: str
- :param value: partitioning type; only "DAY" is currently supported
- """
- if value not in ('DAY', None):
- raise ValueError("value must be one of ['DAY', None]")
-
- if value is None:
- self._properties.pop('timePartitioning', None)
- else:
- time_part = self._properties.setdefault('timePartitioning', {})
- time_part['type'] = value.upper()
-
- @property
- def partition_expiration(self):
- """Expiration time in ms for a partition
- :rtype: int, or ``NoneType``
- :returns: Returns the time in ms for partition expiration
- """
- return self._properties.get('timePartitioning', {}).get('expirationMs')
-
- @partition_expiration.setter
- def partition_expiration(self, value):
- """Update the experation time in ms for a partition
-
- :type value: int
- :param value: partition experiation time in ms
- """
- if not isinstance(value, (int, type(None))):
- raise ValueError(
- "must be an integer representing millisseconds or None")
-
- if value is None:
- if 'timePartitioning' in self._properties:
- self._properties['timePartitioning'].pop('expirationMs')
- else:
- try:
- self._properties['timePartitioning']['expirationMs'] = value
- except KeyError:
- self._properties['timePartitioning'] = {'type': 'DAY'}
- self._properties['timePartitioning']['expirationMs'] = value
-
- @property
- def description(self):
- """Description of the table.
-
- :rtype: str, or ``NoneType``
- :returns: The description as set by the user, or None (the default).
- """
- return self._properties.get('description')
-
- @description.setter
- def description(self, value):
- """Update description of the table.
-
- :type value: str
- :param value: (Optional) new description
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types) and value is not None:
- raise ValueError("Pass a string, or None")
- self._properties['description'] = value
-
- @property
- def expires(self):
- """Datetime at which the table will be removed.
-
- :rtype: ``datetime.datetime``, or ``NoneType``
- :returns: the expiration time, or None
- """
- expiration_time = self._properties.get('expirationTime')
- if expiration_time is not None:
- # expiration_time will be in milliseconds.
- return _datetime_from_microseconds(1000.0 * expiration_time)
-
- @expires.setter
- def expires(self, value):
- """Update datetime at which the table will be removed.
-
- :type value: ``datetime.datetime``
- :param value: (Optional) the new expiration time, or None
- """
- if not isinstance(value, datetime.datetime) and value is not None:
- raise ValueError("Pass a datetime, or None")
- self._properties['expirationTime'] = _millis_from_datetime(value)
-
- @property
- def friendly_name(self):
- """Title of the table.
-
- :rtype: str, or ``NoneType``
- :returns: The name as set by the user, or None (the default).
- """
- return self._properties.get('friendlyName')
-
- @friendly_name.setter
- def friendly_name(self, value):
- """Update title of the table.
-
- :type value: str
- :param value: (Optional) new title
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types) and value is not None:
- raise ValueError("Pass a string, or None")
- self._properties['friendlyName'] = value
-
- @property
- def location(self):
- """Location in which the table is hosted.
-
- :rtype: str, or ``NoneType``
- :returns: The location as set by the user, or None (the default).
- """
- return self._properties.get('location')
-
- @location.setter
- def location(self, value):
- """Update location in which the table is hosted.
-
- :type value: str
- :param value: (Optional) new location
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types) and value is not None:
- raise ValueError("Pass a string, or None")
- self._properties['location'] = value
-
- @property
- def view_query(self):
- """SQL query defining the table as a view.
-
- By default, the query is treated as Standard SQL. To use Legacy
- SQL, set view_use_legacy_sql to True.
-
- :rtype: str, or ``NoneType``
- :returns: The query as set by the user, or None (the default).
- """
- view = self._properties.get('view')
- if view is not None:
- return view.get('query')
-
- @view_query.setter
- def view_query(self, value):
- """Update SQL query defining the table as a view.
-
- :type value: str
- :param value: new query
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, six.string_types):
- raise ValueError("Pass a string")
- view = self._properties.get('view')
- if view is None:
- view = self._properties['view'] = {}
- view['query'] = value
- # The service defaults useLegacySql to True, but this
- # client uses Standard SQL by default.
- if view.get('useLegacySql') is None:
- view['useLegacySql'] = False
-
- @view_query.deleter
- def view_query(self):
- """Delete SQL query defining the table as a view."""
- self._properties.pop('view', None)
-
- @property
- def view_use_legacy_sql(self):
- """Specifies whether to execute the view with Legacy or Standard SQL.
-
- The default is False for views (use Standard SQL).
- If this table is not a view, None is returned.
-
- :rtype: bool or ``NoneType``
- :returns: The boolean for view.useLegacySql, or None if not a view.
- """
- view = self._properties.get('view')
- if view is not None:
- # useLegacySql is never missing from the view dict if this table
- # was created client-side, because the view_query setter populates
- # it. So a missing or None can only come from the server, whose
- # default is True.
- return view.get('useLegacySql', True)
-
- @view_use_legacy_sql.setter
- def view_use_legacy_sql(self, value):
- """Update the view sub-property 'useLegacySql'.
-
- This boolean specifies whether to execute the view with Legacy SQL
- (True) or Standard SQL (False). The default, if not specified, is
- 'False'.
-
- :type value: bool
- :param value: The boolean for view.useLegacySql
-
- :raises: ValueError for invalid value types.
- """
- if not isinstance(value, bool):
- raise ValueError("Pass a boolean")
- if self._properties.get('view') is None:
- self._properties['view'] = {}
- self._properties['view']['useLegacySql'] = value
-
- @property
- def streaming_buffer(self):
- """Information about a table's streaming buffer.
-
- :rtype: :class:`StreamingBuffer`
- :returns: Streaming buffer information, returned from get_table.
- """
- sb = self._properties.get('streamingBuffer')
- if sb is not None:
- return StreamingBuffer(sb)
-
- @property
- def external_data_configuration(self):
- """Configuration for an external data source.
-
- If not set, None is returned.
-
- :rtype: :class:`ExternalConfig`, or ``NoneType``
- :returns: The external configuration, or None (the default).
- """
- return self._external_config
-
- @external_data_configuration.setter
- def external_data_configuration(self, value):
- """Sets the configuration for an external data source.
-
- :type value: :class:`ExternalConfig`, or ``NoneType``
- :param value: The ExternalConfig, or None to unset.
- """
- if not (value is None or isinstance(value, ExternalConfig)):
- raise ValueError("Pass an ExternalConfig or None")
- self._external_config = value
-
- @classmethod
- def from_api_repr(cls, resource):
- """Factory: construct a table given its API representation
-
- :type resource: dict
- :param resource: table resource representation returned from the API
-
- :rtype: :class:`google.cloud.bigquery.table.Table`
- :returns: Table parsed from ``resource``.
- """
- from google.cloud.bigquery import dataset
-
- if ('tableReference' not in resource or
- 'tableId' not in resource['tableReference']):
- raise KeyError('Resource lacks required identity information:'
- '["tableReference"]["tableId"]')
- project_id = resource['tableReference']['projectId']
- table_id = resource['tableReference']['tableId']
- dataset_id = resource['tableReference']['datasetId']
- dataset_ref = dataset.DatasetReference(project_id, dataset_id)
-
- table = cls(dataset_ref.table(table_id))
- table._set_properties(resource)
- return table
-
- def _set_properties(self, api_response):
- """Update properties from resource in body of ``api_response``
-
- :type api_response: dict
- :param api_response: response returned from an API call
- """
- self._properties.clear()
- cleaned = api_response.copy()
- schema = cleaned.pop('schema', {'fields': ()})
- self.schema = _parse_schema_resource(schema)
- ec = cleaned.pop('externalDataConfiguration', None)
- if ec:
- self.external_data_configuration = ExternalConfig.from_api_repr(ec)
- if 'creationTime' in cleaned:
- cleaned['creationTime'] = float(cleaned['creationTime'])
- if 'lastModifiedTime' in cleaned:
- cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime'])
- if 'expirationTime' in cleaned:
- cleaned['expirationTime'] = float(cleaned['expirationTime'])
- if 'labels' not in cleaned:
- cleaned['labels'] = {}
- self._properties.update(cleaned)
-
- def _populate_expires_resource(self, resource):
- resource['expirationTime'] = _millis_from_datetime(self.expires)
-
- def _populate_partitioning_type_resource(self, resource):
- resource['timePartitioning'] = self._properties.get('timePartitioning')
-
- def _populate_view_use_legacy_sql_resource(self, resource):
- if 'view' not in resource:
- resource['view'] = {}
- resource['view']['useLegacySql'] = self.view_use_legacy_sql
-
- def _populate_view_query_resource(self, resource):
- if self.view_query is None:
- resource['view'] = None
- return
- if 'view' not in resource:
- resource['view'] = {}
- resource['view']['query'] = self.view_query
-
- def _populate_schema_resource(self, resource):
- if not self._schema:
- resource['schema'] = None
- else:
- resource['schema'] = {
- 'fields': _build_schema_resource(self._schema),
- }
-
- def _populate_external_config(self, resource):
- if not self.external_data_configuration:
- resource['externalDataConfiguration'] = None
- else:
- resource['externalDataConfiguration'] = ExternalConfig.to_api_repr(
- self.external_data_configuration)
-
- custom_resource_fields = {
- 'expires': _populate_expires_resource,
- 'partitioning_type': _populate_partitioning_type_resource,
- 'view_query': _populate_view_query_resource,
- 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource,
- 'schema': _populate_schema_resource,
- 'external_data_configuration': _populate_external_config,
- }
-
- def _build_resource(self, filter_fields):
- """Generate a resource for ``create`` or ``update``."""
- resource = {
- 'tableReference': {
- 'projectId': self._project,
- 'datasetId': self._dataset_id,
- 'tableId': self.table_id},
- }
- for f in filter_fields:
- if f in self.custom_resource_fields:
- self.custom_resource_fields[f](self, resource)
- else:
- api_field = _snake_to_camel_case(f)
- resource[api_field] = getattr(self, f)
- return resource
-
-
-def _row_from_mapping(mapping, schema):
- """Convert a mapping to a row tuple using the schema.
-
- :type mapping: dict
- :param mapping: Mapping of row data: must contain keys for all
- required fields in the schema. Keys which do not correspond
- to a field in the schema are ignored.
-
- :type schema: list of :class:`SchemaField`
- :param schema: The schema of the table destination for the rows
-
- :rtype: tuple
- :returns: Tuple whose elements are ordered according to the schema.
- :raises: ValueError if schema is empty
- """
- if len(schema) == 0:
- raise ValueError(_TABLE_HAS_NO_SCHEMA)
-
- row = []
- for field in schema:
- if field.mode == 'REQUIRED':
- row.append(mapping[field.name])
- elif field.mode == 'REPEATED':
- row.append(mapping.get(field.name, ()))
- elif field.mode == 'NULLABLE':
- row.append(mapping.get(field.name))
- else:
- raise ValueError(
- "Unknown field mode: {}".format(field.mode))
- return tuple(row)
-
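A small sketch of the helper above; the schema and mapping are made up, and a missing REPEATED field falls back to an empty tuple.

.. code:: python

    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import _row_from_mapping

    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
        SchemaField('tags', 'STRING', mode='REPEATED'),
    ]
    row = _row_from_mapping({'full_name': 'Phred', 'age': 32}, schema)
    assert row == ('Phred', 32, ())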
-
-class StreamingBuffer(object):
- """Information about a table's streaming buffer.
-
- See https://cloud.google.com/bigquery/streaming-data-into-bigquery.
-
- :type resource: dict
- :param resource: streaming buffer representation returned from the API
- """
-
- def __init__(self, resource):
- self.estimated_bytes = int(resource['estimatedBytes'])
- self.estimated_rows = int(resource['estimatedRows'])
- # time is in milliseconds since the epoch.
- self.oldest_entry_time = _datetime_from_microseconds(
- 1000.0 * int(resource['oldestEntryTime']))
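Finally, a hedged sketch tying the reference and table classes above together; the project, dataset, and table IDs are placeholders, and ``DatasetReference`` is assumed to come from the companion ``dataset`` module.

.. code:: python

    from google.cloud.bigquery.dataset import DatasetReference
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import Table, TableReference

    dataset_ref = DatasetReference('my-project', 'my_dataset')
    table_ref = TableReference(dataset_ref, 'people')
    table = Table(table_ref, schema=[SchemaField('full_name', 'STRING')])

    assert table.path == (
        '/projects/my-project/datasets/my_dataset/tables/people')
    assert table_ref.to_api_repr() == {
        'projectId': 'my-project',
        'datasetId': 'my_dataset',
        'tableId': 'people',
    }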
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
deleted file mode 100644
index 335beda..0000000
--- a/bigquery/tests/system.py
+++ /dev/null
@@ -1,1431 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import base64
-import csv
-import datetime
-import json
-import operator
-import os
-import time
-import unittest
-import uuid
-
-import six
-
-from google.api_core.exceptions import PreconditionFailed
-from google.cloud import bigquery
-from google.cloud.bigquery.dataset import Dataset, DatasetReference
-from google.cloud.bigquery.table import Table
-from google.cloud._helpers import UTC
-from google.cloud.bigquery import dbapi
-from google.cloud.exceptions import Forbidden, NotFound
-
-from test_utils.retry import RetryErrors
-from test_utils.retry import RetryInstanceState
-from test_utils.retry import RetryResult
-from test_utils.system import unique_resource_id
-
-
-JOB_TIMEOUT = 120 # 2 minutes
-WHERE = os.path.abspath(os.path.dirname(__file__))
-
-# Common table data used for many tests.
-ROWS = [
- ('Phred Phlyntstone', 32),
- ('Bharney Rhubble', 33),
- ('Wylma Phlyntstone', 29),
- ('Bhettye Rhubble', 27),
-]
-HEADER_ROW = ('Full Name', 'Age')
-SCHEMA = [
- bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
- bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
-]
-
-
-def _has_rows(result):
- return len(result) > 0
-
-
-def _make_dataset_id(prefix):
- return '%s%s' % (prefix, unique_resource_id())
-
-
-def _load_json_schema(filename='data/schema.json'):
- from google.cloud.bigquery.table import _parse_schema_resource
-
- json_filename = os.path.join(WHERE, filename)
-
- with open(json_filename, 'r') as schema_file:
- return _parse_schema_resource(json.load(schema_file))
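``data/schema.json`` itself isn't part of this diff, but ``_parse_schema_resource`` consumes the standard BigQuery schema resource shape (a top-level ``fields`` list), so the file presumably looks something like the abridged, partly guessed sketch below; the field names match those checked later in ``test_create_table_rows_fetch_nested_schema``:

    {
      "fields": [
        {"name": "Name", "type": "STRING", "mode": "NULLABLE"},
        {"name": "Age", "type": "INTEGER", "mode": "NULLABLE"},
        {"name": "Spells", "type": "RECORD", "mode": "REPEATED",
         "fields": [
           {"name": "Name", "type": "STRING", "mode": "NULLABLE"},
           {"name": "LastUsed", "type": "TIMESTAMP", "mode": "NULLABLE"}
         ]}
      ]
    }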
-
-
-def _rate_limit_exceeded(forbidden):
- """Predicate: pass only exceptions with 'rateLimitExceeded' as reason."""
- return any(error['reason'] == 'rateLimitExceeded'
- for error in forbidden._errors)
-
-
-# We need to wait to stay within the rate limits.
-# The alternative outcome is a 403 Forbidden response from upstream, which
-# the API returns instead of the more appropriate 429.
-# See https://cloud.google.com/bigquery/quota-policy
-retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded)
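(``RetryErrors`` comes from the shared ``test_utils`` package; the sketch below only illustrates the idea of predicate-filtered retries with exponential backoff, not that package's actual implementation.)

    import functools
    import time

    def retry_on(exc_type, predicate, max_tries=4):
        # Retry while ``exc_type`` is raised and ``predicate`` deems it
        # retryable, sleeping 1, 2, 4, ... seconds between attempts.
        def decorate(func):
            @functools.wraps(func)
            def wrapped(*args, **kwargs):
                delay = 1
                for attempt in range(max_tries):
                    try:
                        return func(*args, **kwargs)
                    except exc_type as exc:
                        if attempt == max_tries - 1 or not predicate(exc):
                            raise
                        time.sleep(delay)
                        delay *= 2
            return wrapped
        return decorate

    # Usage mirrors how retry_403 is used in the tests, e.g.:
    #     retry_on(Forbidden, _rate_limit_exceeded)(client.create_dataset)(ds)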
-
-
-class Config(object):
- """Run-time configuration to be modified at set-up.
-
- This is a mutable stand-in to allow test set-up to modify
- global state.
- """
- CLIENT = None
- CURSOR = None
-
-
-def setUpModule():
- Config.CLIENT = bigquery.Client()
- Config.CURSOR = dbapi.connect(Config.CLIENT).cursor()
-
-
-class TestBigQuery(unittest.TestCase):
-
- def setUp(self):
- self.to_delete = []
-
- def tearDown(self):
- from google.cloud.storage import Bucket
- from google.cloud.exceptions import BadRequest
- from google.cloud.exceptions import Conflict
-
- def _still_in_use(bad_request):
- return any(error['reason'] == 'resourceInUse'
- for error in bad_request._errors)
-
- retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
- retry_409 = RetryErrors(Conflict)
- for doomed in self.to_delete:
- if isinstance(doomed, Bucket):
- retry_409(doomed.delete)(force=True)
- elif isinstance(doomed, Dataset):
- retry_in_use(Config.CLIENT.delete_dataset)(doomed)
- elif isinstance(doomed, Table):
- retry_in_use(Config.CLIENT.delete_table)(doomed)
- else:
- doomed.delete()
-
- def test_create_dataset(self):
- DATASET_ID = _make_dataset_id('create_dataset')
- dataset = self.temp_dataset(DATASET_ID)
-
- self.assertTrue(_dataset_exists(dataset))
- self.assertEqual(dataset.dataset_id, DATASET_ID)
- self.assertEqual(dataset.project, Config.CLIENT.project)
-
- def test_get_dataset(self):
- DATASET_ID = _make_dataset_id('get_dataset')
- client = Config.CLIENT
- dataset_arg = Dataset(client.dataset(DATASET_ID))
- dataset_arg.friendly_name = 'Friendly'
- dataset_arg.description = 'Description'
- dataset = retry_403(client.create_dataset)(dataset_arg)
- self.to_delete.append(dataset)
- dataset_ref = client.dataset(DATASET_ID)
-
- got = client.get_dataset(dataset_ref)
-
- self.assertEqual(got.friendly_name, 'Friendly')
- self.assertEqual(got.description, 'Description')
-
- def test_update_dataset(self):
- dataset = self.temp_dataset(_make_dataset_id('update_dataset'))
- self.assertTrue(_dataset_exists(dataset))
- self.assertIsNone(dataset.friendly_name)
- self.assertIsNone(dataset.description)
-        self.assertEqual(dataset.labels, {})
-
- dataset.friendly_name = 'Friendly'
- dataset.description = 'Description'
- dataset.labels = {'priority': 'high', 'color': 'blue'}
- ds2 = Config.CLIENT.update_dataset(
- dataset,
- ('friendly_name', 'description', 'labels'))
- self.assertEqual(ds2.friendly_name, 'Friendly')
- self.assertEqual(ds2.description, 'Description')
- self.assertEqual(ds2.labels, {'priority': 'high', 'color': 'blue'})
-
- ds2.labels = {
- 'color': 'green', # change
- 'shape': 'circle', # add
- 'priority': None, # delete
- }
- ds3 = Config.CLIENT.update_dataset(ds2, ['labels'])
- self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'})
-
-        # If we try to update using ds2 again, it will fail because the
- # previous update changed the ETag.
- ds2.description = 'no good'
- with self.assertRaises(PreconditionFailed):
- Config.CLIENT.update_dataset(ds2, ['description'])
-
- def test_list_datasets(self):
- datasets_to_create = [
- 'new' + unique_resource_id(),
- 'newer' + unique_resource_id(),
- 'newest' + unique_resource_id(),
- ]
- for dataset_id in datasets_to_create:
- self.temp_dataset(dataset_id)
-
- # Retrieve the datasets.
- iterator = Config.CLIENT.list_datasets()
- all_datasets = list(iterator)
- self.assertIsNone(iterator.next_page_token)
- created = [dataset for dataset in all_datasets
- if dataset.dataset_id in datasets_to_create and
- dataset.project == Config.CLIENT.project]
- self.assertEqual(len(created), len(datasets_to_create))
-
- def test_create_table(self):
- dataset = self.temp_dataset(_make_dataset_id('create_table'))
- table_id = 'test_table'
- table_arg = Table(dataset.table(table_id), schema=SCHEMA)
- self.assertFalse(_table_exists(table_arg))
-
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- self.assertTrue(_table_exists(table))
- self.assertEqual(table.table_id, table_id)
-
- def test_get_table_w_public_dataset(self):
- PUBLIC = 'bigquery-public-data'
- DATASET_ID = 'samples'
- TABLE_ID = 'shakespeare'
- table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_ID)
-
- table = Config.CLIENT.get_table(table_ref)
-
- self.assertEqual(table.table_id, TABLE_ID)
- self.assertEqual(table.dataset_id, DATASET_ID)
- self.assertEqual(table.project, PUBLIC)
- schema_names = [field.name for field in table.schema]
- self.assertEqual(
- schema_names, ['word', 'word_count', 'corpus', 'corpus_date'])
-
- def test_list_dataset_tables(self):
- DATASET_ID = _make_dataset_id('list_tables')
- dataset = self.temp_dataset(DATASET_ID)
- # Retrieve tables before any are created for the dataset.
- iterator = Config.CLIENT.list_dataset_tables(dataset)
- all_tables = list(iterator)
- self.assertEqual(all_tables, [])
- self.assertIsNone(iterator.next_page_token)
-
- # Insert some tables to be listed.
- tables_to_create = [
- 'new' + unique_resource_id(),
- 'newer' + unique_resource_id(),
- 'newest' + unique_resource_id(),
- ]
- for table_name in tables_to_create:
- table = Table(dataset.table(table_name), schema=SCHEMA)
- created_table = retry_403(Config.CLIENT.create_table)(table)
- self.to_delete.insert(0, created_table)
-
- # Retrieve the tables.
- iterator = Config.CLIENT.list_dataset_tables(dataset)
- all_tables = list(iterator)
- self.assertIsNone(iterator.next_page_token)
- created = [table for table in all_tables
- if (table.table_id in tables_to_create and
- table.dataset_id == DATASET_ID)]
- self.assertEqual(len(created), len(tables_to_create))
-
- def test_update_table(self):
- dataset = self.temp_dataset(_make_dataset_id('update_table'))
-
- TABLE_NAME = 'test_table'
- table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
- self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
- self.assertTrue(_table_exists(table))
- self.assertIsNone(table.friendly_name)
- self.assertIsNone(table.description)
-        self.assertEqual(table.labels, {})
- table.friendly_name = 'Friendly'
- table.description = 'Description'
- table.labels = {'priority': 'high', 'color': 'blue'}
-
- table2 = Config.CLIENT.update_table(
- table, ['friendly_name', 'description', 'labels'])
-
- self.assertEqual(table2.friendly_name, 'Friendly')
- self.assertEqual(table2.description, 'Description')
- self.assertEqual(table2.labels, {'priority': 'high', 'color': 'blue'})
-
- table2.description = None
- table2.labels = {
- 'color': 'green', # change
- 'shape': 'circle', # add
- 'priority': None, # delete
- }
- table3 = Config.CLIENT.update_table(table2, ['description', 'labels'])
- self.assertIsNone(table3.description)
- self.assertEqual(table3.labels, {'color': 'green', 'shape': 'circle'})
-
- # If we try to update using table2 again, it will fail because the
- # previous update changed the ETag.
- table2.description = 'no good'
- with self.assertRaises(PreconditionFailed):
- Config.CLIENT.update_table(table2, ['description'])
-
- def test_update_table_schema(self):
- dataset = self.temp_dataset(_make_dataset_id('update_table'))
-
- TABLE_NAME = 'test_table'
- table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
- self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
- self.assertTrue(_table_exists(table))
- voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE')
- schema = table.schema
- schema.append(voter)
- table.schema = schema
-
- updated_table = Config.CLIENT.update_table(table, ['schema'])
-
- self.assertEqual(len(updated_table.schema), len(schema))
- for found, expected in zip(updated_table.schema, schema):
- self.assertEqual(found.name, expected.name)
- self.assertEqual(found.field_type, expected.field_type)
- self.assertEqual(found.mode, expected.mode)
-
- @staticmethod
- def _fetch_single_page(table, selected_fields=None):
- iterator = Config.CLIENT.list_rows(
- table, selected_fields=selected_fields)
- page = six.next(iterator.pages)
- return list(page)
-
- def test_create_rows_then_dump_table(self):
- NOW_SECONDS = 1448911495.484366
- NOW = datetime.datetime.utcfromtimestamp(
- NOW_SECONDS).replace(tzinfo=UTC)
- ROWS = [
- ('Phred Phlyntstone', 32, NOW),
- ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)),
- ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)),
- ('Bhettye Rhubble', 27, None),
- ]
- ROW_IDS = range(len(ROWS))
-
- dataset = self.temp_dataset(_make_dataset_id('create_rows_then_dump'))
- TABLE_ID = 'test_table'
- schema = [
- bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
- bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
- bigquery.SchemaField('now', 'TIMESTAMP'),
- ]
- table_arg = Table(dataset.table(TABLE_ID), schema=schema)
- self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
- self.assertTrue(_table_exists(table))
-
- errors = Config.CLIENT.create_rows(table, ROWS, row_ids=ROW_IDS)
- self.assertEqual(len(errors), 0)
-
- rows = ()
-
- # Allow for "warm up" before rows visible. See
- # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
- # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
- retry = RetryResult(_has_rows, max_tries=8)
- rows = retry(self._fetch_single_page)(table)
- row_tuples = [r.values() for r in rows]
- by_age = operator.itemgetter(1)
- self.assertEqual(sorted(row_tuples, key=by_age),
- sorted(ROWS, key=by_age))
-
- def test_load_table_from_local_file_then_dump_table(self):
- from google.cloud._testing import _NamedTemporaryFile
-
- TABLE_NAME = 'test_table'
-
- dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump'))
- table_ref = dataset.table(TABLE_NAME)
- table_arg = Table(table_ref, schema=SCHEMA)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- with _NamedTemporaryFile() as temp:
- with open(temp.name, 'w') as csv_write:
- writer = csv.writer(csv_write)
- writer.writerow(HEADER_ROW)
- writer.writerows(ROWS)
-
- with open(temp.name, 'rb') as csv_read:
- config = bigquery.LoadJobConfig()
- config.source_format = 'CSV'
- config.skip_leading_rows = 1
- config.create_disposition = 'CREATE_NEVER'
- config.write_disposition = 'WRITE_EMPTY'
- config.schema = table.schema
- job = Config.CLIENT.load_table_from_file(
- csv_read, table_ref, job_config=config)
-
- # Retry until done.
- job.result(timeout=JOB_TIMEOUT)
-
- self.assertEqual(job.output_rows, len(ROWS))
-
- rows = self._fetch_single_page(table)
- row_tuples = [r.values() for r in rows]
- by_age = operator.itemgetter(1)
- self.assertEqual(sorted(row_tuples, key=by_age),
- sorted(ROWS, key=by_age))
-
- def test_load_table_from_local_avro_file_then_dump_table(self):
- TABLE_NAME = 'test_table_avro'
- ROWS = [
- ("violet", 400),
- ("indigo", 445),
- ("blue", 475),
- ("green", 510),
- ("yellow", 570),
- ("orange", 590),
- ("red", 650)]
-
- dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump'))
- table_ref = dataset.table(TABLE_NAME)
- table = Table(table_ref)
- self.to_delete.insert(0, table)
-
- with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof:
- config = bigquery.LoadJobConfig()
- config.source_format = 'AVRO'
- config.write_disposition = 'WRITE_TRUNCATE'
- job = Config.CLIENT.load_table_from_file(
- avrof, table_ref, job_config=config)
- # Retry until done.
- job.result(timeout=JOB_TIMEOUT)
-
- self.assertEqual(job.output_rows, len(ROWS))
-
- table = Config.CLIENT.get_table(table)
- rows = self._fetch_single_page(table)
- row_tuples = [r.values() for r in rows]
- by_wavelength = operator.itemgetter(1)
- self.assertEqual(sorted(row_tuples, key=by_wavelength),
- sorted(ROWS, key=by_wavelength))
-
- def test_load_table_from_uri_then_dump_table(self):
- TABLE_ID = 'test_table'
- GS_URL = self._write_csv_to_storage(
- 'bq_load_test' + unique_resource_id(), 'person_ages.csv',
- HEADER_ROW, ROWS)
-
- dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump'))
-
- table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- config = bigquery.LoadJobConfig()
- config.create_disposition = 'CREATE_NEVER'
- config.skip_leading_rows = 1
- config.source_format = 'CSV'
- config.write_disposition = 'WRITE_EMPTY'
- job = Config.CLIENT.load_table_from_uri(
- GS_URL, dataset.table(TABLE_ID), job_config=config)
-
-        # Allow for "warm up" before rows visible. See
- # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
- # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
- retry = RetryInstanceState(_job_done, max_tries=8)
- retry(job.reload)()
-
- rows = self._fetch_single_page(table)
- row_tuples = [r.values() for r in rows]
- by_age = operator.itemgetter(1)
- self.assertEqual(sorted(row_tuples, key=by_age),
- sorted(ROWS, key=by_age))
-
- def test_load_table_from_uri_w_autodetect_schema_then_get_job(self):
- from google.cloud.bigquery import SchemaField
- from google.cloud.bigquery.job import LoadJob
-
- rows = ROWS * 100
- # BigQuery internally uses the first 100 rows to detect schema
-
- gs_url = self._write_csv_to_storage(
- 'bq_load_test' + unique_resource_id(), 'person_ages.csv',
- HEADER_ROW, rows)
- dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump'))
- table_ref = dataset.table('test_table')
- JOB_ID = 'load_table_w_autodetect_{}'.format(str(uuid.uuid4()))
-
- config = bigquery.LoadJobConfig()
- config.autodetect = True
- job = Config.CLIENT.load_table_from_uri(
- gs_url, table_ref, job_config=config, job_id=JOB_ID)
-
-        # Allow for "warm up" before rows visible. See
- # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
- # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
- retry = RetryInstanceState(_job_done, max_tries=8)
- retry(job.reload)()
-
- table = Config.CLIENT.get_table(table_ref)
- self.to_delete.insert(0, table)
- field_name = SchemaField(
- u'Full_Name', u'string', u'NULLABLE', None, ())
- field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
- self.assertEqual(table.schema, [field_name, field_age])
-
- actual_rows = self._fetch_single_page(table)
- actual_row_tuples = [r.values() for r in actual_rows]
- by_age = operator.itemgetter(1)
- self.assertEqual(
- sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age))
-
- fetched_job = Config.CLIENT.get_job(JOB_ID)
-
- self.assertIsInstance(fetched_job, LoadJob)
- self.assertEqual(fetched_job.job_id, JOB_ID)
- self.assertEqual(fetched_job.autodetect, True)
-
- def _write_csv_to_storage(self, bucket_name, blob_name, header_row,
- data_rows):
- from google.cloud._testing import _NamedTemporaryFile
- from google.cloud.storage import Client as StorageClient
-
- storage_client = StorageClient()
-
- # In the **very** rare case the bucket name is reserved, this
- # fails with a ConnectionError.
- bucket = storage_client.create_bucket(bucket_name)
- self.to_delete.append(bucket)
-
- blob = bucket.blob(blob_name)
-
- with _NamedTemporaryFile() as temp:
- with open(temp.name, 'w') as csv_write:
- writer = csv.writer(csv_write)
- writer.writerow(header_row)
- writer.writerows(data_rows)
-
- with open(temp.name, 'rb') as csv_read:
- blob.upload_from_file(csv_read, content_type='text/csv')
-
- self.to_delete.insert(0, blob)
-
- return 'gs://{}/{}'.format(bucket_name, blob_name)
-
- def _load_table_for_extract_table(
- self, storage_client, rows, bucket_name, blob_name, table):
- from google.cloud._testing import _NamedTemporaryFile
-
- gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
-
- # In the **very** rare case the bucket name is reserved, this
- # fails with a ConnectionError.
- bucket = storage_client.create_bucket(bucket_name)
- self.to_delete.append(bucket)
- blob = bucket.blob(blob_name)
-
- with _NamedTemporaryFile() as temp:
- with open(temp.name, 'w') as csv_write:
- writer = csv.writer(csv_write)
- writer.writerow(HEADER_ROW)
- writer.writerows(rows)
-
- with open(temp.name, 'rb') as csv_read:
- blob.upload_from_file(csv_read, content_type='text/csv')
- self.to_delete.insert(0, blob)
-
- dataset = self.temp_dataset(table.dataset_id)
- table_ref = dataset.table(table.table_id)
- config = bigquery.LoadJobConfig()
- config.autodetect = True
- job = Config.CLIENT.load_table_from_uri(gs_url, table_ref,
- job_config=config)
- # TODO(jba): do we need this retry now that we have job.result()?
-        # Allow for "warm up" before rows visible. See
- # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
- # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
- retry = RetryInstanceState(_job_done, max_tries=8)
- retry(job.reload)()
-
- def test_extract_table(self):
- from google.cloud.storage import Client as StorageClient
-
- storage_client = StorageClient()
- local_id = unique_resource_id()
- bucket_name = 'bq_extract_test' + local_id
- blob_name = 'person_ages.csv'
- dataset_id = _make_dataset_id('load_gcs_then_extract')
- table_id = 'test_table'
- table_ref = Config.CLIENT.dataset(dataset_id).table(table_id)
- table = Table(table_ref)
- self.to_delete.insert(0, table)
- self._load_table_for_extract_table(
- storage_client, ROWS, bucket_name, blob_name, table_ref)
- bucket = storage_client.bucket(bucket_name)
- destination_blob_name = 'person_ages_out.csv'
- destination = bucket.blob(destination_blob_name)
- destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name)
-
- job = Config.CLIENT.extract_table(table_ref, destination_uri)
- job.result(timeout=100)
-
- self.to_delete.insert(0, destination)
- got = destination.download_as_string().decode('utf-8')
- self.assertIn('Bharney Rhubble', got)
-
- def test_extract_table_w_job_config(self):
- from google.cloud.storage import Client as StorageClient
-
- storage_client = StorageClient()
- local_id = unique_resource_id()
- bucket_name = 'bq_extract_test' + local_id
- blob_name = 'person_ages.csv'
- dataset_id = _make_dataset_id('load_gcs_then_extract')
- table_id = 'test_table'
- table_ref = Config.CLIENT.dataset(dataset_id).table(table_id)
- table = Table(table_ref)
- self.to_delete.insert(0, table)
- self._load_table_for_extract_table(
- storage_client, ROWS, bucket_name, blob_name, table_ref)
- bucket = storage_client.bucket(bucket_name)
- destination_blob_name = 'person_ages_out.csv'
- destination = bucket.blob(destination_blob_name)
- destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name)
-
- job_config = bigquery.ExtractJobConfig()
- job_config.destination_format = 'NEWLINE_DELIMITED_JSON'
- job = Config.CLIENT.extract_table(
- table, destination_uri, job_config=job_config)
- job.result()
-
- self.to_delete.insert(0, destination)
- got = destination.download_as_string().decode('utf-8')
- self.assertIn('"Bharney Rhubble"', got)
-
- def test_copy_table(self):
- # If we create a new table to copy from, the test won't work
- # because the new rows will be stored in the streaming buffer,
- # and copy jobs don't read the streaming buffer.
- # We could wait for the streaming buffer to empty, but that could
- # take minutes. Instead we copy a small public table.
- source_dataset = DatasetReference('bigquery-public-data', 'samples')
- source_ref = source_dataset.table('shakespeare')
- dest_dataset = self.temp_dataset(_make_dataset_id('copy_table'))
- dest_ref = dest_dataset.table('destination_table')
- job_config = bigquery.CopyJobConfig()
- job = Config.CLIENT.copy_table(
- source_ref, dest_ref, job_config=job_config)
- job.result()
-
- dest_table = Config.CLIENT.get_table(dest_ref)
- self.to_delete.insert(0, dest_table)
- # Just check that we got some rows.
- got_rows = self._fetch_single_page(dest_table)
- self.assertTrue(len(got_rows) > 0)
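Related to the streaming-buffer caveat above: if the source table still has rows in its streaming buffer, the copy job will not see them. A hedged sketch of detecting that situation (this assumes ``Table`` exposes a ``streaming_buffer`` attribute backed by the ``StreamingBuffer`` helper removed earlier in this diff):

    # Hypothetical guard -- the attribute name is an assumption.
    source_table = Config.CLIENT.get_table(source_ref)
    if getattr(source_table, 'streaming_buffer', None) is not None:
        # Rows still in the buffer would be missing from the copy.
        print('%d rows still buffered' %
              source_table.streaming_buffer.estimated_rows)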
-
- def test_job_cancel(self):
- DATASET_ID = _make_dataset_id('job_cancel')
- JOB_ID_PREFIX = 'fetch_' + DATASET_ID
- TABLE_NAME = 'test_table'
- QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME)
-
- dataset = self.temp_dataset(DATASET_ID)
-
- table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX)
- job.cancel()
-
- retry = RetryInstanceState(_job_done, max_tries=8)
- retry(job.reload)()
-
-        # The `cancel` API doesn't leave any reliable traces in
-        # the status of the job resource, so there is nothing to assert
-        # on here. The best we can do is note that the API call didn't
-        # raise an error, and that the job completed (in the `retry()`
-        # above).
-
- def test_query_rows_w_legacy_sql_types(self):
- naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
- stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat())
- zoned = naive.replace(tzinfo=UTC)
- examples = [
- {
- 'sql': 'SELECT 1',
- 'expected': 1,
- },
- {
- 'sql': 'SELECT 1.3',
- 'expected': 1.3,
- },
- {
- 'sql': 'SELECT TRUE',
- 'expected': True,
- },
- {
- 'sql': 'SELECT "ABC"',
- 'expected': 'ABC',
- },
- {
- 'sql': 'SELECT CAST("foo" AS BYTES)',
- 'expected': b'foo',
- },
- {
- 'sql': 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,),
- 'expected': zoned,
- },
- ]
- for example in examples:
- job_config = bigquery.QueryJobConfig()
- job_config.use_legacy_sql = True
- rows = list(Config.CLIENT.query_rows(
- example['sql'], job_config=job_config))
- self.assertEqual(len(rows), 1)
- self.assertEqual(len(rows[0]), 1)
- self.assertEqual(rows[0][0], example['expected'])
-
- def _generate_standard_sql_types_examples(self):
- naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
- naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000)
- stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat())
- stamp_microseconds = stamp + '.250000'
- zoned = naive.replace(tzinfo=UTC)
- zoned_microseconds = naive_microseconds.replace(tzinfo=UTC)
- return [
- {
- 'sql': 'SELECT 1',
- 'expected': 1,
- },
- {
- 'sql': 'SELECT 1.3',
- 'expected': 1.3,
- },
- {
- 'sql': 'SELECT TRUE',
- 'expected': True,
- },
- {
- 'sql': 'SELECT "ABC"',
- 'expected': 'ABC',
- },
- {
- 'sql': 'SELECT CAST("foo" AS BYTES)',
- 'expected': b'foo',
- },
- {
- 'sql': 'SELECT TIMESTAMP "%s"' % (stamp,),
- 'expected': zoned,
- },
- {
- 'sql': 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,),
- 'expected': zoned_microseconds,
- },
- {
- 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,),
- 'expected': naive,
- },
- {
- 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (
- stamp_microseconds,),
- 'expected': naive_microseconds,
- },
- {
- 'sql': 'SELECT DATE(TIMESTAMP "%s")' % (stamp,),
- 'expected': naive.date(),
- },
- {
- 'sql': 'SELECT TIME(TIMESTAMP "%s")' % (stamp,),
- 'expected': naive.time(),
- },
- {
- 'sql': 'SELECT (1, 2)',
- 'expected': {'_field_1': 1, '_field_2': 2},
- },
- {
- 'sql': 'SELECT ((1, 2), (3, 4), 5)',
- 'expected': {
- '_field_1': {'_field_1': 1, '_field_2': 2},
- '_field_2': {'_field_1': 3, '_field_2': 4},
- '_field_3': 5,
- },
- },
- {
- 'sql': 'SELECT [1, 2, 3]',
- 'expected': [1, 2, 3],
- },
- {
- 'sql': 'SELECT ([1, 2], 3, [4, 5])',
- 'expected':
- {'_field_1': [1, 2], '_field_2': 3, '_field_3': [4, 5]},
- },
- {
- 'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]',
- 'expected': [
- {'_field_1': 1, '_field_2': 2, '_field_3': 3},
- {'_field_1': 4, '_field_2': 5, '_field_3': 6},
- ],
- },
- {
- 'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]',
- 'expected': [
- {u'_field_1': [1, 2, 3], u'_field_2': 4},
- {u'_field_1': [5, 6], u'_field_2': 7},
- ],
- },
- {
- 'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))',
- 'expected': [{u'_field_1': [1, 2]}],
- },
- ]
-
- def test_query_rows_w_standard_sql_types(self):
- examples = self._generate_standard_sql_types_examples()
- for example in examples:
- rows = list(Config.CLIENT.query_rows(example['sql']))
- self.assertEqual(len(rows), 1)
- self.assertEqual(len(rows[0]), 1)
- self.assertEqual(rows[0][0], example['expected'])
-
- def test_query_rows_w_failed_query(self):
- from google.api_core.exceptions import BadRequest
-
- with self.assertRaises(BadRequest):
- Config.CLIENT.query_rows('invalid syntax;')
- # TODO(swast): Ensure that job ID is surfaced in the exception.
-
- def test_dbapi_w_standard_sql_types(self):
- examples = self._generate_standard_sql_types_examples()
- for example in examples:
- Config.CURSOR.execute(example['sql'])
- self.assertEqual(Config.CURSOR.rowcount, 1)
- row = Config.CURSOR.fetchone()
- self.assertEqual(len(row), 1)
- self.assertEqual(row[0], example['expected'])
- row = Config.CURSOR.fetchone()
- self.assertIsNone(row)
-
- def test_dbapi_fetchall(self):
- query = 'SELECT * FROM UNNEST([(1, 2), (3, 4), (5, 6)])'
-
- for arraysize in range(1, 5):
- Config.CURSOR.execute(query)
- self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows")
- Config.CURSOR.arraysize = arraysize
- rows = Config.CURSOR.fetchall()
- row_tuples = [r.values() for r in rows]
- self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)])
-
- def _load_table_for_dml(self, rows, dataset_id, table_id):
- from google.cloud._testing import _NamedTemporaryFile
-
- dataset = self.temp_dataset(dataset_id)
- greeting = bigquery.SchemaField(
- 'greeting', 'STRING', mode='NULLABLE')
- table_ref = dataset.table(table_id)
- table_arg = Table(table_ref, schema=[greeting])
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- with _NamedTemporaryFile() as temp:
- with open(temp.name, 'w') as csv_write:
- writer = csv.writer(csv_write)
- writer.writerow(('Greeting',))
- writer.writerows(rows)
-
- with open(temp.name, 'rb') as csv_read:
- config = bigquery.LoadJobConfig()
- config.source_format = 'CSV'
- config.skip_leading_rows = 1
- config.create_disposition = 'CREATE_NEVER'
- config.write_disposition = 'WRITE_EMPTY'
- job = Config.CLIENT.load_table_from_file(
- csv_read, table_ref, job_config=config)
-
- # Retry until done.
- job.result(timeout=JOB_TIMEOUT)
- self._fetch_single_page(table)
-
- def test_query_w_dml(self):
- dataset_name = _make_dataset_id('dml_tests')
- table_name = 'test_table'
- self._load_table_for_dml([('Hello World',)], dataset_name, table_name)
- query_template = """UPDATE {}.{}
- SET greeting = 'Guten Tag'
- WHERE greeting = 'Hello World'
- """
-
- query_job = Config.CLIENT.query(
- query_template.format(dataset_name, table_name),
- job_id_prefix='test_query_w_dml_')
- query_job.result()
-
- self.assertEqual(query_job.num_dml_affected_rows, 1)
-
- def test_dbapi_w_dml(self):
- dataset_name = _make_dataset_id('dml_tests')
- table_name = 'test_table'
- self._load_table_for_dml([('Hello World',)], dataset_name, table_name)
- query_template = """UPDATE {}.{}
- SET greeting = 'Guten Tag'
- WHERE greeting = 'Hello World'
- """
-
- Config.CURSOR.execute(
- query_template.format(dataset_name, table_name),
- job_id='test_dbapi_w_dml_{}'.format(str(uuid.uuid4())))
- self.assertEqual(Config.CURSOR.rowcount, 1)
- self.assertIsNone(Config.CURSOR.fetchone())
-
- def test_query_w_query_params(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import ArrayQueryParameter
- from google.cloud.bigquery.query import ScalarQueryParameter
- from google.cloud.bigquery.query import StructQueryParameter
- question = 'What is the answer to life, the universe, and everything?'
- question_param = ScalarQueryParameter(
- name='question', type_='STRING', value=question)
- answer = 42
- answer_param = ScalarQueryParameter(
- name='answer', type_='INT64', value=answer)
- pi = 3.1415926
- pi_param = ScalarQueryParameter(
- name='pi', type_='FLOAT64', value=pi)
- truthy = True
- truthy_param = ScalarQueryParameter(
- name='truthy', type_='BOOL', value=truthy)
- beef = b'DEADBEEF'
- beef_param = ScalarQueryParameter(
- name='beef', type_='BYTES', value=beef)
- naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
- naive_param = ScalarQueryParameter(
- name='naive', type_='DATETIME', value=naive)
- naive_date_param = ScalarQueryParameter(
- name='naive_date', type_='DATE', value=naive.date())
- naive_time_param = ScalarQueryParameter(
- name='naive_time', type_='TIME', value=naive.time())
- zoned = naive.replace(tzinfo=UTC)
- zoned_param = ScalarQueryParameter(
- name='zoned', type_='TIMESTAMP', value=zoned)
- array_param = ArrayQueryParameter(
- name='array_param', array_type='INT64', values=[1, 2])
- struct_param = StructQueryParameter(
- 'hitchhiker', question_param, answer_param)
- phred_name = 'Phred Phlyntstone'
- phred_name_param = ScalarQueryParameter(
- name='name', type_='STRING', value=phred_name)
- phred_age = 32
- phred_age_param = ScalarQueryParameter(
- name='age', type_='INT64', value=phred_age)
- phred_param = StructQueryParameter(
- None, phred_name_param, phred_age_param)
- bharney_name = 'Bharney Rhubbyl'
- bharney_name_param = ScalarQueryParameter(
- name='name', type_='STRING', value=bharney_name)
- bharney_age = 31
- bharney_age_param = ScalarQueryParameter(
- name='age', type_='INT64', value=bharney_age)
- bharney_param = StructQueryParameter(
- None, bharney_name_param, bharney_age_param)
- characters_param = ArrayQueryParameter(
- name=None, array_type='RECORD',
- values=[phred_param, bharney_param])
- hero_param = StructQueryParameter(
- 'hero', phred_name_param, phred_age_param)
- sidekick_param = StructQueryParameter(
- 'sidekick', bharney_name_param, bharney_age_param)
- roles_param = StructQueryParameter(
- 'roles', hero_param, sidekick_param)
- friends_param = ArrayQueryParameter(
- name='friends', array_type='STRING',
- values=[phred_name, bharney_name])
- with_friends_param = StructQueryParameter(None, friends_param)
- top_left_param = StructQueryParameter(
- 'top_left',
- ScalarQueryParameter('x', 'INT64', 12),
- ScalarQueryParameter('y', 'INT64', 102))
- bottom_right_param = StructQueryParameter(
- 'bottom_right',
- ScalarQueryParameter('x', 'INT64', 22),
- ScalarQueryParameter('y', 'INT64', 92))
- rectangle_param = StructQueryParameter(
- 'rectangle', top_left_param, bottom_right_param)
- examples = [
- {
- 'sql': 'SELECT @question',
- 'expected': question,
- 'query_parameters': [question_param],
- },
- {
- 'sql': 'SELECT @answer',
- 'expected': answer,
- 'query_parameters': [answer_param],
- },
- {
- 'sql': 'SELECT @pi',
- 'expected': pi,
- 'query_parameters': [pi_param],
- },
- {
- 'sql': 'SELECT @truthy',
- 'expected': truthy,
- 'query_parameters': [truthy_param],
- },
- {
- 'sql': 'SELECT @beef',
- 'expected': beef,
- 'query_parameters': [beef_param],
- },
- {
- 'sql': 'SELECT @naive',
- 'expected': naive,
- 'query_parameters': [naive_param],
- },
- {
- 'sql': 'SELECT @naive_date',
- 'expected': naive.date(),
- 'query_parameters': [naive_date_param],
- },
- {
- 'sql': 'SELECT @naive_time',
- 'expected': naive.time(),
- 'query_parameters': [naive_time_param],
- },
- {
- 'sql': 'SELECT @zoned',
- 'expected': zoned,
- 'query_parameters': [zoned_param],
- },
- {
- 'sql': 'SELECT @array_param',
- 'expected': [1, 2],
- 'query_parameters': [array_param],
- },
- {
- 'sql': 'SELECT (@hitchhiker.question, @hitchhiker.answer)',
- 'expected': ({'_field_1': question, '_field_2': answer}),
- 'query_parameters': [struct_param],
- },
- {
- 'sql':
- 'SELECT '
- '((@rectangle.bottom_right.x - @rectangle.top_left.x) '
- '* (@rectangle.top_left.y - @rectangle.bottom_right.y))',
- 'expected': 100,
- 'query_parameters': [rectangle_param],
- },
- {
- 'sql': 'SELECT ?',
- 'expected': [
- {'name': phred_name, 'age': phred_age},
- {'name': bharney_name, 'age': bharney_age},
- ],
- 'query_parameters': [characters_param],
- },
- {
- 'sql': 'SELECT @roles',
- 'expected': {
- 'hero': {'name': phred_name, 'age': phred_age},
- 'sidekick': {'name': bharney_name, 'age': bharney_age},
- },
- 'query_parameters': [roles_param],
- },
- {
- 'sql': 'SELECT ?',
- 'expected': {
- 'friends': [phred_name, bharney_name],
- },
- 'query_parameters': [with_friends_param],
- },
- ]
- for example in examples:
- jconfig = QueryJobConfig()
- jconfig.query_parameters = example['query_parameters']
- query_job = Config.CLIENT.query(
- example['sql'],
- job_config=jconfig,
- job_id_prefix='test_query_w_query_params')
- rows = list(query_job.result())
- self.assertEqual(len(rows), 1)
- self.assertEqual(len(rows[0]), 1)
- self.assertEqual(rows[0][0], example['expected'])
-
- def test_dbapi_w_query_parameters(self):
- examples = [
- {
- 'sql': 'SELECT %(boolval)s',
- 'expected': True,
- 'query_parameters': {
- 'boolval': True,
- },
- },
- {
- 'sql': 'SELECT %(a "very" weird `name`)s',
- 'expected': True,
- 'query_parameters': {
- 'a "very" weird `name`': True,
- },
- },
- {
- 'sql': 'SELECT %(select)s',
- 'expected': True,
- 'query_parameters': {
- 'select': True, # this name is a keyword
- },
- },
- {
- 'sql': 'SELECT %s',
- 'expected': False,
- 'query_parameters': [False],
- },
- {
- 'sql': 'SELECT %(intval)s',
- 'expected': 123,
- 'query_parameters': {
- 'intval': 123,
- },
- },
- {
- 'sql': 'SELECT %s',
- 'expected': -123456789,
- 'query_parameters': [-123456789],
- },
- {
- 'sql': 'SELECT %(floatval)s',
- 'expected': 1.25,
- 'query_parameters': {
- 'floatval': 1.25,
- },
- },
- {
- 'sql': 'SELECT LOWER(%(strval)s)',
- 'query_parameters': {
- 'strval': 'I Am A String',
- },
- 'expected': 'i am a string',
- },
- {
- 'sql': 'SELECT DATE_SUB(%(dateval)s, INTERVAL 1 DAY)',
- 'query_parameters': {
- 'dateval': datetime.date(2017, 4, 2),
- },
- 'expected': datetime.date(2017, 4, 1),
- },
- {
- 'sql': 'SELECT TIME_ADD(%(timeval)s, INTERVAL 4 SECOND)',
- 'query_parameters': {
- 'timeval': datetime.time(12, 34, 56),
- },
- 'expected': datetime.time(12, 35, 0),
- },
- {
- 'sql': (
- 'SELECT DATETIME_ADD(%(datetimeval)s, INTERVAL 53 SECOND)'
- ),
- 'query_parameters': {
- 'datetimeval': datetime.datetime(2012, 3, 4, 5, 6, 7),
- },
- 'expected': datetime.datetime(2012, 3, 4, 5, 7, 0),
- },
- {
- 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)',
- 'query_parameters': {
- 'zoned': datetime.datetime(
- 2012, 3, 4, 5, 6, 7, tzinfo=UTC),
- },
- 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC),
- },
- {
- 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)',
- 'query_parameters': {
- 'zoned': datetime.datetime(
- 2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC),
- },
- 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC),
- },
- ]
- for example in examples:
- msg = 'sql: {} query_parameters: {}'.format(
- example['sql'], example['query_parameters'])
-
- Config.CURSOR.execute(example['sql'], example['query_parameters'])
-
- self.assertEqual(Config.CURSOR.rowcount, 1, msg=msg)
- row = Config.CURSOR.fetchone()
- self.assertEqual(len(row), 1, msg=msg)
- self.assertEqual(row[0], example['expected'], msg=msg)
- row = Config.CURSOR.fetchone()
- self.assertIsNone(row, msg=msg)
-
- def test_dump_table_w_public_data(self):
- PUBLIC = 'bigquery-public-data'
- DATASET_ID = 'samples'
- TABLE_NAME = 'natality'
-
- table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME)
- table = Config.CLIENT.get_table(table_ref)
- self._fetch_single_page(table)
-
- def test_dump_table_w_public_data_selected_fields(self):
- PUBLIC = 'bigquery-public-data'
- DATASET_ID = 'samples'
- TABLE_NAME = 'natality'
- selected_fields = [
- bigquery.SchemaField('year', 'INTEGER', mode='NULLABLE'),
- bigquery.SchemaField('month', 'INTEGER', mode='NULLABLE'),
- bigquery.SchemaField('day', 'INTEGER', mode='NULLABLE'),
- ]
- table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME)
-
- rows = self._fetch_single_page(
- table_ref, selected_fields=selected_fields)
-
- self.assertGreater(len(rows), 0)
- self.assertEqual(len(rows[0]), 3)
-
- def test_large_query_w_public_data(self):
- PUBLIC = 'bigquery-public-data'
- DATASET_ID = 'samples'
- TABLE_NAME = 'natality'
- LIMIT = 1000
- SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format(
- PUBLIC, DATASET_ID, TABLE_NAME, LIMIT)
-
- iterator = Config.CLIENT.query_rows(SQL)
-
- rows = list(iterator)
- self.assertEqual(len(rows), LIMIT)
-
- def test_query_future(self):
- query_job = Config.CLIENT.query('SELECT 1')
- iterator = query_job.result(timeout=JOB_TIMEOUT)
- row_tuples = [r.values() for r in iterator]
- self.assertEqual(row_tuples, [(1,)])
-
- def test_query_table_def(self):
- gs_url = self._write_csv_to_storage(
- 'bq_external_test' + unique_resource_id(), 'person_ages.csv',
- HEADER_ROW, ROWS)
-
- job_config = bigquery.QueryJobConfig()
- table_id = 'flintstones'
- ec = bigquery.ExternalConfig('CSV')
- ec.source_uris = [gs_url]
- ec.schema = SCHEMA
- ec.options.skip_leading_rows = 1 # skip the header row
- job_config.table_definitions = {table_id: ec}
- sql = 'SELECT * FROM %s' % table_id
-
- got_rows = Config.CLIENT.query_rows(sql, job_config=job_config)
-
- row_tuples = [r.values() for r in got_rows]
- by_age = operator.itemgetter(1)
- self.assertEqual(sorted(row_tuples, key=by_age),
- sorted(ROWS, key=by_age))
-
- def test_query_external_table(self):
- gs_url = self._write_csv_to_storage(
- 'bq_external_test' + unique_resource_id(), 'person_ages.csv',
- HEADER_ROW, ROWS)
- dataset_id = _make_dataset_id('query_external_table')
- dataset = self.temp_dataset(dataset_id)
- table_id = 'flintstones'
- table_arg = Table(dataset.table(table_id), schema=SCHEMA)
- ec = bigquery.ExternalConfig('CSV')
- ec.source_uris = [gs_url]
- ec.options.skip_leading_rows = 1 # skip the header row
- table_arg.external_data_configuration = ec
- table = Config.CLIENT.create_table(table_arg)
- self.to_delete.insert(0, table)
-
- sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id)
-
- got_rows = Config.CLIENT.query_rows(sql)
-
- row_tuples = [r.values() for r in got_rows]
- by_age = operator.itemgetter(1)
- self.assertEqual(sorted(row_tuples, key=by_age),
- sorted(ROWS, key=by_age))
-
- def test_create_rows_nested_nested(self):
- # See #2951
- SF = bigquery.SchemaField
- schema = [
- SF('string_col', 'STRING', mode='NULLABLE'),
- SF('record_col', 'RECORD', mode='NULLABLE', fields=[
- SF('nested_string', 'STRING', mode='NULLABLE'),
- SF('nested_repeated', 'INTEGER', mode='REPEATED'),
- SF('nested_record', 'RECORD', mode='NULLABLE', fields=[
- SF('nested_nested_string', 'STRING', mode='NULLABLE'),
- ]),
- ]),
- ]
- record = {
- 'nested_string': 'another string value',
- 'nested_repeated': [0, 1, 2],
- 'nested_record': {'nested_nested_string': 'some deep insight'},
- }
- to_insert = [
- ('Some value', record)
- ]
- table_id = 'test_table'
- dataset = self.temp_dataset(_make_dataset_id('issue_2951'))
- table_arg = Table(dataset.table(table_id), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- Config.CLIENT.create_rows(table, to_insert)
-
- retry = RetryResult(_has_rows, max_tries=8)
- rows = retry(self._fetch_single_page)(table)
- row_tuples = [r.values() for r in rows]
- self.assertEqual(row_tuples, to_insert)
-
- def test_create_rows_nested_nested_dictionary(self):
- # See #2951
- SF = bigquery.SchemaField
- schema = [
- SF('string_col', 'STRING', mode='NULLABLE'),
- SF('record_col', 'RECORD', mode='NULLABLE', fields=[
- SF('nested_string', 'STRING', mode='NULLABLE'),
- SF('nested_repeated', 'INTEGER', mode='REPEATED'),
- SF('nested_record', 'RECORD', mode='NULLABLE', fields=[
- SF('nested_nested_string', 'STRING', mode='NULLABLE'),
- ]),
- ]),
- ]
- record = {
- 'nested_string': 'another string value',
- 'nested_repeated': [0, 1, 2],
- 'nested_record': {'nested_nested_string': 'some deep insight'},
- }
- to_insert = [
- {'string_col': 'Some value', 'record_col': record}
- ]
- table_id = 'test_table'
- dataset = self.temp_dataset(_make_dataset_id('issue_2951'))
- table_arg = Table(dataset.table(table_id), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- Config.CLIENT.create_rows(table, to_insert)
-
- retry = RetryResult(_has_rows, max_tries=8)
- rows = retry(self._fetch_single_page)(table)
- row_tuples = [r.values() for r in rows]
- expected_rows = [('Some value', record)]
- self.assertEqual(row_tuples, expected_rows)
-
- def test_create_table_rows_fetch_nested_schema(self):
- table_name = 'test_table'
- dataset = self.temp_dataset(
- _make_dataset_id('create_table_nested_schema'))
- schema = _load_json_schema()
- table_arg = Table(dataset.table(table_name), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
- self.assertTrue(_table_exists(table))
- self.assertEqual(table.table_id, table_name)
-
- to_insert = []
- # Data is in "JSON Lines" format, see http://jsonlines.org/
- json_filename = os.path.join(WHERE, 'data', 'characters.jsonl')
- with open(json_filename) as rows_file:
- for line in rows_file:
- to_insert.append(json.loads(line))
-
- errors = Config.CLIENT.create_rows_json(table, to_insert)
- self.assertEqual(len(errors), 0)
-
- retry = RetryResult(_has_rows, max_tries=8)
- fetched = retry(self._fetch_single_page)(table)
- fetched_tuples = [f.values() for f in fetched]
-
- self.assertEqual(len(fetched), len(to_insert))
-
- for found, expected in zip(sorted(fetched_tuples), to_insert):
- self.assertEqual(found[0], expected['Name'])
- self.assertEqual(found[1], int(expected['Age']))
- self.assertEqual(found[2], expected['Weight'])
- self.assertEqual(found[3], expected['IsMagic'])
-
- self.assertEqual(len(found[4]), len(expected['Spells']))
- for f_spell, e_spell in zip(found[4], expected['Spells']):
- self.assertEqual(f_spell['Name'], e_spell['Name'])
- parts = time.strptime(
- e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC')
- e_used = datetime.datetime(*parts[0:6], tzinfo=UTC)
- self.assertEqual(f_spell['LastUsed'], e_used)
- self.assertEqual(f_spell['DiscoveredBy'],
- e_spell['DiscoveredBy'])
- self.assertEqual(f_spell['Properties'], e_spell['Properties'])
-
- e_icon = base64.standard_b64decode(
- e_spell['Icon'].encode('ascii'))
- self.assertEqual(f_spell['Icon'], e_icon)
-
- parts = time.strptime(expected['TeaTime'], '%H:%M:%S')
- e_teatime = datetime.time(*parts[3:6])
- self.assertEqual(found[5], e_teatime)
-
- parts = time.strptime(expected['NextVacation'], '%Y-%m-%d')
- e_nextvac = datetime.date(*parts[0:3])
- self.assertEqual(found[6], e_nextvac)
-
- parts = time.strptime(expected['FavoriteTime'],
- '%Y-%m-%dT%H:%M:%S')
- e_favtime = datetime.datetime(*parts[0:6])
- self.assertEqual(found[7], e_favtime)
-
- def temp_dataset(self, dataset_id):
- dataset = retry_403(Config.CLIENT.create_dataset)(
- Dataset(Config.CLIENT.dataset(dataset_id)))
- self.to_delete.append(dataset)
- return dataset
-
-
-def _job_done(instance):
- return instance.state.lower() == 'done'
-
-
-def _dataset_exists(ds):
- try:
- Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id))
- return True
- except NotFound:
- return False
-
-
-def _table_exists(t):
- try:
- tr = DatasetReference(t.project, t.dataset_id).table(t.table_id)
- Config.CLIENT.get_table(tr)
- return True
- except NotFound:
- return False
diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py
deleted file mode 100644
index 15a6210..0000000
--- a/bigquery/tests/unit/test__helpers.py
+++ /dev/null
@@ -1,903 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import base64
-import datetime
-import unittest
-
-
-class Test_not_null(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _not_null
-
- return _not_null(value, field)
-
- def test_w_none_nullable(self):
- self.assertFalse(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- self.assertTrue(self._call_fut(None, _Field('REQUIRED')))
-
- def test_w_value(self):
- self.assertTrue(self._call_fut(object(), object()))
-
-
-class Test_int_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _int_from_json
-
- return _int_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_string_value(self):
- coerced = self._call_fut('42', object())
- self.assertEqual(coerced, 42)
-
-    def test_w_float_value(self):
-        coerced = self._call_fut(42.0, object())
-        self.assertEqual(coerced, 42)
-
-
-class Test_float_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _float_from_json
-
- return _float_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_string_value(self):
- coerced = self._call_fut('3.1415', object())
- self.assertEqual(coerced, 3.1415)
-
- def test_w_float_value(self):
- coerced = self._call_fut(3.1415, object())
- self.assertEqual(coerced, 3.1415)
-
-
-class Test_bool_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _bool_from_json
-
- return _bool_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(AttributeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_value_t(self):
- coerced = self._call_fut('T', object())
- self.assertTrue(coerced)
-
- def test_w_value_true(self):
- coerced = self._call_fut('True', object())
- self.assertTrue(coerced)
-
- def test_w_value_1(self):
- coerced = self._call_fut('1', object())
- self.assertTrue(coerced)
-
- def test_w_value_other(self):
- coerced = self._call_fut('f', object())
- self.assertFalse(coerced)
-
-
-class Test_string_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _string_from_json
-
- return _string_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- self.assertIsNone(self._call_fut(None, _Field('REQUIRED')))
-
- def test_w_string_value(self):
- coerced = self._call_fut('Wonderful!', object())
- self.assertEqual(coerced, 'Wonderful!')
-
-
-class Test_bytes_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _bytes_from_json
-
- return _bytes_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_base64_encoded_bytes(self):
- expected = b'Wonderful!'
- encoded = base64.standard_b64encode(expected)
- coerced = self._call_fut(encoded, object())
- self.assertEqual(coerced, expected)
-
- def test_w_base64_encoded_text(self):
- expected = b'Wonderful!'
- encoded = base64.standard_b64encode(expected).decode('ascii')
- coerced = self._call_fut(encoded, object())
- self.assertEqual(coerced, expected)
-
-
-class Test_timestamp_query_param_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery import _helpers
-
- return _helpers._timestamp_query_param_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_timestamp_valid(self):
- from google.cloud._helpers import UTC
-
- samples = [
- (
- '2016-12-20 15:58:27.339328+00:00',
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
- ),
- (
- '2016-12-20 15:58:27+00:00',
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
- ),
- (
- '2016-12-20T15:58:27.339328+00:00',
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
- ),
- (
- '2016-12-20T15:58:27+00:00',
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
- ),
- (
- '2016-12-20 15:58:27.339328Z',
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
- ),
- (
- '2016-12-20 15:58:27Z',
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
- ),
- (
- '2016-12-20T15:58:27.339328Z',
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
- ),
- (
- '2016-12-20T15:58:27Z',
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
- ),
- ]
- for timestamp_str, expected_result in samples:
- self.assertEqual(
- self._call_fut(timestamp_str, _Field('NULLABLE')),
- expected_result)
-
- def test_w_timestamp_invalid(self):
- with self.assertRaises(ValueError):
- self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE'))
-
-
-class Test_timestamp_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _timestamp_from_json
-
- return _timestamp_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_string_value(self):
- from google.cloud._helpers import _EPOCH
-
- coerced = self._call_fut('1.234567', object())
- self.assertEqual(
- coerced,
- _EPOCH + datetime.timedelta(seconds=1, microseconds=234567))
-
- def test_w_float_value(self):
- from google.cloud._helpers import _EPOCH
-
- coerced = self._call_fut(1.234567, object())
- self.assertEqual(
- coerced,
- _EPOCH + datetime.timedelta(seconds=1, microseconds=234567))
-
-
-class Test_datetime_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _datetime_from_json
-
- return _datetime_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_string_value(self):
- coerced = self._call_fut('2016-12-02T18:51:33', object())
- self.assertEqual(
- coerced,
- datetime.datetime(2016, 12, 2, 18, 51, 33))
-
- def test_w_microseconds(self):
- coerced = self._call_fut('2015-05-22T10:11:12.987654', object())
- self.assertEqual(
- coerced,
- datetime.datetime(2015, 5, 22, 10, 11, 12, 987654))
-
-
-class Test_date_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _date_from_json
-
- return _date_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_string_value(self):
- coerced = self._call_fut('1987-09-22', object())
- self.assertEqual(
- coerced,
- datetime.date(1987, 9, 22))
-
-
-class Test_time_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _time_from_json
-
- return _time_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_string_value(self):
- coerced = self._call_fut('12:12:27', object())
- self.assertEqual(
- coerced,
- datetime.time(12, 12, 27))
-
-
-class Test_record_from_json(unittest.TestCase):
-
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _record_from_json
-
- return _record_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field('REQUIRED'))
-
- def test_w_nullable_subfield_none(self):
- subfield = _Field('NULLABLE', 'age', 'INTEGER')
- field = _Field('REQUIRED', fields=[subfield])
- value = {'f': [{'v': None}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {'age': None})
-
- def test_w_scalar_subfield(self):
- subfield = _Field('REQUIRED', 'age', 'INTEGER')
- field = _Field('REQUIRED', fields=[subfield])
- value = {'f': [{'v': 42}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {'age': 42})
-
- def test_w_repeated_subfield(self):
- subfield = _Field('REPEATED', 'color', 'STRING')
- field = _Field('REQUIRED', fields=[subfield])
- value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']})
-
- def test_w_record_subfield(self):
- full_name = _Field('REQUIRED', 'full_name', 'STRING')
- area_code = _Field('REQUIRED', 'area_code', 'STRING')
- local_number = _Field('REQUIRED', 'local_number', 'STRING')
- rank = _Field('REQUIRED', 'rank', 'INTEGER')
- phone = _Field('NULLABLE', 'phone', 'RECORD',
- fields=[area_code, local_number, rank])
- person = _Field('REQUIRED', 'person', 'RECORD',
- fields=[full_name, phone])
- value = {
- 'f': [
- {'v': 'Phred Phlyntstone'},
- {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
- ],
- }
- expected = {
- 'full_name': 'Phred Phlyntstone',
- 'phone': {
- 'area_code': '800',
- 'local_number': '555-1212',
- 'rank': 1,
- }
- }
- coerced = self._call_fut(value, person)
- self.assertEqual(coerced, expected)
-
-
-class Test_row_tuple_from_json(unittest.TestCase):
-
- def _call_fut(self, row, schema):
- from google.cloud.bigquery._helpers import _row_tuple_from_json
-
- return _row_tuple_from_json(row, schema)
-
- def test_w_single_scalar_column(self):
- # SELECT 1 AS col
- col = _Field('REQUIRED', 'col', 'INTEGER')
- row = {u'f': [{u'v': u'1'}]}
- self.assertEqual(self._call_fut(row, schema=[col]), (1,))
-
- def test_w_single_struct_column(self):
- # SELECT (1, 2) AS col
- sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER')
- sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER')
- col = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2])
- row = {u'f': [{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}]}
- self.assertEqual(self._call_fut(row, schema=[col]),
- ({'sub_1': 1, 'sub_2': 2},))
-
- def test_w_single_array_column(self):
- # SELECT [1, 2, 3] as col
- col = _Field('REPEATED', 'col', 'INTEGER')
- row = {u'f': [{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}]}
- self.assertEqual(self._call_fut(row, schema=[col]),
- ([1, 2, 3],))
-
- def test_w_struct_w_nested_array_column(self):
- # SELECT ([1, 2], 3, [4, 5]) AS col
- first = _Field('REPEATED', 'first', 'INTEGER')
- second = _Field('REQUIRED', 'second', 'INTEGER')
- third = _Field('REPEATED', 'third', 'INTEGER')
- col = _Field('REQUIRED', 'col', 'RECORD',
- fields=[first, second, third])
- row = {
- u'f': [
- {u'v': {
- u'f': [
- {u'v': [{u'v': u'1'}, {u'v': u'2'}]},
- {u'v': u'3'},
- {u'v': [{u'v': u'4'}, {u'v': u'5'}]}
- ]
- }},
- ]
- }
- self.assertEqual(
- self._call_fut(row, schema=[col]),
- ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},))
-
- def test_w_array_of_struct(self):
- # SELECT [(1, 2, 3), (4, 5, 6)] AS col
- first = _Field('REQUIRED', 'first', 'INTEGER')
- second = _Field('REQUIRED', 'second', 'INTEGER')
- third = _Field('REQUIRED', 'third', 'INTEGER')
- col = _Field('REPEATED', 'col', 'RECORD',
- fields=[first, second, third])
- row = {u'f': [{u'v': [
- {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}},
- {u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}},
- ]}]}
- self.assertEqual(
- self._call_fut(row, schema=[col]),
- ([
- {u'first': 1, u'second': 2, u'third': 3},
- {u'first': 4, u'second': 5, u'third': 6},
- ],))
-
- def test_w_array_of_struct_w_array(self):
- # SELECT [([1, 2, 3], 4), ([5, 6], 7)]
- first = _Field('REPEATED', 'first', 'INTEGER')
- second = _Field('REQUIRED', 'second', 'INTEGER')
- col = _Field('REPEATED', 'col', 'RECORD', fields=[first, second])
- row = {u'f': [{u'v': [
- {u'v': {u'f': [
- {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]},
- {u'v': u'4'}
- ]}},
- {u'v': {u'f': [
- {u'v': [{u'v': u'5'}, {u'v': u'6'}]},
- {u'v': u'7'}
- ]}}
- ]}]}
- self.assertEqual(
- self._call_fut(row, schema=[col]),
- ([
- {u'first': [1, 2, 3], u'second': 4},
- {u'first': [5, 6], u'second': 7},
- ],))
-
- def test_row(self):
- from google.cloud.bigquery._helpers import Row
-
- VALUES = (1, 2, 3)
- r = Row(VALUES, {'a': 0, 'b': 1, 'c': 2})
- self.assertEqual(r.a, 1)
- self.assertEqual(r[1], 2)
- self.assertEqual(r['c'], 3)
- self.assertEqual(len(r), 3)
- self.assertEqual(r.values(), VALUES)
- self.assertEqual(repr(r),
- "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})")
- self.assertFalse(r != r)
- self.assertFalse(r == 3)
- with self.assertRaises(AttributeError):
- r.z
- with self.assertRaises(KeyError):
- r['z']
-
-
-class Test_rows_from_json(unittest.TestCase):
-
- def _call_fut(self, rows, schema):
- from google.cloud.bigquery._helpers import _rows_from_json
-
- return _rows_from_json(rows, schema)
-
- def test_w_record_subfield(self):
- from google.cloud.bigquery._helpers import Row
-
- full_name = _Field('REQUIRED', 'full_name', 'STRING')
- area_code = _Field('REQUIRED', 'area_code', 'STRING')
- local_number = _Field('REQUIRED', 'local_number', 'STRING')
- rank = _Field('REQUIRED', 'rank', 'INTEGER')
- phone = _Field('NULLABLE', 'phone', 'RECORD',
- fields=[area_code, local_number, rank])
- color = _Field('REPEATED', 'color', 'STRING')
- schema = [full_name, phone, color]
- rows = [
- {'f': [
- {'v': 'Phred Phlyntstone'},
- {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
- {'v': [{'v': 'orange'}, {'v': 'black'}]},
- ]},
- {'f': [
- {'v': 'Bharney Rhubble'},
- {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
- {'v': [{'v': 'brown'}]},
- ]},
- {'f': [
- {'v': 'Wylma Phlyntstone'},
- {'v': None},
- {'v': []},
- ]},
- ]
- phred_phone = {
- 'area_code': '800',
- 'local_number': '555-1212',
- 'rank': 1,
- }
- bharney_phone = {
- 'area_code': '877',
- 'local_number': '768-5309',
- 'rank': 2,
- }
- f2i = {'full_name': 0, 'phone': 1, 'color': 2}
- expected = [
- Row(('Phred Phlyntstone', phred_phone, ['orange', 'black']), f2i),
- Row(('Bharney Rhubble', bharney_phone, ['brown']), f2i),
- Row(('Wylma Phlyntstone', None, []), f2i),
- ]
- coerced = self._call_fut(rows, schema)
- self.assertEqual(coerced, expected)
-
- def test_w_int64_float64_bool(self):
- from google.cloud.bigquery._helpers import Row
-
- # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'.
- candidate = _Field('REQUIRED', 'candidate', 'STRING')
- votes = _Field('REQUIRED', 'votes', 'INT64')
- percentage = _Field('REQUIRED', 'percentage', 'FLOAT64')
- incumbent = _Field('REQUIRED', 'incumbent', 'BOOL')
- schema = [candidate, votes, percentage, incumbent]
- rows = [
- {'f': [
- {'v': 'Phred Phlyntstone'},
- {'v': 8},
- {'v': 0.25},
- {'v': 'true'},
- ]},
- {'f': [
- {'v': 'Bharney Rhubble'},
- {'v': 4},
- {'v': 0.125},
- {'v': 'false'},
- ]},
- {'f': [
- {'v': 'Wylma Phlyntstone'},
- {'v': 20},
- {'v': 0.625},
- {'v': 'false'},
- ]},
- ]
- f2i = {'candidate': 0, 'votes': 1, 'percentage': 2, 'incumbent': 3}
- expected = [
- Row(('Phred Phlyntstone', 8, 0.25, True), f2i),
- Row(('Bharney Rhubble', 4, 0.125, False), f2i),
- Row(('Wylma Phlyntstone', 20, 0.625, False), f2i),
- ]
- coerced = self._call_fut(rows, schema)
- self.assertEqual(coerced, expected)
-
-
-class Test_int_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _int_to_json
-
- return _int_to_json(value)
-
- def test_w_int(self):
- self.assertEqual(self._call_fut(123), '123')
-
- def test_w_string(self):
- self.assertEqual(self._call_fut('123'), '123')
-
-
-class Test_float_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _float_to_json
-
- return _float_to_json(value)
-
- def test_w_float(self):
- self.assertEqual(self._call_fut(1.23), 1.23)
-
-
-class Test_bool_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _bool_to_json
-
- return _bool_to_json(value)
-
- def test_w_true(self):
- self.assertEqual(self._call_fut(True), 'true')
-
- def test_w_false(self):
- self.assertEqual(self._call_fut(False), 'false')
-
- def test_w_string(self):
- self.assertEqual(self._call_fut('false'), 'false')
-
-
-class Test_bytes_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _bytes_to_json
-
- return _bytes_to_json(value)
-
- def test_w_non_bytes(self):
- non_bytes = object()
- self.assertIs(self._call_fut(non_bytes), non_bytes)
-
- def test_w_bytes(self):
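- # BYTES values are base64-encoded for JSON transport.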
- source = b'source'
- expected = u'c291cmNl'
- converted = self._call_fut(source)
- self.assertEqual(converted, expected)
-
-
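-# Note: the query *parameter* serializer below renders datetimes as UTC
-# strings, while the *row* serializer renders them as seconds-since-epoch
-# floats (see Test_timestamp_to_json_row).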
-class Test_timestamp_to_json_parameter(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _timestamp_to_json_parameter
-
- return _timestamp_to_json_parameter(value)
-
- def test_w_float(self):
- self.assertEqual(self._call_fut(1.234567), 1.234567)
-
- def test_w_string(self):
- ZULU = '2016-12-20 15:58:27.339328+00:00'
- self.assertEqual(self._call_fut(ZULU), ZULU)
-
- def test_w_datetime_wo_zone(self):
- ZULU = '2016-12-20 15:58:27.339328+00:00'
- when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328)
- self.assertEqual(self._call_fut(when), ZULU)
-
- def test_w_datetime_w_non_utc_zone(self):
- class _Zone(datetime.tzinfo):
-
- def utcoffset(self, _):
- return datetime.timedelta(minutes=-240)
-
- ZULU = '2016-12-20 19:58:27.339328+00:00'
- when = datetime.datetime(
- 2016, 12, 20, 15, 58, 27, 339328, tzinfo=_Zone())
- self.assertEqual(self._call_fut(when), ZULU)
-
- def test_w_datetime_w_utc_zone(self):
- from google.cloud._helpers import UTC
-
- ZULU = '2016-12-20 15:58:27.339328+00:00'
- when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
- self.assertEqual(self._call_fut(when), ZULU)
-
-
-class Test_timestamp_to_json_row(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _timestamp_to_json_row
-
- return _timestamp_to_json_row(value)
-
- def test_w_float(self):
- self.assertEqual(self._call_fut(1.234567), 1.234567)
-
- def test_w_string(self):
- ZULU = '2016-12-20 15:58:27.339328+00:00'
- self.assertEqual(self._call_fut(ZULU), ZULU)
-
- def test_w_datetime(self):
- from google.cloud._helpers import _microseconds_from_datetime
-
- when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328)
- self.assertEqual(
- self._call_fut(when), _microseconds_from_datetime(when) / 1e6)
-
-
-class Test_datetime_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _datetime_to_json
-
- return _datetime_to_json(value)
-
- def test_w_string(self):
- RFC3339 = '2016-12-03T14:14:51Z'
- self.assertEqual(self._call_fut(RFC3339), RFC3339)
-
- def test_w_datetime(self):
- from google.cloud._helpers import UTC
-
- when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC)
- self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456')
-
-
-class Test_date_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _date_to_json
-
- return _date_to_json(value)
-
- def test_w_string(self):
- RFC3339 = '2016-12-03'
- self.assertEqual(self._call_fut(RFC3339), RFC3339)
-
- def test_w_datetime(self):
- when = datetime.date(2016, 12, 3)
- self.assertEqual(self._call_fut(when), '2016-12-03')
-
-
-class Test_time_to_json(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _time_to_json
-
- return _time_to_json(value)
-
- def test_w_string(self):
- RFC3339 = '12:13:41'
- self.assertEqual(self._call_fut(RFC3339), RFC3339)
-
- def test_w_datetime(self):
- when = datetime.time(12, 13, 41)
- self.assertEqual(self._call_fut(when), '12:13:41')
-
-
-class Test_snake_to_camel_case(unittest.TestCase):
-
- def _call_fut(self, value):
- from google.cloud.bigquery._helpers import _snake_to_camel_case
-
- return _snake_to_camel_case(value)
-
- def test_w_snake_case_string(self):
- self.assertEqual(self._call_fut('friendly_name'), 'friendlyName')
-
- def test_w_camel_case_string(self):
- self.assertEqual(self._call_fut('friendlyName'), 'friendlyName')
-
-
-class Test_TypedApiResourceProperty(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery._helpers import _TypedApiResourceProperty
-
- return _TypedApiResourceProperty
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_it(self):
-
- class Wrapper(object):
- attr = self._make_one('attr', 'back', int)
-
- def __init__(self):
- self._properties = {}
-
- self.assertIsNotNone(Wrapper.attr)
-
- wrapper = Wrapper()
- with self.assertRaises(ValueError):
- wrapper.attr = 'BOGUS'
-
- wrapper.attr = 42
- self.assertEqual(wrapper.attr, 42)
- self.assertEqual(wrapper._properties['back'], 42)
-
- wrapper.attr = None
- self.assertIsNone(wrapper.attr)
- self.assertIsNone(wrapper._properties['back'])
-
- wrapper.attr = 23
- self.assertEqual(wrapper.attr, 23)
- self.assertEqual(wrapper._properties['back'], 23)
-
- del wrapper.attr
- self.assertIsNone(wrapper.attr)
- with self.assertRaises(KeyError):
- wrapper._properties['back']
-
-
-class Test_ListApiResourceProperty(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery._helpers import _ListApiResourceProperty
-
- return _ListApiResourceProperty
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def _descriptor_and_klass(self):
- from google.cloud.bigquery.query import _AbstractQueryParameter
-
- descriptor = self._make_one(
- 'query_parameters', 'queryParameters', _AbstractQueryParameter)
-
- class _Test(object):
- def __init__(self):
- self._properties = {}
-
- query_parameters = descriptor
-
- return descriptor, _Test
-
- def test_class_getter(self):
- descriptor, klass = self._descriptor_and_klass()
- self.assertIs(klass.query_parameters, descriptor)
-
- def test_instance_getter_empty(self):
- _, klass = self._descriptor_and_klass()
- instance = klass()
- self.assertEqual(instance.query_parameters, [])
-
- def test_instance_getter_w_non_empty_list(self):
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
- _, klass = self._descriptor_and_klass()
- instance = klass()
- instance._properties['queryParameters'] = query_parameters
-
- self.assertEqual(instance.query_parameters, query_parameters)
-
- def test_instance_setter_w_empty_list(self):
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
- _, klass = self._descriptor_and_klass()
- instance = klass()
- instance._query_parameters = query_parameters
-
- instance.query_parameters = []
-
- self.assertEqual(instance.query_parameters, [])
-
- def test_instance_setter_w_none(self):
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
- _, klass = self._descriptor_and_klass()
- instance = klass()
- instance._query_parameters = query_parameters
-
- with self.assertRaises(ValueError):
- instance.query_parameters = None
-
- def test_instance_setter_w_valid_query_parameters(self):
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
- _, klass = self._descriptor_and_klass()
- instance = klass()
-
- instance.query_parameters = query_parameters
-
- self.assertEqual(instance.query_parameters, query_parameters)
-
- def test_instance_setter_w_bad_query_parameters(self):
- _, klass = self._descriptor_and_klass()
- instance = klass()
-
- with self.assertRaises(ValueError):
- instance.query_parameters = ["foo"]
-
- self.assertEqual(instance.query_parameters, [])
-
-
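-# Lightweight schema-field stand-in exposing only the attributes the
-# helpers above inspect: mode, name, field_type, and fields.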
-class _Field(object):
-
- def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()):
- self.mode = mode
- self.name = name
- self.field_type = field_type
- self.fields = fields
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
deleted file mode 100644
index c71847e..0000000
--- a/bigquery/tests/unit/test_client.py
+++ /dev/null
@@ -1,3236 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import copy
-import email
-import io
-import json
-import unittest
-
-import mock
-import six
-from six.moves import http_client
-import pytest
-
-from google.cloud.bigquery.dataset import DatasetReference
-
-
-def _make_credentials():
- import google.auth.credentials
-
- return mock.Mock(spec=google.auth.credentials.Credentials)
-
-
-class TestClient(unittest.TestCase):
-
- PROJECT = 'PROJECT'
- DS_ID = 'DATASET_ID'
- TABLE_ID = 'TABLE_ID'
- TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID)
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.client import Client
-
- return Client
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- from google.cloud.bigquery._http import Connection
-
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- self.assertIsInstance(client._connection, Connection)
- self.assertIs(client._connection.credentials, creds)
- self.assertIs(client._connection.http, http)
-
- def test__get_query_results_miss_w_explicit_project_and_timeout(self):
- from google.cloud.exceptions import NotFound
-
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection()
-
- with self.assertRaises(NotFound):
- client._get_query_results(
- 'nothere', None, project='other-project', timeout_ms=500)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(
- req['path'], '/projects/other-project/queries/nothere')
- self.assertEqual(
- req['query_params'], {'maxResults': 0, 'timeoutMs': 500})
-
- def test__get_query_results_hit(self):
- job_id = 'query_job'
- data = {
- 'kind': 'bigquery#getQueryResultsResponse',
- 'etag': 'some-tag',
- 'schema': {
- 'fields': [
- {
- 'name': 'title',
- 'type': 'STRING',
- 'mode': 'NULLABLE'
- },
- {
- 'name': 'unique_words',
- 'type': 'INTEGER',
- 'mode': 'NULLABLE'
- }
- ]
- },
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': job_id,
- },
- 'totalRows': '10',
- 'totalBytesProcessed': '2464625',
- 'jobComplete': True,
- 'cacheHit': False,
- }
-
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- client._connection = _Connection(data)
- query_results = client._get_query_results(job_id, None)
-
- self.assertEqual(query_results.total_rows, 10)
- self.assertTrue(query_results.complete)
-
- def test_list_projects_defaults(self):
- from google.cloud.bigquery.client import Project
-
- PROJECT_1 = 'PROJECT_ONE'
- PROJECT_2 = 'PROJECT_TWO'
- PATH = 'projects'
- TOKEN = 'TOKEN'
- DATA = {
- 'nextPageToken': TOKEN,
- 'projects': [
- {'kind': 'bigquery#project',
- 'id': PROJECT_1,
- 'numericId': 1,
- 'projectReference': {'projectId': PROJECT_1},
- 'friendlyName': 'One'},
- {'kind': 'bigquery#project',
- 'id': PROJECT_2,
- 'numericId': 2,
- 'projectReference': {'projectId': PROJECT_2},
- 'friendlyName': 'Two'},
- ]
- }
- creds = _make_credentials()
- client = self._make_one(PROJECT_1, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_projects()
- page = six.next(iterator.pages)
- projects = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(projects), len(DATA['projects']))
- for found, expected in zip(projects, DATA['projects']):
- self.assertIsInstance(found, Project)
- self.assertEqual(found.project_id, expected['id'])
- self.assertEqual(found.numeric_id, expected['numericId'])
- self.assertEqual(found.friendly_name, expected['friendlyName'])
- self.assertEqual(token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_list_projects_explicit_response_missing_projects_key(self):
- PATH = 'projects'
- TOKEN = 'TOKEN'
- DATA = {}
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_projects(max_results=3, page_token=TOKEN)
- page = six.next(iterator.pages)
- projects = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(projects), 0)
- self.assertIsNone(token)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'],
- {'maxResults': 3, 'pageToken': TOKEN})
-
- def test_list_datasets_defaults(self):
- from google.cloud.bigquery.dataset import Dataset
-
- DATASET_1 = 'dataset_one'
- DATASET_2 = 'dataset_two'
- PATH = 'projects/%s/datasets' % self.PROJECT
- TOKEN = 'TOKEN'
- DATA = {
- 'nextPageToken': TOKEN,
- 'datasets': [
- {'kind': 'bigquery#dataset',
- 'id': '%s:%s' % (self.PROJECT, DATASET_1),
- 'datasetReference': {'datasetId': DATASET_1,
- 'projectId': self.PROJECT},
- 'friendlyName': None},
- {'kind': 'bigquery#dataset',
- 'id': '%s:%s' % (self.PROJECT, DATASET_2),
- 'datasetReference': {'datasetId': DATASET_2,
- 'projectId': self.PROJECT},
- 'friendlyName': 'Two'},
- ]
- }
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_datasets()
- page = six.next(iterator.pages)
- datasets = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(datasets), len(DATA['datasets']))
- for found, expected in zip(datasets, DATA['datasets']):
- self.assertIsInstance(found, Dataset)
- self.assertEqual(found.full_dataset_id, expected['id'])
- self.assertEqual(found.friendly_name, expected['friendlyName'])
- self.assertEqual(token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_list_datasets_explicit_response_missing_datasets_key(self):
- PATH = 'projects/%s/datasets' % self.PROJECT
- TOKEN = 'TOKEN'
- FILTER = 'FILTER'
- DATA = {}
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_datasets(
- include_all=True, filter=FILTER,
- max_results=3, page_token=TOKEN)
- page = six.next(iterator.pages)
- datasets = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(datasets), 0)
- self.assertIsNone(token)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'],
- {'all': True, 'filter': FILTER,
- 'maxResults': 3, 'pageToken': TOKEN})
-
- def test_dataset_with_specified_project(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- dataset = client.dataset(self.DS_ID, self.PROJECT)
- self.assertIsInstance(dataset, DatasetReference)
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
-
- def test_dataset_with_default_project(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- dataset = client.dataset(self.DS_ID)
- self.assertIsInstance(dataset, DatasetReference)
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
-
- def test_get_dataset(self):
- from google.cloud.exceptions import ServerError
-
- path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- resource = {
- 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
- 'datasetReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- },
- }
- conn = client._connection = _Connection(resource)
- dataset_ref = client.dataset(self.DS_ID)
-
- dataset = client.get_dataset(dataset_ref)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % path)
- self.assertEqual(dataset.dataset_id, self.DS_ID)
-
- # Test retry.
-
- # Not a cloud API exception (missing 'errors' field).
- client._connection = _Connection(Exception(''), resource)
- with self.assertRaises(Exception):
- client.get_dataset(dataset_ref)
-
- # Zero-length errors field.
- client._connection = _Connection(ServerError(''), resource)
- with self.assertRaises(ServerError):
- client.get_dataset(dataset_ref)
-
- # Non-retryable reason.
- client._connection = _Connection(
- ServerError('', errors=[{'reason': 'serious'}]),
- resource)
- with self.assertRaises(ServerError):
- client.get_dataset(dataset_ref)
-
- # Retryable reason, but retry is disabled.
- client._connection = _Connection(
- ServerError('', errors=[{'reason': 'backendError'}]),
- resource)
- with self.assertRaises(ServerError):
- client.get_dataset(dataset_ref, retry=None)
-
- # Retryable reason, default retry: success.
- client._connection = _Connection(
- ServerError('', errors=[{'reason': 'backendError'}]),
- resource)
- dataset = client.get_dataset(dataset_ref)
- self.assertEqual(dataset.dataset_id, self.DS_ID)
-
- def test_create_dataset_minimal(self):
- from google.cloud.bigquery.dataset import Dataset
-
- PATH = 'projects/%s/datasets' % self.PROJECT
- RESOURCE = {
- 'datasetReference':
- {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
- 'etag': "etag",
- 'id': "%s:%s" % (self.PROJECT, self.DS_ID),
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(RESOURCE)
- ds = client.create_dataset(Dataset(client.dataset(self.DS_ID)))
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- SENT = {
- 'datasetReference':
- {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
- 'labels': {},
- }
- self.assertEqual(req['data'], SENT)
- self.assertEqual(ds.dataset_id, self.DS_ID)
- self.assertEqual(ds.project, self.PROJECT)
- self.assertEqual(ds.etag, RESOURCE['etag'])
- self.assertEqual(ds.full_dataset_id, RESOURCE['id'])
-
- def test_create_dataset_w_attrs(self):
- from google.cloud.bigquery.dataset import Dataset, AccessEntry
-
- PATH = 'projects/%s/datasets' % self.PROJECT
- DESCRIPTION = 'DESC'
- FRIENDLY_NAME = 'FN'
- LOCATION = 'US'
- USER_EMAIL = 'phred@example.com'
- LABELS = {'color': 'red'}
- VIEW = {
- 'projectId': 'my-proj',
- 'datasetId': 'starry-skies',
- 'tableId': 'northern-hemisphere',
- }
- RESOURCE = {
- 'datasetReference':
- {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
- 'etag': "etag",
- 'id': "%s:%s" % (self.PROJECT, self.DS_ID),
- 'description': DESCRIPTION,
- 'friendlyName': FRIENDLY_NAME,
- 'location': LOCATION,
- 'defaultTableExpirationMs': 3600,
- 'labels': LABELS,
- 'access': [
- {'role': 'OWNER', 'userByEmail': USER_EMAIL},
- {'view': VIEW}],
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(RESOURCE)
- entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL),
- AccessEntry(None, 'view', VIEW)]
- ds_arg = Dataset(client.dataset(self.DS_ID))
- ds_arg.access_entries = entries
- ds_arg.description = DESCRIPTION
- ds_arg.friendly_name = FRIENDLY_NAME
- ds_arg.default_table_expiration_ms = 3600
- ds_arg.location = LOCATION
- ds_arg.labels = LABELS
- ds = client.create_dataset(ds_arg)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- SENT = {
- 'datasetReference':
- {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
- 'description': DESCRIPTION,
- 'friendlyName': FRIENDLY_NAME,
- 'location': LOCATION,
- 'defaultTableExpirationMs': 3600,
- 'access': [
- {'role': 'OWNER', 'userByEmail': USER_EMAIL},
- {'view': VIEW}],
- 'labels': LABELS,
- }
- self.assertEqual(req['data'], SENT)
- self.assertEqual(ds.dataset_id, self.DS_ID)
- self.assertEqual(ds.project, self.PROJECT)
- self.assertEqual(ds.etag, RESOURCE['etag'])
- self.assertEqual(ds.full_dataset_id, RESOURCE['id'])
- self.assertEqual(ds.description, DESCRIPTION)
- self.assertEqual(ds.friendly_name, FRIENDLY_NAME)
- self.assertEqual(ds.location, LOCATION)
- self.assertEqual(ds.default_table_expiration_ms, 3600)
- self.assertEqual(ds.labels, LABELS)
-
- def test_create_table_w_day_partition(self):
- from google.cloud.bigquery.table import Table
-
- path = 'projects/%s/datasets/%s/tables' % (
- self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- }
- conn = client._connection = _Connection(resource)
- table = Table(self.TABLE_REF)
- table.partitioning_type = 'DAY'
-
- got = client.create_table(table)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'timePartitioning': {'type': 'DAY'},
- 'labels': {},
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(table.partitioning_type, "DAY")
- self.assertEqual(got.table_id, self.TABLE_ID)
-
- def test_create_table_w_day_partition_and_expire(self):
- from google.cloud.bigquery.table import Table
-
- path = 'projects/%s/datasets/%s/tables' % (
- self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- }
- conn = client._connection = _Connection(resource)
- table = Table(self.TABLE_REF)
- table.partitioning_type = 'DAY'
- table.partition_expiration = 100
-
- got = client.create_table(table)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'timePartitioning': {'type': 'DAY', 'expirationMs': 100},
- 'labels': {},
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(table.partitioning_type, "DAY")
- self.assertEqual(table.partition_expiration, 100)
- self.assertEqual(got.table_id, self.TABLE_ID)
-
- def test_create_table_w_schema_and_query(self):
- from google.cloud.bigquery.table import Table, SchemaField
-
- path = 'projects/%s/datasets/%s/tables' % (
- self.PROJECT, self.DS_ID)
- query = 'SELECT * from %s:%s' % (self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]
- },
- 'view': {'query': query},
- }
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED')
- ]
- conn = client._connection = _Connection(resource)
- table = Table(self.TABLE_REF, schema=schema)
- table.view_query = query
-
- got = client.create_table(table)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]
- },
- 'view': {'query': query, 'useLegacySql': False},
- 'labels': {},
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(got.table_id, self.TABLE_ID)
- self.assertEqual(got.project, self.PROJECT)
- self.assertEqual(got.dataset_id, self.DS_ID)
- self.assertEqual(got.schema, schema)
- self.assertEqual(got.view_query, query)
-
- def test_create_table_w_external(self):
- from google.cloud.bigquery.table import Table
- from google.cloud.bigquery.external_config import ExternalConfig
-
- path = 'projects/%s/datasets/%s/tables' % (
- self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'externalDataConfiguration': {
- 'sourceFormat': 'CSV',
- 'autodetect': True,
- },
- }
- conn = client._connection = _Connection(resource)
- table = Table(self.TABLE_REF)
- ec = ExternalConfig('CSV')
- ec.autodetect = True
- table.external_data_configuration = ec
-
- got = client.create_table(table)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- 'externalDataConfiguration': {
- 'sourceFormat': 'CSV',
- 'autodetect': True,
- },
- 'labels': {},
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(got.table_id, self.TABLE_ID)
- self.assertEqual(got.project, self.PROJECT)
- self.assertEqual(got.dataset_id, self.DS_ID)
- self.assertEqual(got.external_data_configuration.source_format, 'CSV')
- self.assertEqual(got.external_data_configuration.autodetect, True)
-
- def test_get_table(self):
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- }
- conn = client._connection = _Connection(resource)
- table = client.get_table(self.TABLE_REF)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % path)
- self.assertEqual(table.table_id, self.TABLE_ID)
-
- def test_update_dataset_w_invalid_field(self):
- from google.cloud.bigquery.dataset import Dataset
-
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(ValueError):
- client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"])
-
- def test_update_dataset(self):
- from google.cloud.bigquery.dataset import Dataset, AccessEntry
-
- PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)
- DESCRIPTION = 'DESCRIPTION'
- FRIENDLY_NAME = 'TITLE'
- LOCATION = 'loc'
- LABELS = {'priority': 'high'}
- ACCESS = [
- {'role': 'OWNER', 'userByEmail': 'phred@example.com'},
- ]
- EXP = 17
- RESOURCE = {
- 'datasetReference':
- {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
- 'etag': "etag",
- 'description': DESCRIPTION,
- 'friendlyName': FRIENDLY_NAME,
- 'location': LOCATION,
- 'defaultTableExpirationMs': EXP,
- 'labels': LABELS,
- 'access': ACCESS,
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(RESOURCE, RESOURCE)
- ds = Dataset(client.dataset(self.DS_ID))
- ds.description = DESCRIPTION
- ds.friendly_name = FRIENDLY_NAME
- ds.location = LOCATION
- ds.default_table_expiration_ms = EXP
- ds.labels = LABELS
- ds.access_entries = [
- AccessEntry('OWNER', 'userByEmail', 'phred@example.com')]
- ds2 = client.update_dataset(
- ds, ['description', 'friendly_name', 'location', 'labels',
- 'access_entries'])
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'PATCH')
- SENT = {
- 'description': DESCRIPTION,
- 'friendlyName': FRIENDLY_NAME,
- 'location': LOCATION,
- 'labels': LABELS,
- 'access': ACCESS,
- }
- self.assertEqual(req['data'], SENT)
- self.assertEqual(req['path'], '/' + PATH)
- self.assertIsNone(req['headers'])
- self.assertEqual(ds2.description, ds.description)
- self.assertEqual(ds2.friendly_name, ds.friendly_name)
- self.assertEqual(ds2.location, ds.location)
- self.assertEqual(ds2.labels, ds.labels)
- self.assertEqual(ds2.access_entries, ds.access_entries)
-
- # ETag becomes If-Match header.
- ds._properties['etag'] = 'etag'
- client.update_dataset(ds, [])
- req = conn._requested[1]
- self.assertEqual(req['headers']['If-Match'], 'etag')
-
- def test_update_table(self):
- from google.cloud.bigquery.table import Table, SchemaField
-
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- description = 'description'
- title = 'title'
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]
- },
- 'etag': 'etag',
- 'description': description,
- 'friendlyName': title,
- 'labels': {'x': 'y'},
- }
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED')
- ]
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(resource, resource)
- table = Table(self.TABLE_REF, schema=schema)
- table.description = description
- table.friendly_name = title
- table.labels = {'x': 'y'}
-
- updated_table = client.update_table(
- table, ['schema', 'description', 'friendly_name', 'labels'])
-
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]},
- 'description': description,
- 'friendlyName': title,
- 'labels': {'x': 'y'},
- }
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'PATCH')
- self.assertEqual(req['data'], sent)
- self.assertEqual(req['path'], '/' + path)
- self.assertIsNone(req['headers'])
- self.assertEqual(updated_table.description, table.description)
- self.assertEqual(updated_table.friendly_name, table.friendly_name)
- self.assertEqual(updated_table.schema, table.schema)
- self.assertEqual(updated_table.labels, table.labels)
-
- # ETag becomes If-Match header.
- table._properties['etag'] = 'etag'
- client.update_table(table, [])
- req = conn._requested[1]
- self.assertEqual(req['headers']['If-Match'], 'etag')
-
- def test_update_table_only_use_legacy_sql(self):
- from google.cloud.bigquery.table import Table
-
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'view': {'useLegacySql': True}
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(resource)
- table = Table(self.TABLE_REF)
- table.view_use_legacy_sql = True
-
- updated_table = client.update_table(table, ['view_use_legacy_sql'])
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'PATCH')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'view': {'useLegacySql': True}
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(
- updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
-
- def test_update_table_w_query(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud._helpers import _millis
- from google.cloud.bigquery.table import Table, SchemaField
-
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- query = 'select fullname, age from person_ages'
- location = 'EU'
- exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC)
- schema_resource = {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED')
- ]
- resource = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'schema': schema_resource,
- 'view': {'query': query, 'useLegacySql': True},
- 'location': location,
- 'expirationTime': _millis(exp_time)
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(resource)
- table = Table(self.TABLE_REF, schema=schema)
- table.location = location
- table.expires = exp_time
- table.view_query = query
- table.view_use_legacy_sql = True
- updated_properties = ['schema', 'view_query', 'location',
- 'expires', 'view_use_legacy_sql']
-
- updated_table = client.update_table(table, updated_properties)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'PATCH')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'view': {'query': query, 'useLegacySql': True},
- 'location': location,
- 'expirationTime': _millis(exp_time),
- 'schema': schema_resource,
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(updated_table.schema, table.schema)
- self.assertEqual(updated_table.view_query, table.view_query)
- self.assertEqual(updated_table.location, table.location)
- self.assertEqual(updated_table.expires, table.expires)
- self.assertEqual(
- updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
-
- def test_update_table_w_schema_None(self):
- # Simulate deleting the schema. The back-end may not actually allow
- # this operation, but the spec marks the field as optional.
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- resource1 = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID},
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}
- }
- resource2 = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID},
- 'schema': {'fields': []},
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(resource1, resource2)
- table = client.get_table(self.TABLE_REF)
- table.schema = None
-
- updated_table = client.update_table(table, ['schema'])
-
- self.assertEqual(len(conn._requested), 2)
- req = conn._requested[1]
- self.assertEqual(req['method'], 'PATCH')
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'schema': None
- }
- self.assertEqual(req['data'], sent)
- self.assertEqual(req['path'], '/%s' % path)
- self.assertEqual(updated_table.schema, table.schema)
-
- def test_update_table_delete_property(self):
- from google.cloud.bigquery.table import Table
-
- description = 'description'
- title = 'title'
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- resource1 = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'description': description,
- 'friendlyName': title,
- }
- resource2 = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'description': None,
- }
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(resource1, resource2)
- table = Table(self.TABLE_REF)
- table.description = description
- table.friendly_name = title
- table2 = client.update_table(table, ['description', 'friendly_name'])
- self.assertEqual(table2.description, table.description)
- table2.description = None
-
- table3 = client.update_table(table2, ['description'])
- self.assertEqual(len(conn._requested), 2)
- req = conn._requested[1]
- self.assertEqual(req['method'], 'PATCH')
- self.assertEqual(req['path'], '/%s' % path)
- sent = {
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID
- },
- 'description': None,
- }
- self.assertEqual(req['data'], sent)
- self.assertIsNone(table3.description)
-
- def test_list_dataset_tables_empty(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection({})
-
- dataset = client.dataset(self.DS_ID)
- iterator = client.list_dataset_tables(dataset)
- self.assertIs(iterator.dataset, dataset)
- page = six.next(iterator.pages)
- tables = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(tables, [])
- self.assertIsNone(token)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID)
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_list_dataset_tables_defaults(self):
- from google.cloud.bigquery.table import Table
-
- TABLE_1 = 'table_one'
- TABLE_2 = 'table_two'
- PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID)
- TOKEN = 'TOKEN'
- DATA = {
- 'nextPageToken': TOKEN,
- 'tables': [
- {'kind': 'bigquery#table',
- 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1),
- 'tableReference': {'tableId': TABLE_1,
- 'datasetId': self.DS_ID,
- 'projectId': self.PROJECT},
- 'type': 'TABLE'},
- {'kind': 'bigquery#table',
- 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2),
- 'tableReference': {'tableId': TABLE_2,
- 'datasetId': self.DS_ID,
- 'projectId': self.PROJECT},
- 'type': 'TABLE'},
- ]
- }
-
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(DATA)
- dataset = client.dataset(self.DS_ID)
-
- iterator = client.list_dataset_tables(dataset)
- self.assertIs(iterator.dataset, dataset)
- page = six.next(iterator.pages)
- tables = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(tables), len(DATA['tables']))
- for found, expected in zip(tables, DATA['tables']):
- self.assertIsInstance(found, Table)
- self.assertEqual(found.full_table_id, expected['id'])
- self.assertEqual(found.table_type, expected['type'])
- self.assertEqual(token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_list_dataset_tables_explicit(self):
- from google.cloud.bigquery.table import Table
-
- TABLE_1 = 'table_one'
- TABLE_2 = 'table_two'
- PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID)
- TOKEN = 'TOKEN'
- DATA = {
- 'tables': [
- {'kind': 'bigquery#table',
- 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1),
- 'tableReference': {'tableId': TABLE_1,
- 'datasetId': self.DS_ID,
- 'projectId': self.PROJECT},
- 'type': 'TABLE'},
- {'kind': 'bigquery#table',
- 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2),
- 'tableReference': {'tableId': TABLE_2,
- 'datasetId': self.DS_ID,
- 'projectId': self.PROJECT},
- 'type': 'TABLE'},
- ]
- }
-
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection(DATA)
- dataset = client.dataset(self.DS_ID)
-
- iterator = client.list_dataset_tables(
- dataset, max_results=3, page_token=TOKEN)
- self.assertIs(iterator.dataset, dataset)
- page = six.next(iterator.pages)
- tables = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(tables), len(DATA['tables']))
- for found, expected in zip(tables, DATA['tables']):
- self.assertIsInstance(found, Table)
- self.assertEqual(found.full_table_id, expected['id'])
- self.assertEqual(found.table_type, expected['type'])
- self.assertIsNone(token)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'],
- {'maxResults': 3, 'pageToken': TOKEN})
-
- def test_list_dataset_tables_wrong_type(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.list_dataset_tables(client.dataset(self.DS_ID).table("foo"))
-
- def test_delete_dataset(self):
- from google.cloud.bigquery.dataset import Dataset
-
- PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = _Connection({}, {})
- ds_ref = client.dataset(self.DS_ID)
- for arg in (ds_ref, Dataset(ds_ref)):
- client.delete_dataset(arg)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'DELETE')
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_delete_dataset_wrong_type(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.delete_dataset(client.dataset(self.DS_ID).table("foo"))
-
- def test_delete_table(self):
- from google.cloud.bigquery.table import Table
-
- path = 'projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection({}, {})
-
- for arg in (self.TABLE_REF, Table(self.TABLE_REF)):
- client.delete_table(arg)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'DELETE')
- self.assertEqual(req['path'], '/%s' % path)
-
- def test_delete_table_w_wrong_type(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.delete_table(client.dataset(self.DS_ID))
-
- def test_job_from_resource_unknown_type(self):
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- with self.assertRaises(ValueError):
- client.job_from_resource({'configuration': {'nonesuch': {}}})
-
- def test_get_job_miss_w_explicit_project(self):
- from google.cloud.exceptions import NotFound
-
- OTHER_PROJECT = 'OTHER_PROJECT'
- JOB_ID = 'NONESUCH'
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection()
-
- with self.assertRaises(NotFound):
- client.get_job(JOB_ID, project=OTHER_PROJECT)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH')
- self.assertEqual(req['query_params'], {'projection': 'full'})
-
- def test_get_job_hit(self):
- from google.cloud.bigquery.job import QueryJob
-
- JOB_ID = 'query_job'
- QUERY_DESTINATION_TABLE = 'query_destination_table'
- QUERY = 'SELECT * from test_dataset:test_table'
- ASYNC_QUERY_DATA = {
- 'id': '{}:{}'.format(self.PROJECT, JOB_ID),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'query_job',
- },
- 'state': 'DONE',
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': QUERY_DESTINATION_TABLE,
- },
- 'createDisposition': 'CREATE_IF_NEEDED',
- 'writeDisposition': 'WRITE_TRUNCATE',
- }
- },
- }
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(ASYNC_QUERY_DATA)
-
- job = client.get_job(JOB_ID)
-
- self.assertIsInstance(job, QueryJob)
- self.assertEqual(job.job_id, JOB_ID)
- self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED')
- self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE')
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job')
- self.assertEqual(req['query_params'], {'projection': 'full'})
-
- def test_list_jobs_defaults(self):
- from google.cloud.bigquery.job import LoadJob
- from google.cloud.bigquery.job import CopyJob
- from google.cloud.bigquery.job import ExtractJob
- from google.cloud.bigquery.job import QueryJob
-
- SOURCE_TABLE = 'source_table'
- DESTINATION_TABLE = 'destination_table'
- QUERY_DESTINATION_TABLE = 'query_destination_table'
- SOURCE_URI = 'gs://test_bucket/src_object*'
- DESTINATION_URI = 'gs://test_bucket/dst_object*'
- JOB_TYPES = {
- 'load_job': LoadJob,
- 'copy_job': CopyJob,
- 'extract_job': ExtractJob,
- 'query_job': QueryJob,
- }
- PATH = 'projects/%s/jobs' % self.PROJECT
- TOKEN = 'TOKEN'
- QUERY = 'SELECT * from test_dataset:test_table'
- ASYNC_QUERY_DATA = {
- 'id': '%s:%s' % (self.PROJECT, 'query_job'),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'query_job',
- },
- 'state': 'DONE',
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': QUERY_DESTINATION_TABLE,
- },
- 'createDisposition': 'CREATE_IF_NEEDED',
- 'writeDisposition': 'WRITE_TRUNCATE',
- }
- },
- }
- EXTRACT_DATA = {
- 'id': '%s:%s' % (self.PROJECT, 'extract_job'),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'extract_job',
- },
- 'state': 'DONE',
- 'configuration': {
- 'extract': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE_TABLE,
- },
- 'destinationUris': [DESTINATION_URI],
- }
- },
- }
- COPY_DATA = {
- 'id': '%s:%s' % (self.PROJECT, 'copy_job'),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'copy_job',
- },
- 'state': 'DONE',
- 'configuration': {
- 'copy': {
- 'sourceTables': [{
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE_TABLE,
- }],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': DESTINATION_TABLE,
- },
- }
- },
- }
- LOAD_DATA = {
- 'id': '%s:%s' % (self.PROJECT, 'load_job'),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'load_job',
- },
- 'state': 'DONE',
- 'configuration': {
- 'load': {
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE_TABLE,
- },
- 'sourceUris': [SOURCE_URI],
- }
- },
- }
- DATA = {
- 'nextPageToken': TOKEN,
- 'jobs': [
- ASYNC_QUERY_DATA,
- EXTRACT_DATA,
- COPY_DATA,
- LOAD_DATA,
- ]
- }
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_jobs()
- page = six.next(iterator.pages)
- jobs = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(jobs), len(DATA['jobs']))
- for found, expected in zip(jobs, DATA['jobs']):
- name = expected['jobReference']['jobId']
- self.assertIsInstance(found, JOB_TYPES[name])
- self.assertEqual(found.job_id, name)
- self.assertEqual(token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'], {'projection': 'full'})
-
- def test_list_jobs_load_job_wo_sourceUris(self):
- from google.cloud.bigquery.job import LoadJob
-
- SOURCE_TABLE = 'source_table'
- JOB_TYPES = {
- 'load_job': LoadJob,
- }
- PATH = 'projects/%s/jobs' % self.PROJECT
- TOKEN = 'TOKEN'
- LOAD_DATA = {
- 'id': '%s:%s' % (self.PROJECT, 'load_job'),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'load_job',
- },
- 'state': 'DONE',
- 'configuration': {
- 'load': {
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE_TABLE,
- },
- }
- },
- }
- DATA = {
- 'nextPageToken': TOKEN,
- 'jobs': [
- LOAD_DATA,
- ]
- }
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_jobs()
- page = six.next(iterator.pages)
- jobs = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(jobs), len(DATA['jobs']))
- for found, expected in zip(jobs, DATA['jobs']):
- name = expected['jobReference']['jobId']
- self.assertIsInstance(found, JOB_TYPES[name])
- self.assertEqual(found.job_id, name)
- self.assertEqual(token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'], {'projection': 'full'})
-
- def test_list_jobs_explicit_missing(self):
- PATH = 'projects/%s/jobs' % self.PROJECT
- DATA = {}
- TOKEN = 'TOKEN'
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = _Connection(DATA)
-
- iterator = client.list_jobs(max_results=1000, page_token=TOKEN,
- all_users=True, state_filter='done')
- page = six.next(iterator.pages)
- jobs = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(jobs), 0)
- self.assertIsNone(token)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'],
- {'projection': 'full',
- 'maxResults': 1000,
- 'pageToken': TOKEN,
- 'allUsers': True,
- 'stateFilter': 'done'})
-
- def test_load_table_from_uri(self):
- from google.cloud.bigquery.job import LoadJob
-
- JOB = 'job_name'
- DESTINATION = 'destination_table'
- SOURCE_URI = 'http://example.com/source.csv'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'load': {
- 'sourceUris': [SOURCE_URI],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': DESTINATION,
- },
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- destination = client.dataset(self.DS_ID).table(DESTINATION)
-
- job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB)
-
- # Check that load_table_from_uri actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT)
-
- self.assertIsInstance(job, LoadJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(list(job.source_uris), [SOURCE_URI])
- self.assertIs(job.destination, destination)
-
- conn = client._connection = _Connection(RESOURCE)
-
- job = client.load_table_from_uri([SOURCE_URI], destination, job_id=JOB)
- self.assertIsInstance(job, LoadJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(list(job.source_uris), [SOURCE_URI])
- self.assertIs(job.destination, destination)
-
- @staticmethod
- def _mock_requests_response(status_code, headers, content=b''):
- return mock.Mock(
- content=content, headers=headers, status_code=status_code,
- spec=['content', 'headers', 'status_code'])
-
- def _mock_transport(self, status_code, headers, content=b''):
- fake_transport = mock.Mock(spec=['request'])
- fake_response = self._mock_requests_response(
- status_code, headers, content=content)
- fake_transport.request.return_value = fake_response
- return fake_transport
-
- def _initiate_resumable_upload_helper(self, num_retries=None):
- from google.resumable_media.requests import ResumableUpload
- from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE
- from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE
- from google.cloud.bigquery.client import _get_upload_headers
- from google.cloud.bigquery.job import LoadJob, LoadJobConfig
-
- # Create mocks to be checked for doing transport.
- resumable_url = 'http://test.invalid?upload_id=hey-you'
- response_headers = {'location': resumable_url}
- fake_transport = self._mock_transport(
- http_client.OK, response_headers)
- client = self._make_one(project=self.PROJECT, _http=fake_transport)
- conn = client._connection = _Connection()
-
- # Create some mock arguments and call the method under test.
- data = b'goodbye gudbi gootbee'
- stream = io.BytesIO(data)
- config = LoadJobConfig()
- config.source_format = 'CSV'
- job = LoadJob(None, None, self.TABLE_REF, client, job_config=config)
- metadata = job._build_resource()
- upload, transport = client._initiate_resumable_upload(
- stream, metadata, num_retries)
-
- # Check the returned values.
- self.assertIsInstance(upload, ResumableUpload)
- upload_url = (
- 'https://www.googleapis.com/upload/bigquery/v2/projects/' +
- self.PROJECT +
- '/jobs?uploadType=resumable')
- self.assertEqual(upload.upload_url, upload_url)
- expected_headers = _get_upload_headers(conn.USER_AGENT)
- self.assertEqual(upload._headers, expected_headers)
- self.assertFalse(upload.finished)
- self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE)
- self.assertIs(upload._stream, stream)
- self.assertIsNone(upload._total_bytes)
- self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE)
- self.assertEqual(upload.resumable_url, resumable_url)
-
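- # Without an explicit ``num_retries`` the upload falls back to the
- # time-bounded default retry strategy; with one, retries are
- # count-limited instead, as the assertions below verify.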
- retry_strategy = upload._retry_strategy
- self.assertEqual(retry_strategy.max_sleep, 64.0)
- if num_retries is None:
- self.assertEqual(retry_strategy.max_cumulative_retry, 600.0)
- self.assertIsNone(retry_strategy.max_retries)
- else:
- self.assertIsNone(retry_strategy.max_cumulative_retry)
- self.assertEqual(retry_strategy.max_retries, num_retries)
- self.assertIs(transport, fake_transport)
- # Make sure we never read from the stream.
- self.assertEqual(stream.tell(), 0)
-
- # Check the mocks.
- request_headers = expected_headers.copy()
- request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE
- fake_transport.request.assert_called_once_with(
- 'POST',
- upload_url,
- data=json.dumps(metadata).encode('utf-8'),
- headers=request_headers,
- )
-
- def test__initiate_resumable_upload(self):
- self._initiate_resumable_upload_helper()
-
- def test__initiate_resumable_upload_with_retry(self):
- self._initiate_resumable_upload_helper(num_retries=11)
-
- def _do_multipart_upload_success_helper(
- self, get_boundary, num_retries=None):
- from google.cloud.bigquery.client import _get_upload_headers
- from google.cloud.bigquery.job import LoadJob, LoadJobConfig
-
- fake_transport = self._mock_transport(http_client.OK, {})
- client = self._make_one(project=self.PROJECT, _http=fake_transport)
- conn = client._connection = _Connection()
-
- # Create some mock arguments.
- data = b'Bzzzz-zap \x00\x01\xf4'
- stream = io.BytesIO(data)
- config = LoadJobConfig()
- config.source_format = 'CSV'
- job = LoadJob(None, None, self.TABLE_REF, client, job_config=config)
- metadata = job._build_resource()
- size = len(data)
- response = client._do_multipart_upload(
- stream, metadata, size, num_retries)
-
- # Check the mocks and the returned value.
- self.assertIs(response, fake_transport.request.return_value)
- self.assertEqual(stream.tell(), size)
- get_boundary.assert_called_once_with()
-
- upload_url = (
- 'https://www.googleapis.com/upload/bigquery/v2/projects/' +
- self.PROJECT +
- '/jobs?uploadType=multipart')
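- # Expected multipart body: the JSON job configuration part followed
- # by the raw bytes, joined with the boundary pinned by the
- # ``get_boundary`` mock.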
- payload = (
- b'--==0==\r\n' +
- b'content-type: application/json; charset=UTF-8\r\n\r\n' +
- json.dumps(metadata).encode('utf-8') + b'\r\n' +
- b'--==0==\r\n' +
- b'content-type: */*\r\n\r\n' +
- data + b'\r\n' +
- b'--==0==--')
- headers = _get_upload_headers(conn.USER_AGENT)
- headers['content-type'] = b'multipart/related; boundary="==0=="'
- fake_transport.request.assert_called_once_with(
- 'POST',
- upload_url,
- data=payload,
- headers=headers,
- )
-
- @mock.patch(u'google.resumable_media._upload.get_boundary',
- return_value=b'==0==')
- def test__do_multipart_upload(self, get_boundary):
- self._do_multipart_upload_success_helper(get_boundary)
-
- @mock.patch(u'google.resumable_media._upload.get_boundary',
- return_value=b'==0==')
- def test__do_multipart_upload_with_retry(self, get_boundary):
- self._do_multipart_upload_success_helper(get_boundary, num_retries=8)
-
- def test_copy_table(self):
- from google.cloud.bigquery.job import CopyJob
-
- JOB = 'job_name'
- SOURCE = 'source_table'
- DESTINATION = 'destination_table'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'copy': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE,
- },
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': DESTINATION,
- },
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- dataset = client.dataset(self.DS_ID)
- source = dataset.table(SOURCE)
- destination = dataset.table(DESTINATION)
-
- job = client.copy_table(source, destination, job_id=JOB)
-
- # Check that copy_table actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT)
-
- self.assertIsInstance(job, CopyJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(list(job.sources), [source])
- self.assertIs(job.destination, destination)
-
- conn = client._connection = _Connection(RESOURCE)
- source2 = dataset.table(SOURCE + '2')
- job = client.copy_table([source, source2], destination, job_id=JOB)
- self.assertIsInstance(job, CopyJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(list(job.sources), [source, source2])
- self.assertIs(job.destination, destination)
-
- def test_extract_table(self):
- from google.cloud.bigquery.job import ExtractJob
-
- JOB = 'job_id'
- SOURCE = 'source_table'
- DESTINATION = 'gs://bucket_name/object_name'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'extract': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE,
- },
- 'destinationUris': [DESTINATION],
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- dataset = client.dataset(self.DS_ID)
- source = dataset.table(SOURCE)
-
- job = client.extract_table(source, DESTINATION, job_id=JOB)
-
- # Check that extract_table actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
-
- # Check the job resource.
- self.assertIsInstance(job, ExtractJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(job.source, source)
- self.assertEqual(list(job.destination_uris), [DESTINATION])
-
- def test_extract_table_generated_job_id(self):
- from google.cloud.bigquery.job import ExtractJob
- from google.cloud.bigquery.job import ExtractJobConfig
- from google.cloud.bigquery.job import DestinationFormat
-
- JOB = 'job_id'
- SOURCE = 'source_table'
- DESTINATION = 'gs://bucket_name/object_name'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'extract': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE,
- },
- 'destinationUris': [DESTINATION],
- 'destinationFormat': 'NEWLINE_DELIMITED_JSON',
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- dataset = client.dataset(self.DS_ID)
- source = dataset.table(SOURCE)
- job_config = ExtractJobConfig()
- job_config.destination_format = (
- DestinationFormat.NEWLINE_DELIMITED_JSON)
-
- job = client.extract_table(source, DESTINATION, job_config=job_config)
-
- # Check that extract_table actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- self.assertIsInstance(
- req['data']['jobReference']['jobId'], six.string_types)
-
- # Check the job resource.
- self.assertIsInstance(job, ExtractJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.source, source)
- self.assertEqual(list(job.destination_uris), [DESTINATION])
-
- def test_extract_table_w_destination_uris(self):
- from google.cloud.bigquery.job import ExtractJob
-
- JOB = 'job_id'
- SOURCE = 'source_table'
- DESTINATION1 = 'gs://bucket_name/object_one'
- DESTINATION2 = 'gs://bucket_name/object_two'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'extract': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': SOURCE,
- },
- 'destinationUris': [
- DESTINATION1,
- DESTINATION2,
- ],
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- dataset = client.dataset(self.DS_ID)
- source = dataset.table(SOURCE)
-
- job = client.extract_table(
- source, [DESTINATION1, DESTINATION2], job_id=JOB)
-
- # Check that extract_table actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
-
- # Check the job resource.
- self.assertIsInstance(job, ExtractJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(job.source, source)
- self.assertEqual(
- list(job.destination_uris), [DESTINATION1, DESTINATION2])
-
- def test_query_defaults(self):
- from google.cloud.bigquery.job import QueryJob
-
- QUERY = 'select count(*) from persons'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'some-random-id',
- },
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'useLegacySql': False,
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
-
- job = client.query(QUERY)
-
- self.assertIsInstance(job, QueryJob)
- self.assertIsInstance(job.job_id, six.string_types)
- self.assertIs(job._client, client)
- self.assertEqual(job.query, QUERY)
- self.assertEqual(job.udf_resources, [])
- self.assertEqual(job.query_parameters, [])
-
- # Check that query actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- sent = req['data']
- self.assertIsInstance(
- sent['jobReference']['jobId'], six.string_types)
- sent_config = sent['configuration']['query']
- self.assertEqual(sent_config['query'], QUERY)
- self.assertFalse(sent_config['useLegacySql'])
-
- def test_query_w_udf_resources(self):
- from google.cloud.bigquery.job import QueryJob
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import UDFResource
-
- RESOURCE_URI = 'gs://some-bucket/js/lib.js'
- JOB = 'job_name'
- QUERY = 'select count(*) from persons'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'useLegacySql': True,
- 'userDefinedFunctionResources': [
- {'resourceUri': RESOURCE_URI},
- ],
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- udf_resources = [UDFResource("resourceUri", RESOURCE_URI)]
- config = QueryJobConfig()
- config.udf_resources = udf_resources
- config.use_legacy_sql = True
-
- job = client.query(QUERY, job_config=config, job_id=JOB)
-
- self.assertIsInstance(job, QueryJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(job.query, QUERY)
- self.assertEqual(job.udf_resources, udf_resources)
- self.assertEqual(job.query_parameters, [])
-
- # Check that query actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- sent = req['data']
- self.assertIsInstance(
- sent['jobReference']['jobId'], six.string_types)
- sent_config = sent['configuration']['query']
- self.assertEqual(sent_config['query'], QUERY)
- self.assertTrue(sent_config['useLegacySql'])
- self.assertEqual(
- sent_config['userDefinedFunctionResources'][0],
- {'resourceUri': RESOURCE_URI})
-
- def test_query_w_query_parameters(self):
- from google.cloud.bigquery.job import QueryJob
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- JOB = 'job_name'
- QUERY = 'select count(*) from persons'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'useLegacySql': False,
- 'queryParameters': [
- {
- 'name': 'foo',
- 'parameterType': {'type': 'INT64'},
- 'parameterValue': {'value': '123'}
- },
- ],
- },
- },
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESOURCE)
- query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)]
- config = QueryJobConfig()
- config.query_parameters = query_parameters
-
- job = client.query(QUERY, job_config=config, job_id=JOB)
-
- self.assertIsInstance(job, QueryJob)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_id, JOB)
- self.assertEqual(job.query, QUERY)
- self.assertEqual(job.udf_resources, [])
- self.assertEqual(job.query_parameters, query_parameters)
-
- # Check that query actually starts the job.
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- sent = req['data']
- self.assertEqual(sent['jobReference']['jobId'], JOB)
- sent_config = sent['configuration']['query']
- self.assertEqual(sent_config['query'], QUERY)
- self.assertFalse(sent_config['useLegacySql'])
- self.assertEqual(
- sent_config['queryParameters'][0],
- {
- 'name': 'foo',
- 'parameterType': {'type': 'INT64'},
- 'parameterValue': {'value': '123'}
- })
-
- def test_create_rows_wo_schema(self):
- from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
-
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- table = Table(self.TABLE_REF)
- ROWS = [
- ('Phred Phlyntstone', 32),
- ('Bharney Rhubble', 33),
- ('Wylma Phlyntstone', 29),
- ('Bhettye Rhubble', 27),
- ]
-
- with self.assertRaises(ValueError) as exc:
- client.create_rows(table, ROWS)
-
- self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,))
-
- def test_create_rows_w_schema(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud._helpers import _datetime_to_rfc3339
- from google.cloud._helpers import _microseconds_from_datetime
- from google.cloud.bigquery.table import Table, SchemaField
-
- WHEN_TS = 1437767599.006
- WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
- tzinfo=UTC)
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection({})
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED'),
- SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
- ]
- table = Table(self.TABLE_REF, schema=schema)
- ROWS = [
- ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)),
- ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)),
- ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)),
- ('Bhettye Rhubble', 27, None),
- ]
-
- def _row_data(row):
- joined = row[2]
- if isinstance(row[2], datetime.datetime):
- joined = _microseconds_from_datetime(joined) * 1e-6
- return {'full_name': row[0],
- 'age': str(row[1]),
- 'joined': joined}
-
- SENT = {
- 'rows': [{
- 'json': _row_data(row),
- 'insertId': str(i),
- } for i, row in enumerate(ROWS)],
- }
-
- with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
- errors = client.create_rows(table, ROWS)
-
- self.assertEqual(len(errors), 0)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_create_rows_w_list_of_dictionaries(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud._helpers import _datetime_to_rfc3339
- from google.cloud._helpers import _microseconds_from_datetime
- from google.cloud.bigquery.table import Table, SchemaField
-
- WHEN_TS = 1437767599.006
- WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
- tzinfo=UTC)
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection({})
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED'),
- SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
- ]
- table = Table(self.TABLE_REF, schema=schema)
- ROWS = [
- {
- 'full_name': 'Phred Phlyntstone', 'age': 32,
- 'joined': _datetime_to_rfc3339(WHEN)
- },
- {
- 'full_name': 'Bharney Rhubble', 'age': 33,
- 'joined': WHEN + datetime.timedelta(seconds=1)
- },
- {
- 'full_name': 'Wylma Phlyntstone', 'age': 29,
- 'joined': WHEN + datetime.timedelta(seconds=2)
- },
- {
- 'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None
- },
- ]
-
- def _row_data(row):
- joined = row['joined']
- if isinstance(joined, datetime.datetime):
- row['joined'] = _microseconds_from_datetime(joined) * 1e-6
- row['age'] = str(row['age'])
- return row
-
- SENT = {
- 'rows': [{
- 'json': _row_data(row),
- 'insertId': str(i),
- } for i, row in enumerate(ROWS)],
- }
-
- with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
- errors = client.create_rows(table, ROWS)
-
- self.assertEqual(len(errors), 0)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_create_rows_w_list_of_Rows(self):
- from google.cloud.bigquery._helpers import Row
- from google.cloud.bigquery.table import Table, SchemaField
-
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection({})
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED'),
- ]
- table = Table(self.TABLE_REF, schema=schema)
- f2i = {'full_name': 0, 'age': 1}
- ROWS = [
- Row(('Phred Phlyntstone', 32), f2i),
- Row(('Bharney Rhubble', 33), f2i),
- Row(('Wylma Phlyntstone', 29), f2i),
- Row(('Bhettye Rhubble', 27), f2i),
- ]
-
- def _row_data(row):
- return {'full_name': row[0], 'age': str(row[1])}
-
- SENT = {
- 'rows': [{
- 'json': _row_data(row),
- 'insertId': str(i),
- } for i, row in enumerate(ROWS)],
- }
-
- with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
- errors = client.create_rows(table, ROWS)
-
- self.assertEqual(len(errors), 0)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_create_rows_w_skip_invalid_and_ignore_unknown(self):
- from google.cloud.bigquery.table import Table, SchemaField
-
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- RESPONSE = {
- 'insertErrors': [
- {'index': 1,
- 'errors': [
- {'reason': 'REASON',
- 'location': 'LOCATION',
- 'debugInfo': 'INFO',
- 'message': 'MESSAGE'}
- ]},
- ]}
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(RESPONSE)
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED'),
- SchemaField('voter', 'BOOLEAN', mode='NULLABLE'),
- ]
- table = Table(self.TABLE_REF, schema=schema)
- ROWS = [
- ('Phred Phlyntstone', 32, True),
- ('Bharney Rhubble', 33, False),
- ('Wylma Phlyntstone', 29, True),
- ('Bhettye Rhubble', 27, True),
- ]
-
- def _row_data(row):
- return {
- 'full_name': row[0],
- 'age': str(row[1]),
- 'voter': row[2] and 'true' or 'false',
- }
-
- SENT = {
- 'skipInvalidRows': True,
- 'ignoreUnknownValues': True,
- 'templateSuffix': '20160303',
- 'rows': [{'insertId': index, 'json': _row_data(row)}
- for index, row in enumerate(ROWS)],
- }
-
- errors = client.create_rows(
- table,
- ROWS,
- row_ids=[index for index, _ in enumerate(ROWS)],
- skip_invalid_rows=True,
- ignore_unknown_values=True,
- template_suffix='20160303',
- )
-
- self.assertEqual(len(errors), 1)
- self.assertEqual(errors[0]['index'], 1)
- self.assertEqual(len(errors[0]['errors']), 1)
- self.assertEqual(errors[0]['errors'][0],
- RESPONSE['insertErrors'][0]['errors'][0])
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_create_rows_w_repeated_fields(self):
- from google.cloud.bigquery.table import Table, SchemaField
-
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection({})
- color = SchemaField('color', 'STRING', mode='REPEATED')
- index = SchemaField('index', 'INTEGER', 'REPEATED')
- score = SchemaField('score', 'FLOAT', 'REPEATED')
- struct = SchemaField('struct', 'RECORD', mode='REPEATED',
- fields=[index, score])
- table = Table(self.TABLE_REF, schema=[color, struct])
- ROWS = [
- (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]),
- ]
-
- def _row_data(row):
- return {'color': row[0],
- 'struct': row[1]}
-
- SENT = {
- 'rows': [{
- 'json': _row_data(row),
- 'insertId': str(i),
- } for i, row in enumerate(ROWS)],
- }
-
- with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
- errors = client.create_rows(table, ROWS)
-
- self.assertEqual(len(errors), 0)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_create_rows_w_record_schema(self):
- from google.cloud.bigquery.table import SchemaField
-
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection({})
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- area_code = SchemaField('area_code', 'STRING', 'REQUIRED')
- local_number = SchemaField('local_number', 'STRING', 'REQUIRED')
- rank = SchemaField('rank', 'INTEGER', 'REQUIRED')
- phone = SchemaField('phone', 'RECORD', mode='NULLABLE',
- fields=[area_code, local_number, rank])
- ROWS = [
- ('Phred Phlyntstone', {'area_code': '800',
- 'local_number': '555-1212',
- 'rank': 1}),
- ('Bharney Rhubble', {'area_code': '877',
- 'local_number': '768-5309',
- 'rank': 2}),
- ('Wylma Phlyntstone', None),
- ]
-
- def _row_data(row):
- return {'full_name': row[0],
- 'phone': row[1]}
-
- SENT = {
- 'rows': [{
- 'json': _row_data(row),
- 'insertId': str(i),
- } for i, row in enumerate(ROWS)],
- }
-
- with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
- errors = client.create_rows(self.TABLE_REF, ROWS,
- selected_fields=[full_name, phone])
-
- self.assertEqual(len(errors), 0)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_create_rows_errors(self):
- from google.cloud.bigquery.table import Table
-
- ROWS = [
- ('Phred Phlyntstone', 32, True),
- ('Bharney Rhubble', 33, False),
- ('Wylma Phlyntstone', 29, True),
- ('Bhettye Rhubble', 27, True),
- ]
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
-
- # table ref with no selected fields
- with self.assertRaises(ValueError):
- client.create_rows(self.TABLE_REF, ROWS)
-
- # table with no schema
- with self.assertRaises(ValueError):
- client.create_rows(Table(self.TABLE_REF), ROWS)
-
- # neither Table nor tableReference
- with self.assertRaises(TypeError):
- client.create_rows(1, ROWS)
-
- def test_create_rows_json(self):
- from google.cloud.bigquery.table import Table, SchemaField
- from google.cloud.bigquery.dataset import DatasetReference
-
- PROJECT = 'PROJECT'
- DS_ID = 'DS_ID'
- TABLE_ID = 'TABLE_ID'
- PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
- PROJECT, DS_ID, TABLE_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=PROJECT, credentials=creds, _http=http)
- conn = client._connection = _Connection({})
- table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID)
- schema = [
- SchemaField('full_name', 'STRING', mode='REQUIRED'),
- SchemaField('age', 'INTEGER', mode='REQUIRED'),
- SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
- ]
- table = Table(table_ref, schema=schema)
- ROWS = [
- {
- 'full_name': 'Phred Phlyntstone', 'age': '32',
- 'joined': '2015-07-24T19:53:19.006000Z'
- },
- {
- 'full_name': 'Bharney Rhubble', 'age': '33',
- 'joined': 1437767600.006
- },
- {
- 'full_name': 'Wylma Phlyntstone', 'age': '29',
- 'joined': 1437767601.006
- },
- {
- 'full_name': 'Bhettye Rhubble', 'age': '27', 'joined': None
- },
- ]
-
- SENT = {
- 'rows': [{
- 'json': row,
- 'insertId': str(i),
- } for i, row in enumerate(ROWS)],
- }
-
- with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
- errors = client.create_rows_json(table, ROWS)
-
- self.assertEqual(len(errors), 0)
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['data'], SENT)
-
- def test_query_rows_defaults(self):
- from google.api_core.page_iterator import HTTPIterator
- from google.cloud.bigquery._helpers import Row
-
- JOB = 'job-id'
- QUERY = 'SELECT COUNT(*) FROM persons'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': '_temp_dataset',
- 'tableId': '_temp_table',
- },
- },
- },
- 'status': {
- 'state': 'DONE',
- },
- }
- RESULTS_RESOURCE = {
- 'jobReference': RESOURCE['jobReference'],
- 'jobComplete': True,
- 'schema': {
- 'fields': [
- {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'},
- ]
- },
- 'totalRows': '3',
- 'pageToken': 'next-page',
- }
- FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE)
- FIRST_PAGE['rows'] = [
- {'f': [{'v': '1'}]},
- {'f': [{'v': '2'}]},
- ]
- LAST_PAGE = copy.deepcopy(RESULTS_RESOURCE)
- LAST_PAGE['rows'] = [
- {'f': [{'v': '3'}]},
- ]
- del LAST_PAGE['pageToken']
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(
- RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE)
-
- rows_iter = client.query_rows(QUERY)
- rows = list(rows_iter)
-
- self.assertEqual(rows, [Row((i,), {'field0': 0}) for i in (1, 2, 3)])
- self.assertIs(rows_iter.client, client)
- self.assertIsInstance(rows_iter, HTTPIterator)
- self.assertEqual(len(conn._requested), 4)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- self.assertIsInstance(
- req['data']['jobReference']['jobId'], six.string_types)
-
- def test_query_rows_w_job_id(self):
- from google.api_core.page_iterator import HTTPIterator
-
- JOB = 'job-id'
- QUERY = 'SELECT COUNT(*) FROM persons'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': '_temp_dataset',
- 'tableId': '_temp_table',
- },
- },
- },
- 'status': {
- 'state': 'DONE',
- },
- }
- RESULTS_RESOURCE = {
- 'jobReference': RESOURCE['jobReference'],
- 'jobComplete': True,
- 'schema': {
- 'fields': [
- {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'},
- ]
- },
- 'totalRows': '0',
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(
- RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE)
-
- rows_iter = client.query_rows(QUERY, job_id=JOB)
- rows = list(rows_iter)
-
- self.assertEqual(rows, [])
- self.assertIs(rows_iter.client, client)
- self.assertIsInstance(rows_iter, HTTPIterator)
- self.assertEqual(len(conn._requested), 3)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- self.assertEqual(req['data']['jobReference']['jobId'], JOB)
-
- def test_query_rows_w_job_config(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.api_core.page_iterator import HTTPIterator
-
- JOB = 'job-id'
- QUERY = 'SELECT COUNT(*) FROM persons'
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': JOB,
- },
- 'configuration': {
- 'query': {
- 'query': QUERY,
- 'useLegacySql': True,
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': '_temp_dataset',
- 'tableId': '_temp_table',
- },
- },
- 'dryRun': True,
- },
- 'status': {
- 'state': 'DONE',
- },
- }
- RESULTS_RESOURCE = {
- 'jobReference': RESOURCE['jobReference'],
- 'jobComplete': True,
- 'schema': {
- 'fields': [
- {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'},
- ]
- },
- 'totalRows': '0',
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(
- RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE)
-
- job_config = QueryJobConfig()
- job_config.use_legacy_sql = True
- job_config.dry_run = True
- rows_iter = client.query_rows(QUERY, job_id=JOB, job_config=job_config)
-
- self.assertIsInstance(rows_iter, HTTPIterator)
- self.assertEqual(len(conn._requested), 2)
- req = conn._requested[0]
- configuration = req['data']['configuration']
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], '/projects/PROJECT/jobs')
- self.assertEqual(req['data']['jobReference']['jobId'], JOB)
- self.assertEqual(configuration['query']['useLegacySql'], True)
- self.assertEqual(configuration['dryRun'], True)
-
- def test_list_rows(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud.bigquery.table import Table, SchemaField
- from google.cloud.bigquery._helpers import Row
-
- PATH = 'projects/%s/datasets/%s/tables/%s/data' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- WHEN_TS = 1437767599.006
- WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
- tzinfo=UTC)
- WHEN_1 = WHEN + datetime.timedelta(seconds=1)
- WHEN_2 = WHEN + datetime.timedelta(seconds=2)
- ROWS = 1234
- TOKEN = 'TOKEN'
-
- def _bigquery_timestamp_float_repr(ts_float):
- # Preserve microsecond precision for E+09 timestamps
- return '%0.15E' % (ts_float,)
-
- DATA = {
- 'totalRows': str(ROWS),
- 'pageToken': TOKEN,
- 'rows': [
- {'f': [
- {'v': 'Phred Phlyntstone'},
- {'v': '32'},
- {'v': _bigquery_timestamp_float_repr(WHEN_TS)},
- ]},
- {'f': [
- {'v': 'Bharney Rhubble'},
- {'v': '33'},
- {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)},
- ]},
- {'f': [
- {'v': 'Wylma Phlyntstone'},
- {'v': '29'},
- {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)},
- ]},
- {'f': [
- {'v': 'Bhettye Rhubble'},
- {'v': None},
- {'v': None},
- ]},
- ]
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(DATA, DATA)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='NULLABLE')
- joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE')
- table = Table(self.TABLE_REF, schema=[full_name, age, joined])
-
- iterator = client.list_rows(table)
- page = six.next(iterator.pages)
- rows = list(page)
- total_rows = iterator.total_rows
- page_token = iterator.next_page_token
-
- f2i = {'full_name': 0, 'age': 1, 'joined': 2}
- self.assertEqual(len(rows), 4)
- self.assertEqual(rows[0], Row(('Phred Phlyntstone', 32, WHEN), f2i))
- self.assertEqual(rows[1], Row(('Bharney Rhubble', 33, WHEN_1), f2i))
- self.assertEqual(rows[2], Row(('Wylma Phlyntstone', 29, WHEN_2), f2i))
- self.assertEqual(rows[3], Row(('Bhettye Rhubble', None, None), f2i))
- self.assertEqual(total_rows, ROWS)
- self.assertEqual(page_token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
- self.assertEqual(req['query_params'], {})
-
- def test_list_rows_query_params(self):
- from google.cloud.bigquery.table import Table, SchemaField
-
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- table = Table(self.TABLE_REF,
- schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')])
- tests = [
- ({}, {}),
- ({'start_index': 1}, {'startIndex': 1}),
- ({'max_results': 2}, {'maxResults': 2}),
- ({'start_index': 1, 'max_results': 2},
- {'startIndex': 1, 'maxResults': 2}),
- ]
- conn = client._connection = _Connection(*len(tests)*[{}])
- for i, test in enumerate(tests):
- iterator = client.list_rows(table, **test[0])
- six.next(iterator.pages)
- req = conn._requested[i]
- self.assertEqual(req['query_params'], test[1],
- 'for kwargs %s' % test[0])
-
- def test_list_rows_repeated_fields(self):
- from google.cloud.bigquery.table import SchemaField
-
- PATH = 'projects/%s/datasets/%s/tables/%s/data' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- ROWS = 1234
- TOKEN = 'TOKEN'
- DATA = {
- 'totalRows': ROWS,
- 'pageToken': TOKEN,
- 'rows': [
- {'f': [
- {'v': [{'v': 'red'}, {'v': 'green'}]},
- {'v': [{
- 'v': {
- 'f': [
- {'v': [{'v': '1'}, {'v': '2'}]},
- {'v': [{'v': '3.1415'}, {'v': '1.414'}]},
- ]}
- }]},
- ]},
- ]
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(DATA)
- color = SchemaField('color', 'STRING', mode='REPEATED')
- index = SchemaField('index', 'INTEGER', 'REPEATED')
- score = SchemaField('score', 'FLOAT', 'REPEATED')
- struct = SchemaField('struct', 'RECORD', mode='REPEATED',
- fields=[index, score])
-
- iterator = client.list_rows(self.TABLE_REF,
- selected_fields=[color, struct])
- page = six.next(iterator.pages)
- rows = list(page)
- total_rows = iterator.total_rows
- page_token = iterator.next_page_token
-
- self.assertEqual(len(rows), 1)
- self.assertEqual(rows[0][0], ['red', 'green'])
- self.assertEqual(rows[0][1], [{'index': [1, 2],
- 'score': [3.1415, 1.414]}])
- self.assertEqual(total_rows, ROWS)
- self.assertEqual(page_token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_list_rows_w_record_schema(self):
- from google.cloud.bigquery.table import Table, SchemaField
-
- PATH = 'projects/%s/datasets/%s/tables/%s/data' % (
- self.PROJECT, self.DS_ID, self.TABLE_ID)
- ROWS = 1234
- TOKEN = 'TOKEN'
- DATA = {
- 'totalRows': ROWS,
- 'pageToken': TOKEN,
- 'rows': [
- {'f': [
- {'v': 'Phred Phlyntstone'},
- {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
- ]},
- {'f': [
- {'v': 'Bharney Rhubble'},
- {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
- ]},
- {'f': [
- {'v': 'Wylma Phlyntstone'},
- {'v': None},
- ]},
- ]
- }
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- conn = client._connection = _Connection(DATA)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- area_code = SchemaField('area_code', 'STRING', 'REQUIRED')
- local_number = SchemaField('local_number', 'STRING', 'REQUIRED')
- rank = SchemaField('rank', 'INTEGER', 'REQUIRED')
- phone = SchemaField('phone', 'RECORD', mode='NULLABLE',
- fields=[area_code, local_number, rank])
- table = Table(self.TABLE_REF, schema=[full_name, phone])
-
- iterator = client.list_rows(table)
- page = six.next(iterator.pages)
- rows = list(page)
- total_rows = iterator.total_rows
- page_token = iterator.next_page_token
-
- self.assertEqual(len(rows), 3)
- self.assertEqual(rows[0][0], 'Phred Phlyntstone')
- self.assertEqual(rows[0][1], {'area_code': '800',
- 'local_number': '555-1212',
- 'rank': 1})
- self.assertEqual(rows[1][0], 'Bharney Rhubble')
- self.assertEqual(rows[1][1], {'area_code': '877',
- 'local_number': '768-5309',
- 'rank': 2})
- self.assertEqual(rows[2][0], 'Wylma Phlyntstone')
- self.assertIsNone(rows[2][1])
- self.assertEqual(total_rows, ROWS)
- self.assertEqual(page_token, TOKEN)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], '/%s' % PATH)
-
- def test_list_rows_errors(self):
- from google.cloud.bigquery.table import Table
-
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
-
- # table ref with no selected fields
- with self.assertRaises(ValueError):
- client.list_rows(self.TABLE_REF)
-
- # table with no schema
- with self.assertRaises(ValueError):
- client.list_rows(Table(self.TABLE_REF))
-
- # neither Table nor tableReference
- with self.assertRaises(TypeError):
- client.list_rows(1)
-
- def test_list_partitions(self):
- RESOURCE = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': 'JOB_ID',
- },
- 'configuration': {
- 'query': {
- 'query': 'q',
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': 'DS_ID',
- 'tableId': 'TABLE_ID',
- },
- },
- },
- 'status': {
- 'state': 'DONE',
- },
- }
- RESULTS_RESOURCE = {
- 'jobReference': RESOURCE['jobReference'],
- 'jobComplete': True,
- 'schema': {
- 'fields': [
- {'name': 'partition_id', 'type': 'INTEGER',
- 'mode': 'REQUIRED'},
- ]
- },
- 'totalRows': '2',
- 'pageToken': 'next-page',
- }
- FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE)
- FIRST_PAGE['rows'] = [
- {'f': [{'v': 20160804}]},
- {'f': [{'v': 20160805}]},
- ]
- del FIRST_PAGE['pageToken']
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds,
- _http=http)
- client._connection = _Connection(
- RESOURCE, RESULTS_RESOURCE, FIRST_PAGE)
- self.assertEqual(client.list_partitions(self.TABLE_REF),
- [20160804, 20160805])
-
-
-class Test_make_job_id(unittest.TestCase):
- def _call_fut(self, job_id, prefix=None):
- from google.cloud.bigquery.client import _make_job_id
-
- return _make_job_id(job_id, prefix=prefix)
-
- def test__make_job_id_wo_suffix(self):
- job_id = self._call_fut('job_id')
-
- self.assertEqual(job_id, 'job_id')
-
- def test__make_job_id_w_suffix(self):
- with mock.patch('uuid.uuid4', side_effect=['212345']):
- job_id = self._call_fut(None, prefix='job_id')
-
- self.assertEqual(job_id, 'job_id212345')
-
- def test__make_random_job_id(self):
- with mock.patch('uuid.uuid4', side_effect=['212345']):
- job_id = self._call_fut(None)
-
- self.assertEqual(job_id, '212345')
-
- def test__make_job_id_w_job_id_overrides_prefix(self):
- job_id = self._call_fut('job_id', prefix='unused_prefix')
-
- self.assertEqual(job_id, 'job_id')
-
-
-class TestClientUpload(object):
- # NOTE: This is a "partner" to `TestClient` meant to test some of the
- # "load_table_from_file" portions of `Client`. It also uses
- # `pytest`-style tests rather than `unittest`-style.
-
- TABLE_REF = DatasetReference(
- 'project_id', 'test_dataset').table('test_table')
-
- @staticmethod
- def _make_client(transport=None):
- from google.cloud.bigquery import _http
- from google.cloud.bigquery import client
-
- cl = client.Client(project='project_id',
- credentials=_make_credentials(),
- _http=transport)
- cl._connection = mock.create_autospec(_http.Connection, instance=True)
- return cl
-
- @staticmethod
- def _make_response(status_code, content='', headers={}):
- """Make a mock HTTP response."""
- import requests
- response = requests.Response()
- response.request = requests.Request(
- 'POST', 'http://example.com').prepare()
- response._content = content.encode('utf-8')
- response.headers.update(headers)
- response.status_code = status_code
- return response
-
- @classmethod
- def _make_do_upload_patch(cls, client, method,
- resource={}, side_effect=None):
- """Patches the low-level upload helpers."""
- if side_effect is None:
- side_effect = [cls._make_response(
- http_client.OK,
- json.dumps(resource),
- {'Content-Type': 'application/json'})]
- return mock.patch.object(
- client, method, side_effect=side_effect, autospec=True)
-
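- # Job resource the upload tests expect ``load_table_from_file`` to
- # send for the default CSV configuration built by ``_make_config``.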
- EXPECTED_CONFIGURATION = {
- 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'},
- 'configuration': {
- 'load': {
- 'sourceFormat': 'CSV',
- 'destinationTable': {
- 'projectId': 'project_id',
- 'datasetId': 'test_dataset',
- 'tableId': 'test_table'
- }
- }
- }
- }
-
- @staticmethod
- def _make_file_obj():
- return io.BytesIO(b'hello, is it me you\'re looking for?')
-
- @staticmethod
- def _make_config():
- from google.cloud.bigquery.job import LoadJobConfig
-
- config = LoadJobConfig()
- config.source_format = 'CSV'
- return config
-
- # High-level tests
-
- def test_load_table_from_file_resumable(self):
- from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
-
- client = self._make_client()
- file_obj = self._make_file_obj()
-
- do_upload_patch = self._make_do_upload_patch(
- client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION)
- with do_upload_patch as do_upload:
- client.load_table_from_file(file_obj, self.TABLE_REF,
- job_id='job_id',
- job_config=self._make_config())
-
- do_upload.assert_called_once_with(
- file_obj,
- self.EXPECTED_CONFIGURATION,
- _DEFAULT_NUM_RETRIES)
-
- def test_load_table_from_file_resumable_metadata(self):
- from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
-
- client = self._make_client()
- file_obj = self._make_file_obj()
-
- config = self._make_config()
- config.allow_jagged_rows = False
- config.allow_quoted_newlines = False
- config.create_disposition = 'CREATE_IF_NEEDED'
- config.encoding = 'utf8'
- config.field_delimiter = ','
- config.ignore_unknown_values = False
- config.max_bad_records = 0
- config.quote_character = '"'
- config.skip_leading_rows = 1
- config.write_disposition = 'WRITE_APPEND'
- config.null_marker = r'\N'
-
- expected_config = {
- 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'},
- 'configuration': {
- 'load': {
- 'destinationTable': {
- 'projectId': self.TABLE_REF.project,
- 'datasetId': self.TABLE_REF.dataset_id,
- 'tableId': self.TABLE_REF.table_id,
- },
- 'sourceFormat': config.source_format,
- 'allowJaggedRows': config.allow_jagged_rows,
- 'allowQuotedNewlines': config.allow_quoted_newlines,
- 'createDisposition': config.create_disposition,
- 'encoding': config.encoding,
- 'fieldDelimiter': config.field_delimiter,
- 'ignoreUnknownValues': config.ignore_unknown_values,
- 'maxBadRecords': config.max_bad_records,
- 'quote': config.quote_character,
- 'skipLeadingRows': str(config.skip_leading_rows),
- 'writeDisposition': config.write_disposition,
- 'nullMarker': config.null_marker,
- },
- },
- }
-
- do_upload_patch = self._make_do_upload_patch(
- client, '_do_resumable_upload', expected_config)
- with do_upload_patch as do_upload:
- client.load_table_from_file(
- file_obj, self.TABLE_REF, job_id='job_id', job_config=config)
-
- do_upload.assert_called_once_with(
- file_obj,
- expected_config,
- _DEFAULT_NUM_RETRIES)
-
- def test_load_table_from_file_multipart(self):
- from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
-
- client = self._make_client()
- file_obj = self._make_file_obj()
- file_obj_size = 10
- config = self._make_config()
-
- do_upload_patch = self._make_do_upload_patch(
- client, '_do_multipart_upload', self.EXPECTED_CONFIGURATION)
- with do_upload_patch as do_upload:
- client.load_table_from_file(
- file_obj, self.TABLE_REF, job_id='job_id', job_config=config,
- size=file_obj_size)
-
- do_upload.assert_called_once_with(
- file_obj,
- self.EXPECTED_CONFIGURATION,
- file_obj_size,
- _DEFAULT_NUM_RETRIES)
-
- def test_load_table_from_file_with_retries(self):
- client = self._make_client()
- file_obj = self._make_file_obj()
- num_retries = 20
-
- do_upload_patch = self._make_do_upload_patch(
- client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION)
- with do_upload_patch as do_upload:
- client.load_table_from_file(
- file_obj, self.TABLE_REF, num_retries=num_retries,
- job_id='job_id', job_config=self._make_config())
-
- do_upload.assert_called_once_with(
- file_obj,
- self.EXPECTED_CONFIGURATION,
- num_retries)
-
- def test_load_table_from_file_with_rewind(self):
- client = self._make_client()
- file_obj = self._make_file_obj()
- file_obj.seek(2)
-
- with self._make_do_upload_patch(
- client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION):
- client.load_table_from_file(
- file_obj, self.TABLE_REF, rewind=True)
-
- assert file_obj.tell() == 0
-
- def test_load_table_from_file_failure(self):
- from google.resumable_media import InvalidResponse
- from google.cloud import exceptions
-
- client = self._make_client()
- file_obj = self._make_file_obj()
-
- response = self._make_response(
- content='Someone is already in this spot.',
- status_code=http_client.CONFLICT)
-
- do_upload_patch = self._make_do_upload_patch(
- client, '_do_resumable_upload',
- side_effect=InvalidResponse(response))
-
- with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info:
- client.load_table_from_file(
- file_obj, self.TABLE_REF, rewind=True)
-
- assert response.text in exc_info.value.message
- assert exc_info.value.errors == []
-
- def test_load_table_from_file_bad_mode(self):
- client = self._make_client()
- file_obj = mock.Mock(spec=['mode'])
- file_obj.mode = 'x'
-
- with pytest.raises(ValueError):
- client.load_table_from_file(file_obj, self.TABLE_REF)
-
- # Low-level tests
-
- @classmethod
- def _make_resumable_upload_responses(cls, size):
- """Make a series of responses for a successful resumable upload."""
- from google import resumable_media
-
- resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1'
- initial_response = cls._make_response(
- http_client.OK, '', {'location': resumable_url})
- data_response = cls._make_response(
- resumable_media.PERMANENT_REDIRECT,
- '', {'range': 'bytes=0-{:d}'.format(size - 1)})
- final_response = cls._make_response(
- http_client.OK,
- json.dumps({'size': size}),
- {'Content-Type': 'application/json'})
- return [initial_response, data_response, final_response]
-
- @staticmethod
- def _make_transport(responses=None):
- import google.auth.transport.requests
-
- transport = mock.create_autospec(
- google.auth.transport.requests.AuthorizedSession, instance=True)
- transport.request.side_effect = responses
- return transport
-
- def test__do_resumable_upload(self):
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
- transport = self._make_transport(
- self._make_resumable_upload_responses(file_obj_len))
- client = self._make_client(transport)
-
- result = client._do_resumable_upload(
- file_obj,
- self.EXPECTED_CONFIGURATION,
- None)
-
- content = result.content.decode('utf-8')
- assert json.loads(content) == {'size': file_obj_len}
-
- # Verify that configuration data was passed in with the initial
- # request.
- transport.request.assert_any_call(
- 'POST',
- mock.ANY,
- data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'),
- headers=mock.ANY)
-
- def test__do_multipart_upload(self):
- transport = self._make_transport([self._make_response(http_client.OK)])
- client = self._make_client(transport)
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
-
- client._do_multipart_upload(
- file_obj,
- self.EXPECTED_CONFIGURATION,
- file_obj_len,
- None)
-
- # Verify that configuration data was passed in with the initial
- # request.
- request_args = transport.request.mock_calls[0][2]
- request_data = request_args['data'].decode('utf-8')
- request_headers = request_args['headers']
-
- request_content = email.message_from_string(
- 'Content-Type: {}\r\n{}'.format(
- request_headers['content-type'].decode('utf-8'),
- request_data))
-
- # There should be two payloads: the configuration and the binary data.
- configuration_data = request_content.get_payload(0).get_payload()
- binary_data = request_content.get_payload(1).get_payload()
-
- assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION
- assert binary_data.encode('utf-8') == file_obj.getvalue()
-
- def test__do_multipart_upload_wrong_size(self):
- client = self._make_client()
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
-
- with pytest.raises(ValueError):
- client._do_multipart_upload(
- file_obj,
- {},
- file_obj_len+1,
- None)
-
-
-class _Connection(object):
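- # Test double for the client's HTTP connection: each ``api_request``
- # call is recorded in ``_requested`` and answered with the next canned
- # response passed to the constructor; once responses are exhausted, a
- # ``NotFound`` is raised.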
-
- USER_AGENT = 'testing 1.2.3'
-
- def __init__(self, *responses):
- self._responses = responses
- self._requested = []
-
- def api_request(self, **kw):
- from google.cloud.exceptions import NotFound
- self._requested.append(kw)
-
- if len(self._responses) == 0:
- raise NotFound('miss')
-
- response, self._responses = self._responses[0], self._responses[1:]
- if isinstance(response, Exception):
- raise response
- return response
diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py
deleted file mode 100644
index c04d154..0000000
--- a/bigquery/tests/unit/test_dataset.py
+++ /dev/null
@@ -1,459 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import mock
-
-
-class TestAccessEntry(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.dataset import AccessEntry
-
- return AccessEntry
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor_defaults(self):
- entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
- self.assertEqual(entry.role, 'OWNER')
- self.assertEqual(entry.entity_type, 'userByEmail')
- self.assertEqual(entry.entity_id, 'phred@example.com')
-
- def test_ctor_bad_entity_type(self):
- with self.assertRaises(ValueError):
- self._make_one(None, 'unknown', None)
-
- def test_ctor_view_with_role(self):
- role = 'READER'
- entity_type = 'view'
- with self.assertRaises(ValueError):
- self._make_one(role, entity_type, None)
-
- def test_ctor_view_success(self):
- role = None
- entity_type = 'view'
- entity_id = object()
- entry = self._make_one(role, entity_type, entity_id)
- self.assertEqual(entry.role, role)
- self.assertEqual(entry.entity_type, entity_type)
- self.assertEqual(entry.entity_id, entity_id)
-
- def test_ctor_nonview_without_role(self):
- role = None
- entity_type = 'userByEmail'
- with self.assertRaises(ValueError):
- self._make_one(role, entity_type, None)
-
- def test___eq___role_mismatch(self):
- entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
- other = self._make_one('WRITER', 'userByEmail', 'phred@example.com')
- self.assertNotEqual(entry, other)
-
- def test___eq___entity_type_mismatch(self):
- entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
- other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com')
- self.assertNotEqual(entry, other)
-
- def test___eq___entity_id_mismatch(self):
- entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
- other = self._make_one('OWNER', 'userByEmail', 'bharney@example.com')
- self.assertNotEqual(entry, other)
-
- def test___eq___hit(self):
- entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
- other = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
- self.assertEqual(entry, other)
-
- def test___eq___type_mismatch(self):
- entry = self._make_one('OWNER', 'userByEmail', 'silly@example.com')
- self.assertNotEqual(entry, object())
- self.assertEqual(entry, mock.ANY)
-
-
-class TestDatasetReference(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.dataset import DatasetReference
-
- return DatasetReference
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor_defaults(self):
- dataset_ref = self._make_one('some-project-1', 'dataset_1')
- self.assertEqual(dataset_ref.project, 'some-project-1')
- self.assertEqual(dataset_ref.dataset_id, 'dataset_1')
-
- def test_ctor_bad_args(self):
- with self.assertRaises(ValueError):
- self._make_one(1, 'd')
- with self.assertRaises(ValueError):
- self._make_one('p', 2)
-
- def test_table(self):
- dataset_ref = self._make_one('some-project-1', 'dataset_1')
- table_ref = dataset_ref.table('table_1')
- self.assertEqual(table_ref.dataset_id, 'dataset_1')
- self.assertEqual(table_ref.project, 'some-project-1')
- self.assertEqual(table_ref.table_id, 'table_1')
-
- def test_to_api_repr(self):
- dataset = self._make_one('project_1', 'dataset_1')
-
- resource = dataset.to_api_repr()
-
- self.assertEqual(
- resource,
- {
- 'projectId': 'project_1',
- 'datasetId': 'dataset_1',
- })
-
- def test_from_api_repr(self):
- from google.cloud.bigquery.dataset import DatasetReference
- expected = self._make_one('project_1', 'dataset_1')
-
- got = DatasetReference.from_api_repr(
- {
- 'projectId': 'project_1',
- 'datasetId': 'dataset_1',
- })
-
- self.assertEqual(expected, got)
-
- def test___eq___wrong_type(self):
- dataset = self._make_one('project_1', 'dataset_1')
- other = object()
- self.assertNotEqual(dataset, other)
- self.assertEqual(dataset, mock.ANY)
-
- def test___eq___project_mismatch(self):
- dataset = self._make_one('project_1', 'dataset_1')
- other = self._make_one('project_2', 'dataset_1')
- self.assertNotEqual(dataset, other)
-
- def test___eq___dataset_mismatch(self):
- dataset = self._make_one('project_1', 'dataset_1')
- other = self._make_one('project_1', 'dataset_2')
- self.assertNotEqual(dataset, other)
-
- def test___eq___equality(self):
- dataset = self._make_one('project_1', 'dataset_1')
- other = self._make_one('project_1', 'dataset_1')
- self.assertEqual(dataset, other)
-
- def test___hash__set_equality(self):
- dataset1 = self._make_one('project_1', 'dataset_1')
- dataset2 = self._make_one('project_1', 'dataset_2')
- set_one = {dataset1, dataset2}
- set_two = {dataset1, dataset2}
- self.assertEqual(set_one, set_two)
-
- def test___hash__not_equals(self):
- dataset1 = self._make_one('project_1', 'dataset_1')
- dataset2 = self._make_one('project_1', 'dataset_2')
- set_one = {dataset1}
- set_two = {dataset2}
- self.assertNotEqual(set_one, set_two)
-
- def test___repr__(self):
- dataset = self._make_one('project1', 'dataset1')
- expected = "DatasetReference('project1', 'dataset1')"
- self.assertEqual(repr(dataset), expected)
-
-
-class TestDataset(unittest.TestCase):
- from google.cloud.bigquery.dataset import DatasetReference
-
- PROJECT = 'project'
- DS_ID = 'dataset-id'
- DS_REF = DatasetReference(PROJECT, DS_ID)
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.dataset import Dataset
-
- return Dataset
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def _setUpConstants(self):
- import datetime
- from google.cloud._helpers import UTC
-
- self.WHEN_TS = 1437767599.006
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(
- tzinfo=UTC)
- self.ETAG = 'ETAG'
- self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID)
- self.RESOURCE_URL = 'http://example.com/path/to/resource'
-
- def _makeResource(self):
- self._setUpConstants()
- USER_EMAIL = 'phred@example.com'
- GROUP_EMAIL = 'group-name@lists.example.com'
- return {
- 'creationTime': self.WHEN_TS * 1000,
- 'datasetReference':
- {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
- 'etag': self.ETAG,
- 'id': self.DS_FULL_ID,
- 'lastModifiedTime': self.WHEN_TS * 1000,
- 'location': 'US',
- 'selfLink': self.RESOURCE_URL,
- 'defaultTableExpirationMs': 3600,
- 'access': [
- {'role': 'OWNER', 'userByEmail': USER_EMAIL},
- {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL},
- {'role': 'WRITER', 'specialGroup': 'projectWriters'},
- {'role': 'READER', 'specialGroup': 'projectReaders'}],
- }
-
- def _verify_access_entry(self, access_entries, resource):
- r_entries = []
- for r_entry in resource['access']:
- role = r_entry.pop('role')
- for entity_type, entity_id in sorted(r_entry.items()):
- r_entries.append({
- 'role': role,
- 'entity_type': entity_type,
- 'entity_id': entity_id})
-
- self.assertEqual(len(access_entries), len(r_entries))
- for a_entry, r_entry in zip(access_entries, r_entries):
- self.assertEqual(a_entry.role, r_entry['role'])
- self.assertEqual(a_entry.entity_type, r_entry['entity_type'])
- self.assertEqual(a_entry.entity_id, r_entry['entity_id'])
-
- def _verify_readonly_resource_properties(self, dataset, resource):
-
- self.assertEqual(dataset.dataset_id, self.DS_ID)
-
- if 'creationTime' in resource:
- self.assertEqual(dataset.created, self.WHEN)
- else:
- self.assertIsNone(dataset.created)
- if 'etag' in resource:
- self.assertEqual(dataset.etag, self.ETAG)
- else:
- self.assertIsNone(dataset.etag)
- if 'lastModifiedTime' in resource:
- self.assertEqual(dataset.modified, self.WHEN)
- else:
- self.assertIsNone(dataset.modified)
- if 'selfLink' in resource:
- self.assertEqual(dataset.self_link, self.RESOURCE_URL)
- else:
- self.assertIsNone(dataset.self_link)
-
- def _verify_resource_properties(self, dataset, resource):
-
- self._verify_readonly_resource_properties(dataset, resource)
-
- if 'defaultTableExpirationMs' in resource:
- self.assertEqual(dataset.default_table_expiration_ms,
- int(resource.get('defaultTableExpirationMs')))
- else:
- self.assertIsNone(dataset.default_table_expiration_ms)
- self.assertEqual(dataset.description, resource.get('description'))
- self.assertEqual(dataset.friendly_name, resource.get('friendlyName'))
- self.assertEqual(dataset.location, resource.get('location'))
-
- if 'access' in resource:
- self._verify_access_entry(dataset.access_entries, resource)
- else:
- self.assertEqual(dataset.access_entries, [])
-
- def test_ctor_defaults(self):
- dataset = self._make_one(self.DS_REF)
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
- self.assertEqual(
- dataset.path,
- '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID))
- self.assertEqual(dataset.access_entries, [])
-
- self.assertIsNone(dataset.created)
- self.assertIsNone(dataset.full_dataset_id)
- self.assertIsNone(dataset.etag)
- self.assertIsNone(dataset.modified)
- self.assertIsNone(dataset.self_link)
-
- self.assertIsNone(dataset.default_table_expiration_ms)
- self.assertIsNone(dataset.description)
- self.assertIsNone(dataset.friendly_name)
- self.assertIsNone(dataset.location)
-
- def test_ctor_explicit(self):
- from google.cloud.bigquery.dataset import DatasetReference, AccessEntry
-
- phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com')
- bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com')
- entries = [phred, bharney]
- OTHER_PROJECT = 'foo-bar-123'
- dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID))
- dataset.access_entries = entries
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, OTHER_PROJECT)
- self.assertEqual(
- dataset.path,
- '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID))
- self.assertEqual(dataset.access_entries, entries)
-
- self.assertIsNone(dataset.created)
- self.assertIsNone(dataset.full_dataset_id)
- self.assertIsNone(dataset.etag)
- self.assertIsNone(dataset.modified)
- self.assertIsNone(dataset.self_link)
-
- self.assertIsNone(dataset.default_table_expiration_ms)
- self.assertIsNone(dataset.description)
- self.assertIsNone(dataset.friendly_name)
- self.assertIsNone(dataset.location)
-
- def test_access_entries_setter_non_list(self):
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(TypeError):
- dataset.access_entries = object()
-
- def test_access_entries_setter_invalid_field(self):
- from google.cloud.bigquery.dataset import AccessEntry
-
- dataset = self._make_one(self.DS_REF)
- phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com')
- with self.assertRaises(ValueError):
- dataset.access_entries = [phred, object()]
-
- def test_access_entries_setter(self):
- from google.cloud.bigquery.dataset import AccessEntry
-
- dataset = self._make_one(self.DS_REF)
- phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com')
- bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com')
- dataset.access_entries = [phred, bharney]
- self.assertEqual(dataset.access_entries, [phred, bharney])
-
- def test_default_table_expiration_ms_setter_bad_value(self):
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset.default_table_expiration_ms = 'bogus'
-
- def test_default_table_expiration_ms_setter(self):
- dataset = self._make_one(self.DS_REF)
- dataset.default_table_expiration_ms = 12345
- self.assertEqual(dataset.default_table_expiration_ms, 12345)
-
- def test_description_setter_bad_value(self):
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset.description = 12345
-
- def test_description_setter(self):
- dataset = self._make_one(self.DS_REF)
- dataset.description = 'DESCRIPTION'
- self.assertEqual(dataset.description, 'DESCRIPTION')
-
- def test_friendly_name_setter_bad_value(self):
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset.friendly_name = 12345
-
- def test_friendly_name_setter(self):
- dataset = self._make_one(self.DS_REF)
- dataset.friendly_name = 'FRIENDLY'
- self.assertEqual(dataset.friendly_name, 'FRIENDLY')
-
- def test_location_setter_bad_value(self):
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset.location = 12345
-
- def test_location_setter(self):
- dataset = self._make_one(self.DS_REF)
- dataset.location = 'LOCATION'
- self.assertEqual(dataset.location, 'LOCATION')
-
- def test_labels_setter(self):
- dataset = self._make_one(self.DS_REF)
- dataset.labels = {'color': 'green'}
- self.assertEqual(dataset.labels, {'color': 'green'})
-
- def test_labels_setter_bad_value(self):
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset.labels = None
-
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE)
-
- def test_from_api_repr_bare(self):
- self._setUpConstants()
- RESOURCE = {
- 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
- 'datasetReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- }
- }
- klass = self._get_target_class()
- dataset = klass.from_api_repr(RESOURCE)
- self._verify_resource_properties(dataset, RESOURCE)
-
- def test_from_api_repr_w_properties(self):
- RESOURCE = self._makeResource()
- klass = self._get_target_class()
- dataset = klass.from_api_repr(RESOURCE)
- self._verify_resource_properties(dataset, RESOURCE)
-
- def test__parse_access_entries_w_unknown_entity_type(self):
- ACCESS = [
- {'role': 'READER', 'unknown': 'UNKNOWN'},
- ]
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset._parse_access_entries(ACCESS)
-
- def test__parse_access_entries_w_extra_keys(self):
- USER_EMAIL = 'phred@example.com'
- ACCESS = [
- {
- 'role': 'READER',
- 'specialGroup': 'projectReaders',
- 'userByEmail': USER_EMAIL,
- },
- ]
- dataset = self._make_one(self.DS_REF)
- with self.assertRaises(ValueError):
- dataset._parse_access_entries(ACCESS)
-
- def test_table(self):
- from google.cloud.bigquery.table import TableReference
-
- dataset = self._make_one(self.DS_REF)
- table = dataset.table('table_id')
- self.assertIsInstance(table, TableReference)
- self.assertEqual(table.table_id, 'table_id')
- self.assertEqual(table.dataset_id, self.DS_ID)
- self.assertEqual(table.project, self.PROJECT)
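A hedged sketch of the Dataset surface these tests pin down; the project, dataset, and table identifiers are placeholders chosen to echo the fixtures above:

    from google.cloud.bigquery.dataset import AccessEntry, Dataset, DatasetReference

    # Build a reference, attach it to a Dataset, and grant access, mirroring
    # what TestAccessEntry and TestDataset assert above.
    dataset_ref = DatasetReference('some-project-1', 'dataset_1')
    dataset = Dataset(dataset_ref)
    dataset.access_entries = [
        AccessEntry('OWNER', 'userByEmail', 'phred@example.com'),
    ]
    # dataset.table() hands back a TableReference, per test_table above.
    table_ref = dataset.table('table_1')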
diff --git a/bigquery/tests/unit/test_dbapi_cursor.py b/bigquery/tests/unit/test_dbapi_cursor.py
deleted file mode 100644
index a16b7b4..0000000
--- a/bigquery/tests/unit/test_dbapi_cursor.py
+++ /dev/null
@@ -1,308 +0,0 @@
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import mock
-
-
-class TestCursor(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.dbapi import Cursor
- return Cursor
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def _mock_client(
- self, rows=None, schema=None, num_dml_affected_rows=None):
- from google.cloud.bigquery import client
-
- if rows is None:
- total_rows = 0
- else:
- total_rows = len(rows)
-
- mock_client = mock.create_autospec(client.Client)
- mock_client.query.return_value = self._mock_job(
- total_rows=total_rows,
- schema=schema,
- num_dml_affected_rows=num_dml_affected_rows)
- mock_client.list_rows.return_value = rows
- return mock_client
-
- def _mock_job(
- self, total_rows=0, schema=None, num_dml_affected_rows=None):
- from google.cloud.bigquery import job
- mock_job = mock.create_autospec(job.QueryJob)
- mock_job.error_result = None
- mock_job.state = 'DONE'
- mock_job.result.return_value = mock_job
-
- if num_dml_affected_rows is None:
- mock_job.statement_type = None # API sends back None for SELECT
- else:
- mock_job.statement_type = 'UPDATE'
-
- mock_job.query_results.return_value = self._mock_results(
- total_rows=total_rows, schema=schema,
- num_dml_affected_rows=num_dml_affected_rows)
- return mock_job
-
- def _mock_results(
- self, total_rows=0, schema=None, num_dml_affected_rows=None):
- from google.cloud.bigquery import query
- mock_results = mock.create_autospec(query.QueryResults)
- mock_results.schema = schema
- mock_results.num_dml_affected_rows = num_dml_affected_rows
- mock_results.total_rows = total_rows
- return mock_results
-
- def test_ctor(self):
- from google.cloud.bigquery.dbapi import connect
- from google.cloud.bigquery.dbapi import Cursor
- connection = connect(self._mock_client())
- cursor = self._make_one(connection)
- self.assertIsInstance(cursor, Cursor)
- self.assertIs(cursor.connection, connection)
-
- def test_close(self):
- from google.cloud.bigquery.dbapi import connect
- connection = connect(self._mock_client())
- cursor = connection.cursor()
- # close() is a no-op; there is nothing to test.
- cursor.close()
-
- def test_fetchone_wo_execute_raises_error(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(self._mock_client())
- cursor = connection.cursor()
- self.assertRaises(dbapi.Error, cursor.fetchone)
-
- def test_fetchone_w_row(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(
- self._mock_client(rows=[(1,)]))
- cursor = connection.cursor()
- cursor.execute('SELECT 1;')
- row = cursor.fetchone()
- self.assertEqual(row, (1,))
- self.assertIsNone(cursor.fetchone())
-
- def test_fetchmany_wo_execute_raises_error(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(self._mock_client())
- cursor = connection.cursor()
- self.assertRaises(dbapi.Error, cursor.fetchmany)
-
- def test_fetchmany_w_row(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(
- self._mock_client(rows=[(1,)]))
- cursor = connection.cursor()
- cursor.execute('SELECT 1;')
- rows = cursor.fetchmany()
- self.assertEqual(len(rows), 1)
- self.assertEqual(rows[0], (1,))
-
- def test_fetchmany_w_size(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(
- self._mock_client(
- rows=[
- (1, 2, 3),
- (4, 5, 6),
- (7, 8, 9),
- ]))
- cursor = connection.cursor()
- cursor.execute('SELECT a, b, c;')
- rows = cursor.fetchmany(size=2)
- self.assertEqual(len(rows), 2)
- self.assertEqual(rows[0], (1, 2, 3))
- self.assertEqual(rows[1], (4, 5, 6))
- second_page = cursor.fetchmany(size=2)
- self.assertEqual(len(second_page), 1)
- self.assertEqual(second_page[0], (7, 8, 9))
- third_page = cursor.fetchmany(size=2)
- self.assertEqual(third_page, [])
-
- def test_fetchmany_w_arraysize(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(
- self._mock_client(
- rows=[
- (1, 2, 3),
- (4, 5, 6),
- (7, 8, 9),
- ]))
- cursor = connection.cursor()
- cursor.execute('SELECT a, b, c;')
- cursor.arraysize = 2
- rows = cursor.fetchmany()
- self.assertEqual(len(rows), 2)
- self.assertEqual(rows[0], (1, 2, 3))
- self.assertEqual(rows[1], (4, 5, 6))
- second_page = cursor.fetchmany()
- self.assertEqual(len(second_page), 1)
- self.assertEqual(second_page[0], (7, 8, 9))
- third_page = cursor.fetchmany()
- self.assertEqual(third_page, [])
-
- def test_fetchall_wo_execute_raises_error(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(self._mock_client())
- cursor = connection.cursor()
- self.assertRaises(dbapi.Error, cursor.fetchall)
-
- def test_fetchall_w_row(self):
- from google.cloud.bigquery import dbapi
- connection = dbapi.connect(
- self._mock_client(rows=[(1,)]))
- cursor = connection.cursor()
- cursor.execute('SELECT 1;')
- self.assertIsNone(cursor.description)
- self.assertEqual(cursor.rowcount, 1)
- rows = cursor.fetchall()
- self.assertEqual(len(rows), 1)
- self.assertEqual(rows[0], (1,))
-
- def test_execute_custom_job_id(self):
- from google.cloud.bigquery.dbapi import connect
- client = self._mock_client(rows=[], num_dml_affected_rows=0)
- connection = connect(client)
- cursor = connection.cursor()
- cursor.execute('SELECT 1;', job_id='foo')
- args, kwargs = client.query.call_args
- self.assertEqual(args[0], 'SELECT 1;')
- self.assertEqual(kwargs['job_id'], 'foo')
-
- def test_execute_w_dml(self):
- from google.cloud.bigquery.dbapi import connect
- connection = connect(
- self._mock_client(rows=[], num_dml_affected_rows=12))
- cursor = connection.cursor()
- cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';')
- rows = cursor.fetchall()
- self.assertIsNone(cursor.description)
- self.assertEqual(cursor.rowcount, 12)
- self.assertEqual(rows, [])
-
- def test_execute_w_query(self):
- from google.cloud.bigquery.schema import SchemaField
- from google.cloud.bigquery import dbapi
-
- connection = dbapi.connect(self._mock_client(
- rows=[('hello', 'world', 1), ('howdy', 'y\'all', 2)],
- schema=[
- SchemaField('a', 'STRING', mode='NULLABLE'),
- SchemaField('b', 'STRING', mode='REQUIRED'),
- SchemaField('c', 'INTEGER', mode='NULLABLE')]))
- cursor = connection.cursor()
- cursor.execute('SELECT a, b, c FROM hello_world WHERE d > 3;')
-
- # Verify the description.
- self.assertEqual(len(cursor.description), 3)
- a_name, a_type, _, _, _, _, a_null_ok = cursor.description[0]
- self.assertEqual(a_name, 'a')
- self.assertEqual(a_type, 'STRING')
- self.assertEqual(a_type, dbapi.STRING)
- self.assertTrue(a_null_ok)
- b_name, b_type, _, _, _, _, b_null_ok = cursor.description[1]
- self.assertEqual(b_name, 'b')
- self.assertEqual(b_type, 'STRING')
- self.assertEqual(b_type, dbapi.STRING)
- self.assertFalse(b_null_ok)
- c_name, c_type, _, _, _, _, c_null_ok = cursor.description[2]
- self.assertEqual(c_name, 'c')
- self.assertEqual(c_type, 'INTEGER')
- self.assertEqual(c_type, dbapi.NUMBER)
- self.assertTrue(c_null_ok)
-
- # Verify the results.
- self.assertEqual(cursor.rowcount, 2)
- row = cursor.fetchone()
- self.assertEqual(row, ('hello', 'world', 1))
- row = cursor.fetchone()
- self.assertEqual(row, ('howdy', 'y\'all', 2))
- row = cursor.fetchone()
- self.assertIsNone(row)
-
- def test_execute_raises_if_result_raises(self):
- import google.cloud.exceptions
-
- from google.cloud.bigquery import client
- from google.cloud.bigquery import job
- from google.cloud.bigquery.dbapi import connect
- from google.cloud.bigquery.dbapi import exceptions
-
- job = mock.create_autospec(job.QueryJob)
- job.result.side_effect = google.cloud.exceptions.GoogleCloudError('')
- client = mock.create_autospec(client.Client)
- client.query.return_value = job
- connection = connect(client)
- cursor = connection.cursor()
-
- with self.assertRaises(exceptions.DatabaseError):
- cursor.execute('SELECT 1')
-
- def test_executemany_w_dml(self):
- from google.cloud.bigquery.dbapi import connect
- connection = connect(
- self._mock_client(rows=[], num_dml_affected_rows=12))
- cursor = connection.cursor()
- cursor.executemany(
- 'DELETE FROM UserSessions WHERE user_id = %s;',
- (('test',), ('anothertest',)))
- self.assertIsNone(cursor.description)
- self.assertEqual(cursor.rowcount, 12)
-
- def test__format_operation_w_dict(self):
- from google.cloud.bigquery.dbapi import cursor
- formatted_operation = cursor._format_operation(
- 'SELECT %(somevalue)s, %(a `weird` one)s;',
- {
- 'somevalue': 'hi',
- 'a `weird` one': 'world',
- })
- self.assertEqual(
- formatted_operation, 'SELECT @`somevalue`, @`a \\`weird\\` one`;')
-
- def test__format_operation_w_wrong_dict(self):
- from google.cloud.bigquery import dbapi
- from google.cloud.bigquery.dbapi import cursor
- self.assertRaises(
- dbapi.ProgrammingError,
- cursor._format_operation,
- 'SELECT %(somevalue)s, %(othervalue)s;',
- {
- 'somevalue-not-here': 'hi',
- 'othervalue': 'world',
- })
-
- def test__format_operation_w_sequence(self):
- from google.cloud.bigquery.dbapi import cursor
- formatted_operation = cursor._format_operation(
- 'SELECT %s, %s;', ('hello', 'world'))
- self.assertEqual(formatted_operation, 'SELECT ?, ?;')
-
- def test__format_operation_w_too_short_sequence(self):
- from google.cloud.bigquery import dbapi
- from google.cloud.bigquery.dbapi import cursor
- self.assertRaises(
- dbapi.ProgrammingError,
- cursor._format_operation,
- 'SELECT %s, %s;',
- ('hello',))
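The cursor behavior exercised above follows the PEP 249 DB-API shape. A hedged usage sketch, assuming a Client with working credentials rather than the mocks used in the tests:

    from google.cloud import bigquery
    from google.cloud.bigquery import dbapi

    # connect() wraps an existing Client; outside a test it needs real
    # credentials and a project.
    connection = dbapi.connect(bigquery.Client())
    cursor = connection.cursor()
    # Sequence parameters map to positional placeholders, as
    # test__format_operation_w_sequence shows above.
    cursor.execute('SELECT %s, %s;', ('hello', 'world'))
    rows = cursor.fetchall()
    cursor.close()   # a no-op, per test_close above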
diff --git a/bigquery/tests/unit/test_external_config.py b/bigquery/tests/unit/test_external_config.py
deleted file mode 100644
index b788742..0000000
--- a/bigquery/tests/unit/test_external_config.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# Copyright 2017 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import base64
-import copy
-import unittest
-
-from google.cloud.bigquery.external_config import ExternalConfig
-
-
-class TestExternalConfig(unittest.TestCase):
-
- SOURCE_URIS = ['gs://foo', 'gs://bar']
-
- BASE_RESOURCE = {
- 'sourceFormat': '',
- 'sourceUris': SOURCE_URIS,
- 'maxBadRecords': 17,
- 'autodetect': True,
- 'ignoreUnknownValues': False,
- 'compression': 'compression',
- }
-
- def test_api_repr_base(self):
- from google.cloud.bigquery.schema import SchemaField
-
- resource = copy.deepcopy(self.BASE_RESOURCE)
- ec = ExternalConfig.from_api_repr(resource)
- self._verify_base(ec)
- self.assertEqual(ec.schema, [])
- self.assertIsNone(ec.options)
-
- got_resource = ec.to_api_repr()
- self.assertEqual(got_resource, self.BASE_RESOURCE)
-
- resource = _copy_and_update(self.BASE_RESOURCE, {
- 'schema': {
- 'fields': [
- {
- 'name': 'full_name',
- 'type': 'STRING',
- 'mode': 'REQUIRED',
- },
- ],
- },
- })
- ec = ExternalConfig.from_api_repr(resource)
- self._verify_base(ec)
- self.assertEqual(ec.schema,
- [SchemaField('full_name', 'STRING', mode='REQUIRED')])
- self.assertIsNone(ec.options)
-
- got_resource = ec.to_api_repr()
- self.assertEqual(got_resource, resource)
-
- def _verify_base(self, ec):
- self.assertEqual(ec.autodetect, True)
- self.assertEqual(ec.compression, 'compression')
- self.assertEqual(ec.ignore_unknown_values, False)
- self.assertEqual(ec.max_bad_records, 17)
- self.assertEqual(ec.source_uris, self.SOURCE_URIS)
-
- def test_to_api_repr_source_format(self):
- ec = ExternalConfig('CSV')
- got = ec.to_api_repr()
- want = {'sourceFormat': 'CSV'}
- self.assertEqual(got, want)
-
- def test_api_repr_sheets(self):
- from google.cloud.bigquery.external_config import GoogleSheetsOptions
-
- resource = _copy_and_update(self.BASE_RESOURCE, {
- 'sourceFormat': 'GOOGLE_SHEETS',
- 'googleSheetsOptions': {'skipLeadingRows': '123'},
- })
-
- ec = ExternalConfig.from_api_repr(resource)
-
- self._verify_base(ec)
- self.assertEqual(ec.source_format, 'GOOGLE_SHEETS')
- self.assertIsInstance(ec.options, GoogleSheetsOptions)
- self.assertEqual(ec.options.skip_leading_rows, 123)
-
- got_resource = ec.to_api_repr()
-
- self.assertEqual(got_resource, resource)
-
- del resource['googleSheetsOptions']['skipLeadingRows']
- ec = ExternalConfig.from_api_repr(resource)
- self.assertIsNone(ec.options.skip_leading_rows)
- got_resource = ec.to_api_repr()
- self.assertEqual(got_resource, resource)
-
- def test_api_repr_csv(self):
- from google.cloud.bigquery.external_config import CSVOptions
-
- resource = _copy_and_update(self.BASE_RESOURCE, {
- 'sourceFormat': 'CSV',
- 'csvOptions': {
- 'fieldDelimiter': 'fieldDelimiter',
- 'skipLeadingRows': '123',
- 'quote': 'quote',
- 'allowQuotedNewlines': True,
- 'allowJaggedRows': False,
- 'encoding': 'encoding',
- },
- })
-
- ec = ExternalConfig.from_api_repr(resource)
-
- self._verify_base(ec)
- self.assertEqual(ec.source_format, 'CSV')
- self.assertIsInstance(ec.options, CSVOptions)
- self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter')
- self.assertEqual(ec.options.skip_leading_rows, 123)
- self.assertEqual(ec.options.quote_character, 'quote')
- self.assertEqual(ec.options.allow_quoted_newlines, True)
- self.assertEqual(ec.options.allow_jagged_rows, False)
- self.assertEqual(ec.options.encoding, 'encoding')
-
- got_resource = ec.to_api_repr()
-
- self.assertEqual(got_resource, resource)
-
- del resource['csvOptions']['skipLeadingRows']
- ec = ExternalConfig.from_api_repr(resource)
- self.assertIsNone(ec.options.skip_leading_rows)
- got_resource = ec.to_api_repr()
- self.assertEqual(got_resource, resource)
-
- def test_api_repr_bigtable(self):
- from google.cloud.bigquery.external_config import BigtableOptions
- from google.cloud.bigquery.external_config import BigtableColumnFamily
-
- QUALIFIER_ENCODED = base64.standard_b64encode(b'q').decode('ascii')
- resource = _copy_and_update(self.BASE_RESOURCE, {
- 'sourceFormat': 'BIGTABLE',
- 'bigtableOptions': {
- 'ignoreUnspecifiedColumnFamilies': True,
- 'readRowkeyAsString': False,
- 'columnFamilies': [
- {
- 'familyId': 'familyId',
- 'type': 'type',
- 'encoding': 'encoding',
- 'columns': [
- {
- 'qualifierString': 'q',
- 'fieldName': 'fieldName1',
- 'type': 'type1',
- 'encoding': 'encoding1',
- 'onlyReadLatest': True,
- },
- {
- 'qualifierEncoded': QUALIFIER_ENCODED,
- 'fieldName': 'fieldName2',
- 'type': 'type2',
- 'encoding': 'encoding2',
- },
-
- ],
- 'onlyReadLatest': False,
- }
- ],
- },
- })
-
- ec = ExternalConfig.from_api_repr(resource)
-
- self._verify_base(ec)
- self.assertEqual(ec.source_format, 'BIGTABLE')
- self.assertIsInstance(ec.options, BigtableOptions)
- self.assertEqual(ec.options.ignore_unspecified_column_families, True)
- self.assertEqual(ec.options.read_rowkey_as_string, False)
- self.assertEqual(len(ec.options.column_families), 1)
- fam1 = ec.options.column_families[0]
- self.assertIsInstance(fam1, BigtableColumnFamily)
- self.assertEqual(fam1.family_id, 'familyId')
- self.assertEqual(fam1.type_, 'type')
- self.assertEqual(fam1.encoding, 'encoding')
- self.assertEqual(len(fam1.columns), 2)
- col1 = fam1.columns[0]
- self.assertEqual(col1.qualifier_string, 'q')
- self.assertEqual(col1.field_name, 'fieldName1')
- self.assertEqual(col1.type_, 'type1')
- self.assertEqual(col1.encoding, 'encoding1')
- col2 = ec.options.column_families[0].columns[1]
- self.assertEqual(col2.qualifier_encoded, b'q')
- self.assertEqual(col2.field_name, 'fieldName2')
- self.assertEqual(col2.type_, 'type2')
- self.assertEqual(col2.encoding, 'encoding2')
-
- got_resource = ec.to_api_repr()
-
- self.assertEqual(got_resource, resource)
-
-
-def _copy_and_update(d, u):
- d = copy.deepcopy(d)
- d.update(u)
- return d
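A hedged sketch of building the kind of CSV-backed ExternalConfig these round-trip tests decode; the bucket URI is a placeholder, and the settable attributes are an assumption mirroring the camelCase resource keys asserted above:

    from google.cloud.bigquery.external_config import ExternalConfig

    ec = ExternalConfig('CSV')                 # per test_to_api_repr_source_format
    ec.source_uris = ['gs://bucket/data.csv']  # placeholder URI; assumed settable
    ec.autodetect = True                       # assumed settable, mirrors 'autodetect'
    ec.options.skip_leading_rows = 1           # CSVOptions attribute read back above
    ec.options.field_delimiter = ','
    resource = ec.to_api_repr()                # plain dict in the API's JSON shape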
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
deleted file mode 100644
index 34c5c92..0000000
--- a/bigquery/tests/unit/test_job.py
+++ /dev/null
@@ -1,2937 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import copy
-
-from six.moves import http_client
-import unittest
-
-from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig
-from google.cloud.bigquery.job import LoadJobConfig
-from google.cloud.bigquery.dataset import DatasetReference
-
-import mock
-
-
-def _make_credentials():
- import google.auth.credentials
-
- return mock.Mock(spec=google.auth.credentials.Credentials)
-
-
-def _make_client(project='test-project', connection=None):
- from google.cloud.bigquery.client import Client
-
- if connection is None:
- connection = _Connection()
-
- client = Client(
- project=project, credentials=_make_credentials(), _http=object())
- client._connection = connection
- return client
-
-
-class Test__int_or_none(unittest.TestCase):
-
- def _call_fut(self, *args, **kwargs):
- from google.cloud.bigquery import job
-
- return job._int_or_none(*args, **kwargs)
-
- def test_w_int(self):
- self.assertEqual(self._call_fut(13), 13)
-
- def test_w_none(self):
- self.assertIsNone(self._call_fut(None))
-
- def test_w_str(self):
- self.assertEqual(self._call_fut('13'), 13)
-
-
-class Test__error_result_to_exception(unittest.TestCase):
-
- def _call_fut(self, *args, **kwargs):
- from google.cloud.bigquery import job
-
- return job._error_result_to_exception(*args, **kwargs)
-
- def test_simple(self):
- error_result = {
- 'reason': 'invalid',
- 'message': 'bad request'
- }
- exception = self._call_fut(error_result)
- self.assertEqual(exception.code, http_client.BAD_REQUEST)
- self.assertTrue(exception.message.startswith('bad request'))
- self.assertIn(error_result, exception.errors)
-
- def test_missing_reason(self):
- error_result = {}
- exception = self._call_fut(error_result)
- self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR)
-
-
-class _Base(object):
- from google.cloud.bigquery.dataset import DatasetReference
- from google.cloud.bigquery.table import TableReference
-
- PROJECT = 'project'
- SOURCE1 = 'http://example.com/source1.csv'
- DS_ID = 'dataset_id'
- DS_REF = DatasetReference(PROJECT, DS_ID)
- TABLE_ID = 'table_id'
- TABLE_REF = TableReference(DS_REF, TABLE_ID)
- JOB_ID = 'JOB_ID'
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def _setUpConstants(self):
- import datetime
- from google.cloud._helpers import UTC
-
- self.WHEN_TS = 1437767599.006
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(
- tzinfo=UTC)
- self.ETAG = 'ETAG'
- self.FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID)
- self.RESOURCE_URL = 'http://example.com/path/to/resource'
- self.USER_EMAIL = 'phred@example.com'
-
- def _table_ref(self, table_id):
- from google.cloud.bigquery.table import TableReference
-
- return TableReference(self.DS_REF, table_id)
-
- def _makeResource(self, started=False, ended=False):
- self._setUpConstants()
- resource = {
- 'configuration': {
- self.JOB_TYPE: {
- },
- },
- 'statistics': {
- 'creationTime': self.WHEN_TS * 1000,
- self.JOB_TYPE: {
- }
- },
- 'etag': self.ETAG,
- 'id': self.FULL_JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'selfLink': self.RESOURCE_URL,
- 'user_email': self.USER_EMAIL,
- }
-
- if started or ended:
- resource['statistics']['startTime'] = self.WHEN_TS * 1000
-
- if ended:
- resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000
-
- if self.JOB_TYPE == 'query':
- resource['configuration']['query']['destinationTable'] = {
- 'projectId': self.PROJECT,
- 'datasetId': '_temp_dataset',
- 'tableId': '_temp_table',
- }
-
- return resource
-
- def _verifyInitialReadonlyProperties(self, job):
- # root elements of resource
- self.assertIsNone(job.etag)
- self.assertIsNone(job.self_link)
- self.assertIsNone(job.user_email)
-
- # derived from resource['statistics']
- self.assertIsNone(job.created)
- self.assertIsNone(job.started)
- self.assertIsNone(job.ended)
-
- # derived from resource['status']
- self.assertIsNone(job.error_result)
- self.assertIsNone(job.errors)
- self.assertIsNone(job.state)
-
- def _verifyReadonlyResourceProperties(self, job, resource):
- from datetime import timedelta
-
- statistics = resource.get('statistics', {})
-
- if 'creationTime' in statistics:
- self.assertEqual(job.created, self.WHEN)
- else:
- self.assertIsNone(job.created)
-
- if 'startTime' in statistics:
- self.assertEqual(job.started, self.WHEN)
- else:
- self.assertIsNone(job.started)
-
- if 'endTime' in statistics:
- self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000))
- else:
- self.assertIsNone(job.ended)
-
- if 'etag' in resource:
- self.assertEqual(job.etag, self.ETAG)
- else:
- self.assertIsNone(job.etag)
-
- if 'selfLink' in resource:
- self.assertEqual(job.self_link, self.RESOURCE_URL)
- else:
- self.assertIsNone(job.self_link)
-
- if 'user_email' in resource:
- self.assertEqual(job.user_email, self.USER_EMAIL)
- else:
- self.assertIsNone(job.user_email)
-
-
-class TestLoadJob(unittest.TestCase, _Base):
- JOB_TYPE = 'load'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import LoadJob
-
- return LoadJob
-
- def _setUpConstants(self):
- super(TestLoadJob, self)._setUpConstants()
- self.INPUT_FILES = 2
- self.INPUT_BYTES = 12345
- self.OUTPUT_BYTES = 23456
- self.OUTPUT_ROWS = 345
-
- def _makeResource(self, started=False, ended=False):
- resource = super(TestLoadJob, self)._makeResource(
- started, ended)
- config = resource['configuration']['load']
- config['sourceUris'] = [self.SOURCE1]
- config['destinationTable'] = {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- }
-
- if ended:
- resource['status'] = {'state': 'DONE'}
- resource['statistics']['load']['inputFiles'] = self.INPUT_FILES
- resource['statistics']['load']['inputFileBytes'] = self.INPUT_BYTES
- resource['statistics']['load']['outputBytes'] = self.OUTPUT_BYTES
- resource['statistics']['load']['outputRows'] = self.OUTPUT_ROWS
-
- return resource
-
- def _verifyBooleanConfigProperties(self, job, config):
- if 'allowJaggedRows' in config:
- self.assertEqual(job.allow_jagged_rows,
- config['allowJaggedRows'])
- else:
- self.assertIsNone(job.allow_jagged_rows)
- if 'allowQuotedNewlines' in config:
- self.assertEqual(job.allow_quoted_newlines,
- config['allowQuotedNewlines'])
- else:
- self.assertIsNone(job.allow_quoted_newlines)
- if 'autodetect' in config:
- self.assertEqual(
- job.autodetect, config['autodetect'])
- else:
- self.assertIsNone(job.autodetect)
- if 'ignoreUnknownValues' in config:
- self.assertEqual(job.ignore_unknown_values,
- config['ignoreUnknownValues'])
- else:
- self.assertIsNone(job.ignore_unknown_values)
-
- def _verifyEnumConfigProperties(self, job, config):
- if 'createDisposition' in config:
- self.assertEqual(job.create_disposition,
- config['createDisposition'])
- else:
- self.assertIsNone(job.create_disposition)
- if 'encoding' in config:
- self.assertEqual(job.encoding,
- config['encoding'])
- else:
- self.assertIsNone(job.encoding)
- if 'sourceFormat' in config:
- self.assertEqual(job.source_format,
- config['sourceFormat'])
- else:
- self.assertIsNone(job.source_format)
- if 'writeDisposition' in config:
- self.assertEqual(job.write_disposition,
- config['writeDisposition'])
- else:
- self.assertIsNone(job.write_disposition)
-
- def _verifyResourceProperties(self, job, resource):
- self._verifyReadonlyResourceProperties(job, resource)
-
- config = resource.get('configuration', {}).get('load')
-
- self._verifyBooleanConfigProperties(job, config)
- self._verifyEnumConfigProperties(job, config)
-
- self.assertEqual(job.source_uris, config['sourceUris'])
-
- table_ref = config['destinationTable']
- self.assertEqual(job.destination.project, table_ref['projectId'])
- self.assertEqual(job.destination.dataset_id, table_ref['datasetId'])
- self.assertEqual(job.destination.table_id, table_ref['tableId'])
-
- if 'fieldDelimiter' in config:
- self.assertEqual(job.field_delimiter,
- config['fieldDelimiter'])
- else:
- self.assertIsNone(job.field_delimiter)
- if 'maxBadRecords' in config:
- self.assertEqual(job.max_bad_records,
- config['maxBadRecords'])
- else:
- self.assertIsNone(job.max_bad_records)
- if 'nullMarker' in config:
- self.assertEqual(job.null_marker,
- config['nullMarker'])
- else:
- self.assertIsNone(job.null_marker)
- if 'quote' in config:
- self.assertEqual(job.quote_character,
- config['quote'])
- else:
- self.assertIsNone(job.quote_character)
- if 'skipLeadingRows' in config:
- self.assertEqual(str(job.skip_leading_rows),
- config['skipLeadingRows'])
- else:
- self.assertIsNone(job.skip_leading_rows)
-
- def test_ctor(self):
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client)
- self.assertIs(job.destination, self.TABLE_REF)
- self.assertEqual(list(job.source_uris), [self.SOURCE1])
- self.assertIs(job._client, client)
- self.assertEqual(job.job_type, self.JOB_TYPE)
- self.assertEqual(
- job.path,
- '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
- self.assertEqual(job.schema, [])
-
- self._verifyInitialReadonlyProperties(job)
-
- # derived from resource['statistics']['load']
- self.assertIsNone(job.input_file_bytes)
- self.assertIsNone(job.input_files)
- self.assertIsNone(job.output_bytes)
- self.assertIsNone(job.output_rows)
-
- # set/read from resource['configuration']['load']
- self.assertIsNone(job.allow_jagged_rows)
- self.assertIsNone(job.allow_quoted_newlines)
- self.assertIsNone(job.autodetect)
- self.assertIsNone(job.create_disposition)
- self.assertIsNone(job.encoding)
- self.assertIsNone(job.field_delimiter)
- self.assertIsNone(job.ignore_unknown_values)
- self.assertIsNone(job.max_bad_records)
- self.assertIsNone(job.null_marker)
- self.assertIsNone(job.quote_character)
- self.assertIsNone(job.skip_leading_rows)
- self.assertIsNone(job.source_format)
- self.assertIsNone(job.write_disposition)
-
- def test_ctor_w_config(self):
- from google.cloud.bigquery.schema import SchemaField
-
- client = _make_client(project=self.PROJECT)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- config = LoadJobConfig()
- config.schema = [full_name, age]
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client, config)
- self.assertEqual(job.schema, [full_name, age])
-
- def test_done(self):
- client = _make_client(project=self.PROJECT)
- resource = self._makeResource(ended=True)
- job = self._get_target_class().from_api_repr(resource, client)
- self.assertTrue(job.done())
-
- def test_result(self):
- client = _make_client(project=self.PROJECT)
- resource = self._makeResource(ended=True)
- job = self._get_target_class().from_api_repr(resource, client)
-
- result = job.result()
-
- self.assertIs(result, job)
-
- def test_result_invokes_begin(self):
- begun_resource = self._makeResource()
- done_resource = copy.deepcopy(begun_resource)
- done_resource['status'] = {'state': 'DONE'}
- connection = _Connection(begun_resource, done_resource)
- client = _make_client(self.PROJECT)
- client._connection = connection
-
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client)
- job.result()
-
- self.assertEqual(len(connection._requested), 2)
- begin_request, reload_request = connection._requested
- self.assertEqual(begin_request['method'], 'POST')
- self.assertEqual(reload_request['method'], 'GET')
-
- def test_schema_setter_non_list(self):
- config = LoadJobConfig()
- with self.assertRaises(TypeError):
- config.schema = object()
-
- def test_schema_setter_invalid_field(self):
- from google.cloud.bigquery.schema import SchemaField
-
- config = LoadJobConfig()
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- with self.assertRaises(ValueError):
- config.schema = [full_name, object()]
-
- def test_schema_setter(self):
- from google.cloud.bigquery.schema import SchemaField
-
- config = LoadJobConfig()
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- config.schema = [full_name, age]
- self.assertEqual(config.schema, [full_name, age])
-
- def test_props_set_by_server(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud._helpers import _millis
-
- CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC)
- STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC)
- ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC)
- FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID)
- URL = 'http://example.com/projects/%s/jobs/%s' % (
- self.PROJECT, self.JOB_ID)
- EMAIL = 'phred@example.com'
- ERROR_RESULT = {'debugInfo': 'DEBUG',
- 'location': 'LOCATION',
- 'message': 'MESSAGE',
- 'reason': 'REASON'}
-
- client = _make_client(project=self.PROJECT)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
- job._properties['etag'] = 'ETAG'
- job._properties['id'] = FULL_JOB_ID
- job._properties['selfLink'] = URL
- job._properties['user_email'] = EMAIL
-
- statistics = job._properties['statistics'] = {}
- statistics['creationTime'] = _millis(CREATED)
- statistics['startTime'] = _millis(STARTED)
- statistics['endTime'] = _millis(ENDED)
- load_stats = statistics['load'] = {}
- load_stats['inputFileBytes'] = 12345
- load_stats['inputFiles'] = 1
- load_stats['outputBytes'] = 23456
- load_stats['outputRows'] = 345
-
- self.assertEqual(job.etag, 'ETAG')
- self.assertEqual(job.self_link, URL)
- self.assertEqual(job.user_email, EMAIL)
-
- self.assertEqual(job.created, CREATED)
- self.assertEqual(job.started, STARTED)
- self.assertEqual(job.ended, ENDED)
-
- self.assertEqual(job.input_file_bytes, 12345)
- self.assertEqual(job.input_files, 1)
- self.assertEqual(job.output_bytes, 23456)
- self.assertEqual(job.output_rows, 345)
-
- status = job._properties['status'] = {}
-
- self.assertIsNone(job.error_result)
- self.assertIsNone(job.errors)
- self.assertIsNone(job.state)
-
- status['errorResult'] = ERROR_RESULT
- status['errors'] = [ERROR_RESULT]
- status['state'] = 'STATE'
-
- self.assertEqual(job.error_result, ERROR_RESULT)
- self.assertEqual(job.errors, [ERROR_RESULT])
- self.assertEqual(job.state, 'STATE')
-
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_missing_config(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': '%s:%s' % (self.PROJECT, self.JOB_ID),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- }
- }
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_bare(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': self.FULL_JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'load': {
- 'sourceUris': [self.SOURCE1],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- }
- },
- }
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_from_api_repr_w_properties(self):
- client = _make_client(project=self.PROJECT)
- RESOURCE = self._makeResource()
- load_config = RESOURCE['configuration']['load']
- load_config['createDisposition'] = 'CREATE_IF_NEEDED'
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_begin_w_already_running(self):
- conn = _Connection()
- client = _make_client(project=self.PROJECT, connection=conn)
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client)
- job._properties['status'] = {'state': 'RUNNING'}
-
- with self.assertRaises(ValueError):
- job.begin()
-
- def test_begin_w_bound_client(self):
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'load': {
- 'sourceUris': [self.SOURCE1],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- },
- },
- }
- self.assertEqual(req['data'], SENT)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_begin_w_autodetect(self):
- path = '/projects/{}/jobs'.format(self.PROJECT)
- resource = self._makeResource()
- resource['configuration']['load']['autodetect'] = True
- # Ensure None for missing server-set props
- del resource['statistics']['creationTime']
- del resource['etag']
- del resource['selfLink']
- del resource['user_email']
- conn = _Connection(resource)
- client = _make_client(project=self.PROJECT, connection=conn)
- config = LoadJobConfig()
- config.autodetect = True
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client, config)
- job.begin()
-
- sent = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'load': {
- 'sourceUris': [self.SOURCE1],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- 'autodetect': True
- },
- },
- }
- expected_request = {
- 'method': 'POST',
- 'path': path,
- 'data': sent,
- }
- self.assertEqual(conn._requested, [expected_request])
- self._verifyResourceProperties(job, resource)
-
- def test_begin_w_alternate_client(self):
- from google.cloud.bigquery.schema import SchemaField
-
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource(ended=True)
- LOAD_CONFIGURATION = {
- 'sourceUris': [self.SOURCE1],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_ID,
- },
- 'allowJaggedRows': True,
- 'allowQuotedNewlines': True,
- 'createDisposition': 'CREATE_NEVER',
- 'encoding': 'ISO-8859-1',
- 'fieldDelimiter': '|',
- 'ignoreUnknownValues': True,
- 'maxBadRecords': 100,
- 'nullMarker': r'\N',
- 'quote': "'",
- 'skipLeadingRows': '1',
- 'sourceFormat': 'CSV',
- 'writeDisposition': 'WRITE_TRUNCATE',
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'},
- ]}
- }
- RESOURCE['configuration']['load'] = LOAD_CONFIGURATION
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- config = LoadJobConfig()
- config.schema = [full_name, age]
- job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
- client1, config)
- config.allow_jagged_rows = True
- config.allow_quoted_newlines = True
- config.create_disposition = 'CREATE_NEVER'
- config.encoding = 'ISO-8859-1'
- config.field_delimiter = '|'
- config.ignore_unknown_values = True
- config.max_bad_records = 100
- config.null_marker = r'\N'
- config.quote_character = "'"
- config.skip_leading_rows = 1
- config.source_format = 'CSV'
- config.write_disposition = 'WRITE_TRUNCATE'
-
- job.begin(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'load': LOAD_CONFIGURATION,
- },
- }
- self.maxDiff = None
- self.assertEqual(req['data'], SENT)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_exists_miss_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn = _Connection()
- client = _make_client(project=self.PROJECT, connection=conn)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
-
- self.assertFalse(job.exists())
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_exists_hit_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection({})
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1)
-
- self.assertTrue(job.exists(client=client2))
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_reload_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource()
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
-
- job.reload()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_reload_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource()
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1)
-
- job.reload(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_cancel_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource(ended=True)
- RESPONSE = {'job': RESOURCE}
- conn = _Connection(RESPONSE)
- client = _make_client(project=self.PROJECT, connection=conn)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
-
- job.cancel()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_cancel_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource(ended=True)
- RESPONSE = {'job': RESOURCE}
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESPONSE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- table = _Table()
- job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1)
-
- job.cancel(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
-
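Before the copy-job cases, a hedged sketch of the load-job lifecycle the TestLoadJob class above drives end to end; the job ID, URI, and project/dataset/table identifiers are placeholders, and a real run needs working credentials:

    from google.cloud import bigquery
    from google.cloud.bigquery.dataset import DatasetReference
    from google.cloud.bigquery.job import LoadJob

    client = bigquery.Client()   # real credentials/project required
    table_ref = DatasetReference('project', 'dataset_id').table('table_id')
    job = LoadJob('my-job-id', ['gs://bucket/source1.csv'], table_ref, client)
    job.begin()      # POST /projects/<project>/jobs, per test_begin_w_bound_client
    job.exists()     # lightweight GET with fields=id
    job.reload()     # full GET, refreshing server-set properties such as job.state
    job.result()     # blocks until DONE; for a LoadJob it returns the job itself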
-class TestCopyJob(unittest.TestCase, _Base):
- JOB_TYPE = 'copy'
- SOURCE_TABLE = 'source_table'
- DESTINATION_TABLE = 'destination_table'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import CopyJob
-
- return CopyJob
-
- def _makeResource(self, started=False, ended=False):
- resource = super(TestCopyJob, self)._makeResource(
- started, ended)
- config = resource['configuration']['copy']
- config['sourceTables'] = [{
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- }]
- config['destinationTable'] = {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- }
-
- return resource
-
- def _verifyResourceProperties(self, job, resource):
- self._verifyReadonlyResourceProperties(job, resource)
-
- config = resource.get('configuration', {}).get('copy')
-
- table_ref = config['destinationTable']
- self.assertEqual(job.destination.project, table_ref['projectId'])
- self.assertEqual(job.destination.dataset_id, table_ref['datasetId'])
- self.assertEqual(job.destination.table_id, table_ref['tableId'])
-
- sources = config.get('sourceTables')
- if sources is None:
- sources = [config['sourceTable']]
- self.assertEqual(len(sources), len(job.sources))
- for table_ref, table in zip(sources, job.sources):
- self.assertEqual(table.project, table_ref['projectId'])
- self.assertEqual(table.dataset_id, table_ref['datasetId'])
- self.assertEqual(table.table_id, table_ref['tableId'])
-
- if 'createDisposition' in config:
- self.assertEqual(job.create_disposition,
- config['createDisposition'])
- else:
- self.assertIsNone(job.create_disposition)
-
- if 'writeDisposition' in config:
- self.assertEqual(job.write_disposition,
- config['writeDisposition'])
- else:
- self.assertIsNone(job.write_disposition)
-
- def test_ctor(self):
- client = _make_client(project=self.PROJECT)
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- job = self._make_one(self.JOB_ID, [source], destination, client)
- self.assertIs(job.destination, destination)
- self.assertEqual(job.sources, [source])
- self.assertIs(job._client, client)
- self.assertEqual(job.job_type, self.JOB_TYPE)
- self.assertEqual(
- job.path,
- '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
-
- self._verifyInitialReadonlyProperties(job)
-
- # set/read from resource['configuration']['copy']
- self.assertIsNone(job.create_disposition)
- self.assertIsNone(job.write_disposition)
-
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_missing_config(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- }
- }
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_bare(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': self.JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'copy': {
- 'sourceTables': [{
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- }],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- },
- }
- },
- }
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_from_api_repr_w_sourcetable(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': self.JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'copy': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- },
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- },
- }
- },
- }
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_from_api_repr_wo_sources(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': self.JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'copy': {
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- },
- }
- },
- }
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_w_properties(self):
- client = _make_client(project=self.PROJECT)
- RESOURCE = self._makeResource()
- copy_config = RESOURCE['configuration']['copy']
- copy_config['createDisposition'] = 'CREATE_IF_NEEDED'
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_begin_w_bound_client(self):
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- job = self._make_one(self.JOB_ID, [source], destination, client)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'copy': {
- 'sourceTables': [{
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE
- }],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- },
- },
- },
- }
- self.assertEqual(req['data'], SENT)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_begin_w_alternate_client(self):
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource(ended=True)
- COPY_CONFIGURATION = {
- 'sourceTables': [{
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- }],
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- },
- 'createDisposition': 'CREATE_NEVER',
- 'writeDisposition': 'WRITE_TRUNCATE',
- }
- RESOURCE['configuration']['copy'] = COPY_CONFIGURATION
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- config = CopyJobConfig()
- config.create_disposition = 'CREATE_NEVER'
- config.write_disposition = 'WRITE_TRUNCATE'
- job = self._make_one(self.JOB_ID, [source], destination, client1,
- config)
- job.begin(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'copy': COPY_CONFIGURATION,
- },
- }
- self.assertEqual(req['data'], SENT)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_exists_miss_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn = _Connection()
- client = _make_client(project=self.PROJECT, connection=conn)
-
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- job = self._make_one(self.JOB_ID, [source], destination, client)
-
- self.assertFalse(job.exists())
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_exists_hit_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection({})
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- job = self._make_one(self.JOB_ID, [source], destination, client1)
-
- self.assertTrue(job.exists(client=client2))
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_reload_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource()
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- job = self._make_one(self.JOB_ID, [source], destination, client)
-
- job.reload()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_reload_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource()
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- source = self._table_ref(self.SOURCE_TABLE)
- destination = self._table_ref(self.DESTINATION_TABLE)
- job = self._make_one(self.JOB_ID, [source], destination, client1)
-
- job.reload(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
-
-class TestExtractJob(unittest.TestCase, _Base):
- JOB_TYPE = 'extract'
- SOURCE_TABLE = 'source_table'
- DESTINATION_URI = 'gs://bucket_name/object_name'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import ExtractJob
-
- return ExtractJob
-
- def _makeResource(self, started=False, ended=False):
- resource = super(TestExtractJob, self)._makeResource(
- started, ended)
- config = resource['configuration']['extract']
- config['sourceTable'] = {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- }
- config['destinationUris'] = [self.DESTINATION_URI]
- return resource
-
- def _verifyResourceProperties(self, job, resource):
- self._verifyReadonlyResourceProperties(job, resource)
-
- config = resource.get('configuration', {}).get('extract')
-
- self.assertEqual(job.destination_uris, config['destinationUris'])
-
- table_ref = config['sourceTable']
- self.assertEqual(job.source.project, table_ref['projectId'])
- self.assertEqual(job.source.dataset_id, table_ref['datasetId'])
- self.assertEqual(job.source.table_id, table_ref['tableId'])
-
- if 'compression' in config:
- self.assertEqual(
- job.compression, config['compression'])
- else:
- self.assertIsNone(job.compression)
-
- if 'destinationFormat' in config:
- self.assertEqual(
- job.destination_format, config['destinationFormat'])
- else:
- self.assertIsNone(job.destination_format)
-
- if 'fieldDelimiter' in config:
- self.assertEqual(
- job.field_delimiter, config['fieldDelimiter'])
- else:
- self.assertIsNone(job.field_delimiter)
-
- if 'printHeader' in config:
- self.assertEqual(
- job.print_header, config['printHeader'])
- else:
- self.assertIsNone(job.print_header)
-
- def test_ctor(self):
- client = _make_client(project=self.PROJECT)
- source = _Table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client)
- self.assertEqual(job.source, source)
- self.assertEqual(job.destination_uris, [self.DESTINATION_URI])
- self.assertIs(job._client, client)
- self.assertEqual(job.job_type, self.JOB_TYPE)
- self.assertEqual(
- job.path,
- '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
-
- self._verifyInitialReadonlyProperties(job)
-
- # set/read from resource['configuration']['extract']
- self.assertIsNone(job.compression)
- self.assertIsNone(job.destination_format)
- self.assertIsNone(job.field_delimiter)
- self.assertIsNone(job.print_header)
-
- def test_destination_uri_file_counts(self):
- file_counts = 23
- client = _make_client(project=self.PROJECT)
- source = _Table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client)
- self.assertIsNone(job.destination_uri_file_counts)
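-        # Stays None until extract statistics include destinationUriFileCounts.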
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.destination_uri_file_counts)
-
- extract_stats = statistics['extract'] = {}
- self.assertIsNone(job.destination_uri_file_counts)
-
- extract_stats['destinationUriFileCounts'] = str(file_counts)
- self.assertEqual(job.destination_uri_file_counts, file_counts)
-
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_missing_config(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- }
- }
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_bare(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': self.JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'extract': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- },
- 'destinationUris': [self.DESTINATION_URI],
- }
- },
- }
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_from_api_repr_w_properties(self):
- client = _make_client(project=self.PROJECT)
- RESOURCE = self._makeResource()
- extract_config = RESOURCE['configuration']['extract']
- extract_config['compression'] = 'GZIP'
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_begin_w_bound_client(self):
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
- source = source_dataset.table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'extract': {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE
- },
- 'destinationUris': [self.DESTINATION_URI],
- },
- },
- }
- self.assertEqual(req['data'], SENT)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_begin_w_alternate_client(self):
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource(ended=True)
- EXTRACT_CONFIGURATION = {
- 'sourceTable': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.SOURCE_TABLE,
- },
- 'destinationUris': [self.DESTINATION_URI],
- 'compression': 'GZIP',
- 'destinationFormat': 'NEWLINE_DELIMITED_JSON',
- 'fieldDelimiter': '|',
- 'printHeader': False,
- }
- RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
- source = source_dataset.table(self.SOURCE_TABLE)
- job_config = ExtractJobConfig()
- job_config.compression = 'GZIP'
- job_config.destination_format = 'NEWLINE_DELIMITED_JSON'
- job_config.field_delimiter = '|'
- job_config.print_header = False
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client1, job_config)
-
- job.begin(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'extract': EXTRACT_CONFIGURATION,
- },
- }
- self.assertEqual(req['data'], SENT)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_exists_miss_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn = _Connection()
- client = _make_client(project=self.PROJECT, connection=conn)
- source = _Table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client)
-
- self.assertFalse(job.exists())
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_exists_hit_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection({})
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- source = _Table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client1)
-
- self.assertTrue(job.exists(client=client2))
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_reload_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource()
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
- source = source_dataset.table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client)
-
- job.reload()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_reload_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- RESOURCE = self._makeResource()
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
- source = source_dataset.table(self.SOURCE_TABLE)
- job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
- client1)
-
- job.reload(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
-
-class TestQueryJobConfig(unittest.TestCase, _Base):
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import QueryJobConfig
-
- return QueryJobConfig
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- config = self._make_one()
- self.assertEqual(config._properties, {})
-
- def test_from_api_repr_empty(self):
- klass = self._get_target_class()
- config = klass.from_api_repr({})
- self.assertIsNone(config.dry_run)
- self.assertIsNone(config.use_legacy_sql)
- self.assertIsNone(config.default_dataset)
-
- def test_from_api_repr_normal(self):
- resource = {
- 'useLegacySql': True,
- 'query': 'no property for me',
- 'defaultDataset': {
- 'projectId': 'someproject',
- 'datasetId': 'somedataset',
- },
- 'someNewProperty': 'I should be saved, too.',
- }
- klass = self._get_target_class()
-
- config = klass.from_api_repr(resource)
-
- self.assertTrue(config.use_legacy_sql)
- self.assertEqual(
- config.default_dataset,
- DatasetReference('someproject', 'somedataset'))
- # Make sure unknown properties propagate.
- self.assertEqual(config._properties['query'], 'no property for me')
- self.assertEqual(
- config._properties['someNewProperty'], 'I should be saved, too.')
-
- def test_to_api_repr_normal(self):
- config = self._make_one()
- config.use_legacy_sql = True
- config.default_dataset = DatasetReference(
- 'someproject', 'somedataset')
- config._properties['someNewProperty'] = 'Woohoo, alpha stuff.'
-
- resource = config.to_api_repr()
-
- self.assertTrue(resource['useLegacySql'])
- self.assertEqual(
- resource['defaultDataset']['projectId'], 'someproject')
- self.assertEqual(
- resource['defaultDataset']['datasetId'], 'somedataset')
- # Make sure unknown properties propagate.
-        self.assertEqual(
-            resource['someNewProperty'], 'Woohoo, alpha stuff.')
-
-
-class TestQueryJob(unittest.TestCase, _Base):
- JOB_TYPE = 'query'
- QUERY = 'select count(*) from persons'
- DESTINATION_TABLE = 'destination_table'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import QueryJob
-
- return QueryJob
-
- def _makeResource(self, started=False, ended=False):
- resource = super(TestQueryJob, self)._makeResource(
- started, ended)
- config = resource['configuration']['query']
- config['query'] = self.QUERY
-
- if ended:
- resource['status'] = {'state': 'DONE'}
-
- return resource
-
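-    # The _verify* helpers below compare the job's typed properties against the raw API resource dict.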
-    def _verifyBooleanResourceProperties(self, job, config):
- if 'allowLargeResults' in config:
- self.assertEqual(job.allow_large_results,
- config['allowLargeResults'])
- else:
- self.assertIsNone(job.allow_large_results)
- if 'flattenResults' in config:
- self.assertEqual(job.flatten_results,
- config['flattenResults'])
- else:
- self.assertIsNone(job.flatten_results)
- if 'useQueryCache' in config:
- self.assertEqual(job.use_query_cache,
- config['useQueryCache'])
- else:
- self.assertIsNone(job.use_query_cache)
- if 'useLegacySql' in config:
- self.assertEqual(job.use_legacy_sql,
- config['useLegacySql'])
- else:
- self.assertIsNone(job.use_legacy_sql)
-
- def _verifyIntegerResourceProperties(self, job, config):
- if 'maximumBillingTier' in config:
- self.assertEqual(
- job.maximum_billing_tier, config['maximumBillingTier'])
- else:
- self.assertIsNone(job.maximum_billing_tier)
- if 'maximumBytesBilled' in config:
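-            # maximumBytesBilled is a string in the resource but exposed as an int on the job.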
- self.assertEqual(
- str(job.maximum_bytes_billed), config['maximumBytesBilled'])
- self.assertIsInstance(job.maximum_bytes_billed, int)
- else:
- self.assertIsNone(job.maximum_bytes_billed)
-
- def _verify_udf_resources(self, job, config):
- udf_resources = config.get('userDefinedFunctionResources', ())
- self.assertEqual(len(job.udf_resources), len(udf_resources))
- for found, expected in zip(job.udf_resources, udf_resources):
- if 'resourceUri' in expected:
- self.assertEqual(found.udf_type, 'resourceUri')
- self.assertEqual(found.value, expected['resourceUri'])
- else:
- self.assertEqual(found.udf_type, 'inlineCode')
- self.assertEqual(found.value, expected['inlineCode'])
-
- def _verifyQueryParameters(self, job, config):
- query_parameters = config.get('queryParameters', ())
- self.assertEqual(len(job.query_parameters), len(query_parameters))
- for found, expected in zip(job.query_parameters, query_parameters):
- self.assertEqual(found.to_api_repr(), expected)
-
- def _verify_table_definitions(self, job, config):
- table_defs = config.get('tableDefinitions')
- if job.table_definitions is None:
- self.assertIsNone(table_defs)
- else:
- self.assertEqual(len(job.table_definitions), len(table_defs))
- for found_key, found_ec in job.table_definitions.items():
- expected_ec = table_defs.get(found_key)
- self.assertIsNotNone(expected_ec)
- self.assertEqual(found_ec.to_api_repr(), expected_ec)
-
- def _verify_configuration_properties(self, job, configuration):
- if 'dryRun' in configuration:
- self.assertEqual(job.dry_run,
- configuration['dryRun'])
- else:
- self.assertIsNone(job.dry_run)
-
- def _verifyResourceProperties(self, job, resource):
- self._verifyReadonlyResourceProperties(job, resource)
-
- configuration = resource.get('configuration', {})
- self._verify_configuration_properties(job, configuration)
-
- query_config = resource.get('configuration', {}).get('query')
- self._verifyBooleanResourceProperties(job, query_config)
- self._verifyIntegerResourceProperties(job, query_config)
- self._verify_udf_resources(job, query_config)
- self._verifyQueryParameters(job, query_config)
- self._verify_table_definitions(job, query_config)
-
- self.assertEqual(job.query, query_config['query'])
- if 'createDisposition' in query_config:
- self.assertEqual(job.create_disposition,
- query_config['createDisposition'])
- else:
- self.assertIsNone(job.create_disposition)
- if 'defaultDataset' in query_config:
- ds_ref = job.default_dataset
- ds_ref = {
- 'projectId': ds_ref.project,
- 'datasetId': ds_ref.dataset_id,
- }
- self.assertEqual(ds_ref, query_config['defaultDataset'])
- else:
- self.assertIsNone(job.default_dataset)
- if 'destinationTable' in query_config:
- table = job.destination
- tb_ref = {
- 'projectId': table.project,
- 'datasetId': table.dataset_id,
- 'tableId': table.table_id
- }
- self.assertEqual(tb_ref, query_config['destinationTable'])
- else:
- self.assertIsNone(job.destination)
- if 'priority' in query_config:
- self.assertEqual(job.priority,
- query_config['priority'])
- else:
- self.assertIsNone(job.priority)
- if 'writeDisposition' in query_config:
- self.assertEqual(job.write_disposition,
- query_config['writeDisposition'])
- else:
- self.assertIsNone(job.write_disposition)
-
- def test_ctor_defaults(self):
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertEqual(job.query, self.QUERY)
- self.assertIs(job._client, client)
- self.assertEqual(job.job_type, self.JOB_TYPE)
- self.assertEqual(
- job.path,
- '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
-
- self._verifyInitialReadonlyProperties(job)
-
- self.assertFalse(job.use_legacy_sql)
-
- # set/read from resource['configuration']['query']
- self.assertIsNone(job.allow_large_results)
- self.assertIsNone(job.create_disposition)
- self.assertIsNone(job.default_dataset)
- self.assertIsNone(job.destination)
- self.assertIsNone(job.flatten_results)
- self.assertIsNone(job.priority)
- self.assertIsNone(job.use_query_cache)
- self.assertIsNone(job.dry_run)
- self.assertIsNone(job.write_disposition)
- self.assertIsNone(job.maximum_billing_tier)
- self.assertIsNone(job.maximum_bytes_billed)
- self.assertIsNone(job.table_definitions)
-
- def test_ctor_w_udf_resources(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import UDFResource
-
- RESOURCE_URI = 'gs://some-bucket/js/lib.js'
- udf_resources = [UDFResource("resourceUri", RESOURCE_URI)]
- client = _make_client(project=self.PROJECT)
- config = QueryJobConfig()
- config.udf_resources = udf_resources
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=config)
- self.assertEqual(job.udf_resources, udf_resources)
-
- def test_ctor_w_query_parameters(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
- client = _make_client(project=self.PROJECT)
- config = QueryJobConfig()
- config.query_parameters = query_parameters
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=config)
- self.assertEqual(job.query_parameters, query_parameters)
-
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_missing_config(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- }
- }
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE, client=client)
-
- def test_from_api_repr_bare(self):
- self._setUpConstants()
- client = _make_client(project=self.PROJECT)
- RESOURCE = {
- 'id': self.JOB_ID,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {'query': self.QUERY},
- },
- }
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_from_api_repr_w_properties(self):
- client = _make_client(project=self.PROJECT)
- RESOURCE = self._makeResource()
- query_config = RESOURCE['configuration']['query']
- query_config['createDisposition'] = 'CREATE_IF_NEEDED'
- query_config['writeDisposition'] = 'WRITE_TRUNCATE'
- query_config['destinationTable'] = {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.DESTINATION_TABLE,
- }
- klass = self._get_target_class()
- job = klass.from_api_repr(RESOURCE, client=client)
- self.assertIs(job._client, client)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_cancelled(self):
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- job._properties['status'] = {
- 'state': 'DONE',
- 'errorResult': {
- 'reason': 'stopped'
- }
- }
-
- self.assertTrue(job.cancelled())
-
- def test_done(self):
- client = _make_client(project=self.PROJECT)
- resource = self._makeResource(ended=True)
- job = self._get_target_class().from_api_repr(resource, client)
- self.assertTrue(job.done())
-
- def test_query_plan(self):
- from google.cloud.bigquery.job import QueryPlanEntry
- from google.cloud.bigquery.job import QueryPlanEntryStep
-
- plan_entries = [{
- 'name': 'NAME',
- 'id': 1234,
- 'waitRatioAvg': 2.71828,
- 'waitRatioMax': 3.14159,
- 'readRatioAvg': 1.41421,
- 'readRatioMax': 1.73205,
- 'computeRatioAvg': 0.69315,
- 'computeRatioMax': 1.09861,
- 'writeRatioAvg': 3.32193,
- 'writeRatioMax': 2.30258,
- 'recordsRead': '100',
- 'recordsWritten': '1',
- 'status': 'STATUS',
- 'steps': [{
- 'kind': 'KIND',
- 'substeps': ['SUBSTEP1', 'SUBSTEP2'],
- }],
- }]
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertEqual(job.query_plan, [])
-
- statistics = job._properties['statistics'] = {}
- self.assertEqual(job.query_plan, [])
-
- query_stats = statistics['query'] = {}
- self.assertEqual(job.query_plan, [])
-
- query_stats['queryPlan'] = plan_entries
-
- self.assertEqual(len(job.query_plan), len(plan_entries))
- for found, expected in zip(job.query_plan, plan_entries):
- self.assertIsInstance(found, QueryPlanEntry)
- self.assertEqual(found.name, expected['name'])
- self.assertEqual(found.entry_id, expected['id'])
- self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg'])
- self.assertEqual(found.wait_ratio_max, expected['waitRatioMax'])
- self.assertEqual(found.read_ratio_avg, expected['readRatioAvg'])
- self.assertEqual(found.read_ratio_max, expected['readRatioMax'])
- self.assertEqual(
- found.compute_ratio_avg, expected['computeRatioAvg'])
- self.assertEqual(
- found.compute_ratio_max, expected['computeRatioMax'])
- self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg'])
- self.assertEqual(found.write_ratio_max, expected['writeRatioMax'])
- self.assertEqual(
- found.records_read, int(expected['recordsRead']))
- self.assertEqual(
- found.records_written, int(expected['recordsWritten']))
- self.assertEqual(found.status, expected['status'])
-
- self.assertEqual(len(found.steps), len(expected['steps']))
- for f_step, e_step in zip(found.steps, expected['steps']):
- self.assertIsInstance(f_step, QueryPlanEntryStep)
- self.assertEqual(f_step.kind, e_step['kind'])
- self.assertEqual(f_step.substeps, e_step['substeps'])
-
- def test_total_bytes_processed(self):
- total_bytes = 1234
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertIsNone(job.total_bytes_processed)
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.total_bytes_processed)
-
- query_stats = statistics['query'] = {}
- self.assertIsNone(job.total_bytes_processed)
-
- query_stats['totalBytesProcessed'] = str(total_bytes)
- self.assertEqual(job.total_bytes_processed, total_bytes)
-
- def test_total_bytes_billed(self):
- total_bytes = 1234
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertIsNone(job.total_bytes_billed)
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.total_bytes_billed)
-
- query_stats = statistics['query'] = {}
- self.assertIsNone(job.total_bytes_billed)
-
- query_stats['totalBytesBilled'] = str(total_bytes)
- self.assertEqual(job.total_bytes_billed, total_bytes)
-
- def test_billing_tier(self):
- billing_tier = 1
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertIsNone(job.billing_tier)
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.billing_tier)
-
- query_stats = statistics['query'] = {}
- self.assertIsNone(job.billing_tier)
-
- query_stats['billingTier'] = billing_tier
- self.assertEqual(job.billing_tier, billing_tier)
-
- def test_cache_hit(self):
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertIsNone(job.cache_hit)
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.cache_hit)
-
- query_stats = statistics['query'] = {}
- self.assertIsNone(job.cache_hit)
-
- query_stats['cacheHit'] = True
- self.assertTrue(job.cache_hit)
-
- def test_num_dml_affected_rows(self):
- num_rows = 1234
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertIsNone(job.num_dml_affected_rows)
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.num_dml_affected_rows)
-
- query_stats = statistics['query'] = {}
- self.assertIsNone(job.num_dml_affected_rows)
-
- query_stats['numDmlAffectedRows'] = str(num_rows)
- self.assertEqual(job.num_dml_affected_rows, num_rows)
-
- def test_statement_type(self):
- statement_type = 'SELECT'
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertIsNone(job.statement_type)
-
- statistics = job._properties['statistics'] = {}
- self.assertIsNone(job.statement_type)
-
- query_stats = statistics['query'] = {}
- self.assertIsNone(job.statement_type)
-
- query_stats['statementType'] = statement_type
- self.assertEqual(job.statement_type, statement_type)
-
- def test_referenced_tables(self):
- from google.cloud.bigquery.table import TableReference
-
- ref_tables_resource = [{
- 'projectId': self.PROJECT,
- 'datasetId': 'dataset',
- 'tableId': 'local1',
-        }, {
- 'projectId': self.PROJECT,
- 'datasetId': 'dataset',
- 'tableId': 'local2',
-        }, {
- 'projectId': 'other-project-123',
- 'datasetId': 'other-dataset',
- 'tableId': 'other-table',
- }]
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertEqual(job.referenced_tables, [])
-
- statistics = job._properties['statistics'] = {}
- self.assertEqual(job.referenced_tables, [])
-
- query_stats = statistics['query'] = {}
- self.assertEqual(job.referenced_tables, [])
-
- query_stats['referencedTables'] = ref_tables_resource
-
- local1, local2, remote = job.referenced_tables
-
- self.assertIsInstance(local1, TableReference)
- self.assertEqual(local1.table_id, 'local1')
- self.assertEqual(local1.dataset_id, 'dataset')
- self.assertEqual(local1.project, self.PROJECT)
-
- self.assertIsInstance(local2, TableReference)
- self.assertEqual(local2.table_id, 'local2')
- self.assertEqual(local2.dataset_id, 'dataset')
- self.assertEqual(local2.project, self.PROJECT)
-
- self.assertIsInstance(remote, TableReference)
- self.assertEqual(remote.table_id, 'other-table')
- self.assertEqual(remote.dataset_id, 'other-dataset')
- self.assertEqual(remote.project, 'other-project-123')
-
- def test_undeclared_query_paramters(self):
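-        # NOTE: the 'paramters' spelling mirrors the QueryJob property name under test.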
- from google.cloud.bigquery.query import ArrayQueryParameter
- from google.cloud.bigquery.query import ScalarQueryParameter
- from google.cloud.bigquery.query import StructQueryParameter
-
- undeclared = [{
- 'name': 'my_scalar',
- 'parameterType': {
- 'type': 'STRING',
- },
- 'parameterValue': {
- 'value': 'value',
- },
- }, {
- 'name': 'my_array',
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'INT64',
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {'value': '1066'},
- {'value': '1745'},
- ],
- },
- }, {
- 'name': 'my_struct',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [{
- 'name': 'count',
- 'type': {
- 'type': 'INT64',
- }
- }],
- },
- 'parameterValue': {
- 'structValues': {
- 'count': {
- 'value': '123',
- },
- }
- },
- }]
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- self.assertEqual(job.undeclared_query_paramters, [])
-
- statistics = job._properties['statistics'] = {}
- self.assertEqual(job.undeclared_query_paramters, [])
-
- query_stats = statistics['query'] = {}
- self.assertEqual(job.undeclared_query_paramters, [])
-
- query_stats['undeclaredQueryParamters'] = undeclared
-
- scalar, array, struct = job.undeclared_query_paramters
-
- self.assertIsInstance(scalar, ScalarQueryParameter)
- self.assertEqual(scalar.name, 'my_scalar')
- self.assertEqual(scalar.type_, 'STRING')
- self.assertEqual(scalar.value, 'value')
-
- self.assertIsInstance(array, ArrayQueryParameter)
- self.assertEqual(array.name, 'my_array')
- self.assertEqual(array.array_type, 'INT64')
- self.assertEqual(array.values, [1066, 1745])
-
- self.assertIsInstance(struct, StructQueryParameter)
- self.assertEqual(struct.name, 'my_struct')
- self.assertEqual(struct.struct_types, {'count': 'INT64'})
- self.assertEqual(struct.struct_values, {'count': 123})
-
- def test_query_results(self):
- from google.cloud.bigquery.query import QueryResults
-
- query_resource = {
- 'jobComplete': True,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- }
- connection = _Connection(query_resource)
- client = _make_client(self.PROJECT, connection=connection)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- results = job.query_results()
- self.assertIsInstance(results, QueryResults)
-
- def test_query_results_w_cached_value(self):
- from google.cloud.bigquery.query import QueryResults
-
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- resource = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- }
- query_results = QueryResults(resource)
- job._query_results = query_results
-
- results = job.query_results()
-
- self.assertIs(results, query_results)
-
- def test_result(self):
- query_resource = {
- 'jobComplete': True,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- }
- connection = _Connection(query_resource, query_resource)
- client = _make_client(self.PROJECT, connection=connection)
- resource = self._makeResource(ended=True)
- job = self._get_target_class().from_api_repr(resource, client)
-
- result = job.result()
-
- self.assertEqual(list(result), [])
-
-    def test_result_invokes_begin(self):
- begun_resource = self._makeResource()
- incomplete_resource = {
- 'jobComplete': False,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- }
- query_resource = copy.deepcopy(incomplete_resource)
- query_resource['jobComplete'] = True
- done_resource = copy.deepcopy(begun_resource)
- done_resource['status'] = {'state': 'DONE'}
- connection = _Connection(
- begun_resource, incomplete_resource, query_resource, done_resource,
- query_resource)
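-        # Responses are consumed in order: job insert, polling getQueryResults calls, then the job reload.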
- client = _make_client(project=self.PROJECT, connection=connection)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
-
- job.result()
-
- self.assertEqual(len(connection._requested), 4)
- begin_request, _, query_request, reload_request = connection._requested
- self.assertEqual(begin_request['method'], 'POST')
- self.assertEqual(query_request['method'], 'GET')
- self.assertEqual(reload_request['method'], 'GET')
-
- def test_result_w_timeout(self):
- begun_resource = self._makeResource()
- query_resource = {
- 'jobComplete': True,
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- }
- done_resource = copy.deepcopy(begun_resource)
- done_resource['status'] = {'state': 'DONE'}
- connection = _Connection(
- begun_resource, query_resource, done_resource)
- client = _make_client(project=self.PROJECT, connection=connection)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
-
- job.result(timeout=1.0)
-
- self.assertEqual(len(connection._requested), 3)
- begin_request, query_request, reload_request = connection._requested
- self.assertEqual(begin_request['method'], 'POST')
- self.assertEqual(query_request['method'], 'GET')
- self.assertEqual(
- query_request['path'],
- '/projects/{}/queries/{}'.format(self.PROJECT, self.JOB_ID))
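-        # Most of the 1.0 s timeout should be forwarded as timeoutMs on the getQueryResults call.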
- self.assertEqual(query_request['query_params']['timeoutMs'], 900)
- self.assertEqual(reload_request['method'], 'GET')
-
- def test_result_error(self):
- from google.cloud import exceptions
-
- client = _make_client(project=self.PROJECT)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
- error_result = {
- 'debugInfo': 'DEBUG',
- 'location': 'LOCATION',
- 'message': 'MESSAGE',
- 'reason': 'invalid'
- }
- job._properties['status'] = {
- 'errorResult': error_result,
- 'errors': [error_result],
- 'state': 'DONE'
- }
- job._set_future_result()
-
- with self.assertRaises(exceptions.GoogleCloudError) as exc_info:
- job.result()
-
- self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError)
- self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST)
-
- def test_begin_w_bound_client(self):
- from google.cloud.bigquery.dataset import DatasetReference
- from google.cloud.bigquery.job import QueryJobConfig
-
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- DS_ID = 'DATASET'
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
-
- config = QueryJobConfig()
- config.default_dataset = DatasetReference(self.PROJECT, DS_ID)
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=config)
-
- job.begin()
-
- self.assertIsNone(job.default_dataset)
- self.assertEqual(job.udf_resources, [])
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {
- 'query': self.QUERY,
- 'useLegacySql': False,
- 'defaultDataset': {
- 'projectId': self.PROJECT,
- 'datasetId': DS_ID,
- },
- },
- },
- }
- self._verifyResourceProperties(job, RESOURCE)
- self.assertEqual(req['data'], SENT)
-
- def test_begin_w_alternate_client(self):
- from google.cloud.bigquery.dataset import DatasetReference
- from google.cloud.bigquery.job import QueryJobConfig
-
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- TABLE = 'TABLE'
- DS_ID = 'DATASET'
- RESOURCE = self._makeResource(ended=True)
- QUERY_CONFIGURATION = {
- 'query': self.QUERY,
- 'allowLargeResults': True,
- 'createDisposition': 'CREATE_NEVER',
- 'defaultDataset': {
- 'projectId': self.PROJECT,
- 'datasetId': DS_ID,
- },
- 'destinationTable': {
- 'projectId': self.PROJECT,
- 'datasetId': DS_ID,
- 'tableId': TABLE,
- },
- 'flattenResults': True,
- 'priority': 'INTERACTIVE',
- 'useQueryCache': True,
- 'useLegacySql': True,
- 'writeDisposition': 'WRITE_TRUNCATE',
- 'maximumBillingTier': 4,
- 'maximumBytesBilled': '123456'
- }
- RESOURCE['configuration']['query'] = QUERY_CONFIGURATION
- RESOURCE['configuration']['dryRun'] = True
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- dataset_ref = DatasetReference(self.PROJECT, DS_ID)
- table_ref = dataset_ref.table(TABLE)
-
- config = QueryJobConfig()
- config.allow_large_results = True
- config.create_disposition = 'CREATE_NEVER'
- config.default_dataset = dataset_ref
- config.destination = table_ref
- config.dry_run = True
- config.flatten_results = True
- config.maximum_billing_tier = 4
- config.priority = 'INTERACTIVE'
- config.use_legacy_sql = True
- config.use_query_cache = True
- config.write_disposition = 'WRITE_TRUNCATE'
- config.maximum_bytes_billed = 123456
- job = self._make_one(
- self.JOB_ID, self.QUERY, client1, job_config=config)
-
- job.begin(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'dryRun': True,
- 'query': QUERY_CONFIGURATION,
- },
- }
- self._verifyResourceProperties(job, RESOURCE)
- self.assertEqual(req['data'], SENT)
-
- def test_begin_w_udf(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import UDFResource
-
- RESOURCE_URI = 'gs://some-bucket/js/lib.js'
- INLINE_UDF_CODE = 'var someCode = "here";'
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- RESOURCE['configuration']['query']['userDefinedFunctionResources'] = [
- {'resourceUri': RESOURCE_URI},
- {'inlineCode': INLINE_UDF_CODE},
- ]
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- udf_resources = [
- UDFResource("resourceUri", RESOURCE_URI),
- UDFResource("inlineCode", INLINE_UDF_CODE),
- ]
- config = QueryJobConfig()
- config.udf_resources = udf_resources
- config.use_legacy_sql = True
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=config)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(job.udf_resources, udf_resources)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {
- 'query': self.QUERY,
- 'useLegacySql': True,
- 'userDefinedFunctionResources': [
- {'resourceUri': RESOURCE_URI},
- {'inlineCode': INLINE_UDF_CODE},
- ]
- },
- },
- }
- self._verifyResourceProperties(job, RESOURCE)
- self.assertEqual(req['data'], SENT)
-
- def test_begin_w_named_query_parameter(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)]
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- config = RESOURCE['configuration']['query']
- config['parameterMode'] = 'NAMED'
- config['queryParameters'] = [
- {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'INT64',
- },
- 'parameterValue': {
- 'value': '123',
- },
- },
- ]
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- jconfig = QueryJobConfig()
- jconfig.query_parameters = query_parameters
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=jconfig)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(job.query_parameters, query_parameters)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {
- 'query': self.QUERY,
- 'useLegacySql': False,
- 'parameterMode': 'NAMED',
- 'queryParameters': config['queryParameters'],
- },
- },
- }
- self._verifyResourceProperties(job, RESOURCE)
- self.assertEqual(req['data'], SENT)
-
- def test_begin_w_positional_query_parameter(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- query_parameters = [ScalarQueryParameter.positional('INT64', 123)]
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- config = RESOURCE['configuration']['query']
- config['parameterMode'] = 'POSITIONAL'
- config['queryParameters'] = [
- {
- 'parameterType': {
- 'type': 'INT64',
- },
- 'parameterValue': {
- 'value': '123',
- },
- },
- ]
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- jconfig = QueryJobConfig()
- jconfig.query_parameters = query_parameters
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=jconfig)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(job.query_parameters, query_parameters)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {
- 'query': self.QUERY,
- 'useLegacySql': False,
- 'parameterMode': 'POSITIONAL',
- 'queryParameters': config['queryParameters'],
- },
- },
- }
- self._verifyResourceProperties(job, RESOURCE)
- self.assertEqual(req['data'], SENT)
-
- def test_begin_w_table_defs(self):
- from google.cloud.bigquery.job import QueryJobConfig
- from google.cloud.bigquery.external_config import ExternalConfig
- from google.cloud.bigquery.external_config import BigtableColumn
- from google.cloud.bigquery.external_config import BigtableColumnFamily
-
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
-
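-        # Build Bigtable and CSV external table definitions along with their expected API representations.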
- bt_config = ExternalConfig('BIGTABLE')
- bt_config.ignore_unknown_values = True
- bt_config.options.read_rowkey_as_string = True
- cf = BigtableColumnFamily()
- cf.family_id = 'cf'
- col = BigtableColumn()
- col.field_name = 'fn'
- cf.columns = [col]
- bt_config.options.column_families = [cf]
- BT_CONFIG_RESOURCE = {
- 'sourceFormat': 'BIGTABLE',
- 'ignoreUnknownValues': True,
- 'bigtableOptions': {
- 'readRowkeyAsString': True,
- 'columnFamilies': [{
- 'familyId': 'cf',
- 'columns': [{'fieldName': 'fn'}],
- }],
- },
- }
- CSV_CONFIG_RESOURCE = {
- 'sourceFormat': 'CSV',
- 'maxBadRecords': 8,
- 'csvOptions': {
- 'allowJaggedRows': True,
- },
- }
- csv_config = ExternalConfig('CSV')
- csv_config.max_bad_records = 8
- csv_config.options.allow_jagged_rows = True
- bt_table = 'bigtable-table'
- csv_table = 'csv-table'
- RESOURCE['configuration']['query']['tableDefinitions'] = {
- bt_table: BT_CONFIG_RESOURCE,
- csv_table: CSV_CONFIG_RESOURCE,
- }
- want_resource = copy.deepcopy(RESOURCE)
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- config = QueryJobConfig()
- config.table_definitions = {
- bt_table: bt_config,
- csv_table: csv_config,
- }
- config.use_legacy_sql = True
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=config)
-
- job.begin()
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {
- 'query': self.QUERY,
- 'useLegacySql': True,
- 'tableDefinitions': {
- bt_table: BT_CONFIG_RESOURCE,
- csv_table: CSV_CONFIG_RESOURCE,
- },
- },
- },
- }
- self._verifyResourceProperties(job, want_resource)
- self.assertEqual(req['data'], SENT)
-
- def test_dry_run_query(self):
- from google.cloud.bigquery.job import QueryJobConfig
-
- PATH = '/projects/%s/jobs' % (self.PROJECT,)
- RESOURCE = self._makeResource()
- # Ensure None for missing server-set props
- del RESOURCE['statistics']['creationTime']
- del RESOURCE['etag']
- del RESOURCE['selfLink']
- del RESOURCE['user_email']
- RESOURCE['configuration']['dryRun'] = True
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- config = QueryJobConfig()
- config.dry_run = True
- job = self._make_one(
- self.JOB_ID, self.QUERY, client, job_config=config)
-
- job.begin()
- self.assertEqual(job.udf_resources, [])
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'POST')
- self.assertEqual(req['path'], PATH)
- SENT = {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- 'configuration': {
- 'query': {
- 'query': self.QUERY,
- 'useLegacySql': False,
- },
- 'dryRun': True,
- },
- }
- self._verifyResourceProperties(job, RESOURCE)
- self.assertEqual(req['data'], SENT)
-
- def test_exists_miss_w_bound_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn = _Connection()
- client = _make_client(project=self.PROJECT, connection=conn)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
-
- self.assertFalse(job.exists())
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_exists_hit_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection({})
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- job = self._make_one(self.JOB_ID, self.QUERY, client1)
-
- self.assertTrue(job.exists(client=client2))
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self.assertEqual(req['query_params'], {'fields': 'id'})
-
- def test_reload_w_bound_client(self):
- from google.cloud.bigquery.dataset import DatasetReference
- from google.cloud.bigquery.job import QueryJobConfig
-
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- DS_ID = 'DATASET'
- DEST_TABLE = 'dest_table'
- RESOURCE = self._makeResource()
- conn = _Connection(RESOURCE)
- client = _make_client(project=self.PROJECT, connection=conn)
- dataset_ref = DatasetReference(self.PROJECT, DS_ID)
- table_ref = dataset_ref.table(DEST_TABLE)
- config = QueryJobConfig()
- config.destination = table_ref
- job = self._make_one(self.JOB_ID, None, client, job_config=config)
-
- job.reload()
-
- self.assertNotEqual(job.destination, table_ref)
-
- self.assertEqual(len(conn._requested), 1)
- req = conn._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
- def test_reload_w_alternate_client(self):
- PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
- DS_ID = 'DATASET'
- DEST_TABLE = 'dest_table'
- RESOURCE = self._makeResource()
- q_config = RESOURCE['configuration']['query']
- q_config['destinationTable'] = {
- 'projectId': self.PROJECT,
- 'datasetId': DS_ID,
- 'tableId': DEST_TABLE,
- }
- conn1 = _Connection()
- client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _Connection(RESOURCE)
- client2 = _make_client(project=self.PROJECT, connection=conn2)
- job = self._make_one(self.JOB_ID, self.QUERY, client1)
-
- job.reload(client=client2)
-
- self.assertEqual(len(conn1._requested), 0)
- self.assertEqual(len(conn2._requested), 1)
- req = conn2._requested[0]
- self.assertEqual(req['method'], 'GET')
- self.assertEqual(req['path'], PATH)
- self._verifyResourceProperties(job, RESOURCE)
-
-
-class TestQueryPlanEntryStep(unittest.TestCase, _Base):
- KIND = 'KIND'
- SUBSTEPS = ('SUB1', 'SUB2')
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import QueryPlanEntryStep
-
- return QueryPlanEntryStep
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- step = self._make_one(self.KIND, self.SUBSTEPS)
- self.assertEqual(step.kind, self.KIND)
- self.assertEqual(step.substeps, list(self.SUBSTEPS))
-
- def test_from_api_repr_empty(self):
- klass = self._get_target_class()
- step = klass.from_api_repr({})
- self.assertIsNone(step.kind)
- self.assertEqual(step.substeps, [])
-
- def test_from_api_repr_normal(self):
- resource = {
- 'kind': self.KIND,
- 'substeps': self.SUBSTEPS,
- }
- klass = self._get_target_class()
- step = klass.from_api_repr(resource)
- self.assertEqual(step.kind, self.KIND)
- self.assertEqual(step.substeps, list(self.SUBSTEPS))
-
- def test___eq___mismatched_type(self):
- step = self._make_one(self.KIND, self.SUBSTEPS)
- self.assertNotEqual(step, object())
-
- def test___eq___mismatch_kind(self):
- step = self._make_one(self.KIND, self.SUBSTEPS)
- other = self._make_one('OTHER', self.SUBSTEPS)
- self.assertNotEqual(step, other)
-
- def test___eq___mismatch_substeps(self):
- step = self._make_one(self.KIND, self.SUBSTEPS)
- other = self._make_one(self.KIND, ())
- self.assertNotEqual(step, other)
-
- def test___eq___hit(self):
- step = self._make_one(self.KIND, self.SUBSTEPS)
- other = self._make_one(self.KIND, self.SUBSTEPS)
- self.assertEqual(step, other)
-
-
-class TestQueryPlanEntry(unittest.TestCase, _Base):
- NAME = 'NAME'
- ENTRY_ID = 1234
- WAIT_RATIO_AVG = 2.71828
- WAIT_RATIO_MAX = 3.14159
- READ_RATIO_AVG = 1.41421
- READ_RATIO_MAX = 1.73205
- COMPUTE_RATIO_AVG = 0.69315
- COMPUTE_RATIO_MAX = 1.09861
- WRITE_RATIO_AVG = 3.32193
- WRITE_RATIO_MAX = 2.30258
- RECORDS_READ = 100
- RECORDS_WRITTEN = 1
- STATUS = 'STATUS'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.job import QueryPlanEntry
-
- return QueryPlanEntry
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- from google.cloud.bigquery.job import QueryPlanEntryStep
-
- steps = [QueryPlanEntryStep(
- kind=TestQueryPlanEntryStep.KIND,
- substeps=TestQueryPlanEntryStep.SUBSTEPS)]
- entry = self._make_one(
- name=self.NAME,
- entry_id=self.ENTRY_ID,
- wait_ratio_avg=self.WAIT_RATIO_AVG,
- wait_ratio_max=self.WAIT_RATIO_MAX,
- read_ratio_avg=self.READ_RATIO_AVG,
- read_ratio_max=self.READ_RATIO_MAX,
- compute_ratio_avg=self.COMPUTE_RATIO_AVG,
- compute_ratio_max=self.COMPUTE_RATIO_MAX,
- write_ratio_avg=self.WRITE_RATIO_AVG,
- write_ratio_max=self.WRITE_RATIO_MAX,
- records_read=self.RECORDS_READ,
- records_written=self.RECORDS_WRITTEN,
- status=self.STATUS,
- steps=steps,
- )
- self.assertEqual(entry.name, self.NAME)
- self.assertEqual(entry.entry_id, self.ENTRY_ID)
- self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG)
- self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX)
- self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG)
- self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX)
- self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG)
- self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX)
- self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG)
- self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX)
- self.assertEqual(entry.records_read, self.RECORDS_READ)
- self.assertEqual(entry.records_written, self.RECORDS_WRITTEN)
- self.assertEqual(entry.status, self.STATUS)
- self.assertEqual(entry.steps, steps)
-
- def test_from_api_repr_empty(self):
- klass = self._get_target_class()
-
- entry = klass.from_api_repr({})
-
- self.assertIsNone(entry.name)
- self.assertIsNone(entry.entry_id)
- self.assertIsNone(entry.wait_ratio_avg)
- self.assertIsNone(entry.wait_ratio_max)
- self.assertIsNone(entry.read_ratio_avg)
- self.assertIsNone(entry.read_ratio_max)
- self.assertIsNone(entry.compute_ratio_avg)
- self.assertIsNone(entry.compute_ratio_max)
- self.assertIsNone(entry.write_ratio_avg)
- self.assertIsNone(entry.write_ratio_max)
- self.assertIsNone(entry.records_read)
- self.assertIsNone(entry.records_written)
- self.assertIsNone(entry.status)
- self.assertEqual(entry.steps, [])
-
- def test_from_api_repr_normal(self):
- from google.cloud.bigquery.job import QueryPlanEntryStep
-
- steps = [QueryPlanEntryStep(
- kind=TestQueryPlanEntryStep.KIND,
- substeps=TestQueryPlanEntryStep.SUBSTEPS)]
- resource = {
- 'name': self.NAME,
- 'id': self.ENTRY_ID,
- 'waitRatioAvg': self.WAIT_RATIO_AVG,
- 'waitRatioMax': self.WAIT_RATIO_MAX,
- 'readRatioAvg': self.READ_RATIO_AVG,
- 'readRatioMax': self.READ_RATIO_MAX,
- 'computeRatioAvg': self.COMPUTE_RATIO_AVG,
- 'computeRatioMax': self.COMPUTE_RATIO_MAX,
- 'writeRatioAvg': self.WRITE_RATIO_AVG,
- 'writeRatioMax': self.WRITE_RATIO_MAX,
- 'recordsRead': str(self.RECORDS_READ),
- 'recordsWritten': str(self.RECORDS_WRITTEN),
- 'status': self.STATUS,
- 'steps': [{
- 'kind': TestQueryPlanEntryStep.KIND,
- 'substeps': TestQueryPlanEntryStep.SUBSTEPS,
- }]
- }
- klass = self._get_target_class()
-
- entry = klass.from_api_repr(resource)
- self.assertEqual(entry.name, self.NAME)
- self.assertEqual(entry.entry_id, self.ENTRY_ID)
- self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG)
- self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX)
- self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG)
- self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX)
- self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG)
- self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX)
- self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG)
- self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX)
- self.assertEqual(entry.records_read, self.RECORDS_READ)
- self.assertEqual(entry.records_written, self.RECORDS_WRITTEN)
- self.assertEqual(entry.status, self.STATUS)
- self.assertEqual(entry.steps, steps)
-
-
-class _Table(object):
-
- def __init__(self, table_id=None):
- self._table_id = table_id
-
- @property
- def table_id(self):
- return TestLoadJob.TABLE_ID
-
- @property
- def project(self):
- return TestLoadJob.PROJECT
-
- @property
- def dataset_id(self):
- return TestLoadJob.DS_ID
-
-
-class _Connection(object):
-
- def __init__(self, *responses):
- self._responses = responses
- self._requested = []
-
- def api_request(self, **kw):
- from google.cloud.exceptions import NotFound
-
- self._requested.append(kw)
-
- try:
- response, self._responses = self._responses[0], self._responses[1:]
- except IndexError:
- raise NotFound('miss')
- else:
- return response
diff --git a/bigquery/tests/unit/test_query.py b/bigquery/tests/unit/test_query.py
deleted file mode 100644
index e5c78ca..0000000
--- a/bigquery/tests/unit/test_query.py
+++ /dev/null
@@ -1,1253 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import datetime
-import unittest
-
-import mock
-
-
-class Test_UDFResource(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.query import UDFResource
-
- return UDFResource
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- udf = self._make_one('resourceUri', 'gs://some_bucket/some_file')
- self.assertEqual(udf.udf_type, 'resourceUri')
- self.assertEqual(udf.value, 'gs://some_bucket/some_file')
-
- def test___eq__(self):
- udf = self._make_one('resourceUri', 'gs://some_bucket/some_file')
- self.assertEqual(udf, udf)
- self.assertNotEqual(udf, object())
- wrong_val = self._make_one(
- 'resourceUri', 'gs://some_bucket/other_file')
- self.assertNotEqual(udf, wrong_val)
- wrong_type = self._make_one('inlineCode', udf.value)
- self.assertNotEqual(udf, wrong_type)
-
-
-class Test__AbstractQueryParameter(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.query import _AbstractQueryParameter
-
- return _AbstractQueryParameter
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_from_api_virtual(self):
- klass = self._get_target_class()
- with self.assertRaises(NotImplementedError):
- klass.from_api_repr({})
-
- def test_to_api_virtual(self):
- param = self._make_one()
- with self.assertRaises(NotImplementedError):
- param.to_api_repr()
-
-
-class Test_ScalarQueryParameter(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- return ScalarQueryParameter
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- param = self._make_one(name='foo', type_='INT64', value=123)
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.type_, 'INT64')
- self.assertEqual(param.value, 123)
-
- def test___eq__(self):
- param = self._make_one(name='foo', type_='INT64', value=123)
- self.assertEqual(param, param)
- self.assertNotEqual(param, object())
- alias = self._make_one(name='bar', type_='INT64', value=123)
- self.assertNotEqual(param, alias)
- wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0)
- self.assertNotEqual(param, wrong_type)
- wrong_val = self._make_one(name='foo', type_='INT64', value=234)
- self.assertNotEqual(param, wrong_val)
-
- def test_positional(self):
- klass = self._get_target_class()
- param = klass.positional(type_='INT64', value=123)
- self.assertEqual(param.name, None)
- self.assertEqual(param.type_, 'INT64')
- self.assertEqual(param.value, 123)
-
- def test_from_api_repr_w_name(self):
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'INT64',
- },
- 'parameterValue': {
- 'value': 123,
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.type_, 'INT64')
- self.assertEqual(param.value, 123)
-
- def test_from_api_repr_wo_name(self):
- RESOURCE = {
- 'parameterType': {
- 'type': 'INT64',
- },
- 'parameterValue': {
- 'value': '123',
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(param.name, None)
- self.assertEqual(param.type_, 'INT64')
- self.assertEqual(param.value, 123)
-
- def test_to_api_repr_w_name(self):
- EXPECTED = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'INT64',
- },
- 'parameterValue': {
- 'value': '123',
- },
- }
- param = self._make_one(name='foo', type_='INT64', value=123)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_wo_name(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'INT64',
- },
- 'parameterValue': {
- 'value': '123',
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='INT64', value=123)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_float(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'FLOAT64',
- },
- 'parameterValue': {
- 'value': 12.345,
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='FLOAT64', value=12.345)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_bool(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'BOOL',
- },
- 'parameterValue': {
- 'value': 'false',
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='BOOL', value=False)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_timestamp_datetime(self):
- from google.cloud._helpers import UTC
-
- STAMP = '2016-12-20 15:58:27.339328+00:00'
- when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
- EXPECTED = {
- 'parameterType': {
- 'type': 'TIMESTAMP',
- },
- 'parameterValue': {
- 'value': STAMP,
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='TIMESTAMP', value=when)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_timestamp_micros(self):
- from google.cloud._helpers import _microseconds_from_datetime
-
- now = datetime.datetime.utcnow()
- seconds = _microseconds_from_datetime(now) / 1.0e6
- EXPECTED = {
- 'parameterType': {
- 'type': 'TIMESTAMP',
- },
- 'parameterValue': {
- 'value': seconds,
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='TIMESTAMP', value=seconds)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_datetime_datetime(self):
- from google.cloud._helpers import _datetime_to_rfc3339
-
- now = datetime.datetime.utcnow()
- EXPECTED = {
- 'parameterType': {
- 'type': 'DATETIME',
- },
- 'parameterValue': {
- 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z'
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='DATETIME', value=now)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_datetime_string(self):
- from google.cloud._helpers import _datetime_to_rfc3339
-
- now = datetime.datetime.utcnow()
- now_str = _datetime_to_rfc3339(now)
- EXPECTED = {
- 'parameterType': {
- 'type': 'DATETIME',
- },
- 'parameterValue': {
- 'value': now_str,
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='DATETIME', value=now_str)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_date_date(self):
- today = datetime.date.today()
- EXPECTED = {
- 'parameterType': {
- 'type': 'DATE',
- },
- 'parameterValue': {
- 'value': today.isoformat(),
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='DATE', value=today)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_date_string(self):
- today = datetime.date.today()
- today_str = today.isoformat()
- EXPECTED = {
- 'parameterType': {
- 'type': 'DATE',
- },
- 'parameterValue': {
- 'value': today_str,
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='DATE', value=today_str)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_unknown_type(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'UNKNOWN',
- },
- 'parameterValue': {
- 'value': 'unknown',
- },
- }
- klass = self._get_target_class()
- param = klass.positional(type_='UNKNOWN', value='unknown')
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test___eq___wrong_type(self):
- field = self._make_one('test', 'STRING', 'value')
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___eq___name_mismatch(self):
- field = self._make_one('test', 'STRING', 'value')
- other = self._make_one('other', 'STRING', 'value')
- self.assertNotEqual(field, other)
-
- def test___eq___field_type_mismatch(self):
- field = self._make_one('test', 'STRING', None)
- other = self._make_one('test', 'INT64', None)
- self.assertNotEqual(field, other)
-
- def test___eq___value_mismatch(self):
- field = self._make_one('test', 'STRING', 'hello')
- other = self._make_one('test', 'STRING', 'world')
- self.assertNotEqual(field, other)
-
- def test___eq___hit(self):
- field = self._make_one('test', 'STRING', 'gotcha')
- other = self._make_one('test', 'STRING', 'gotcha')
- self.assertEqual(field, other)
-
- def test___ne___wrong_type(self):
- field = self._make_one('toast', 'INT64', 13)
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___ne___same_value(self):
- field1 = self._make_one('test', 'INT64', 12)
- field2 = self._make_one('test', 'INT64', 12)
- # unittest ``assertEqual`` uses ``==`` not ``!=``.
- comparison_val = (field1 != field2)
- self.assertFalse(comparison_val)
-
- def test___ne___different_values(self):
- field1 = self._make_one('test', 'INT64', 11)
- field2 = self._make_one('test', 'INT64', 12)
- self.assertNotEqual(field1, field2)
-
- def test___repr__(self):
- field1 = self._make_one('field1', 'STRING', 'value')
- expected = "ScalarQueryParameter('field1', 'STRING', 'value')"
- self.assertEqual(repr(field1), expected)
-
-
-def _make_subparam(name, type_, value):
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- return ScalarQueryParameter(name, type_, value)
-
-
-class Test_ArrayQueryParameter(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.query import ArrayQueryParameter
-
- return ArrayQueryParameter
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- param = self._make_one(name='foo', array_type='INT64', values=[1, 2])
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.array_type, 'INT64')
- self.assertEqual(param.values, [1, 2])
-
- def test___eq__(self):
- param = self._make_one(name='foo', array_type='INT64', values=[123])
- self.assertEqual(param, param)
- self.assertNotEqual(param, object())
- alias = self._make_one(name='bar', array_type='INT64', values=[123])
- self.assertNotEqual(param, alias)
- wrong_type = self._make_one(
- name='foo', array_type='FLOAT64', values=[123.0])
- self.assertNotEqual(param, wrong_type)
- wrong_val = self._make_one(
- name='foo', array_type='INT64', values=[234])
- self.assertNotEqual(param, wrong_val)
-
- def test_positional(self):
- klass = self._get_target_class()
- param = klass.positional(array_type='INT64', values=[1, 2])
- self.assertEqual(param.name, None)
- self.assertEqual(param.array_type, 'INT64')
- self.assertEqual(param.values, [1, 2])
-
- def test_from_api_repr_w_name(self):
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'INT64',
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {
- 'value': '1',
- },
- {
- 'value': '2'
- },
- ],
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.array_type, 'INT64')
- self.assertEqual(param.values, [1, 2])
-
- def test_from_api_repr_wo_name(self):
- RESOURCE = {
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'INT64',
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {
- 'value': '1',
- },
- {
- 'value': '2'
- },
- ],
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(param.name, None)
- self.assertEqual(param.array_type, 'INT64')
- self.assertEqual(param.values, [1, 2])
-
- def test_from_api_repr_w_struct_type(self):
- from google.cloud.bigquery.query import StructQueryParameter
-
- RESOURCE = {
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {
- 'name': 'name',
- 'type': {'type': 'STRING'},
- },
- {
- 'name': 'age',
- 'type': {'type': 'INT64'},
- },
- ],
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {
- 'structValues': {
- 'name': {'value': 'Phred Phlyntstone'},
- 'age': {'value': '32'},
- },
- },
- {
- 'structValues': {
- 'name': {
- 'value': 'Bharney Rhubbyl',
- },
- 'age': {'value': '31'},
- },
- },
- ],
- },
- }
-
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
-
- phred = StructQueryParameter.positional(
- _make_subparam('name', 'STRING', 'Phred Phlyntstone'),
- _make_subparam('age', 'INT64', 32))
- bharney = StructQueryParameter.positional(
- _make_subparam('name', 'STRING', 'Bharney Rhubbyl'),
- _make_subparam('age', 'INT64', 31))
- self.assertEqual(param.array_type, 'STRUCT')
- self.assertEqual(param.values, [phred, bharney])
-
- def test_to_api_repr_w_name(self):
- EXPECTED = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'INT64',
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {
- 'value': '1',
- },
- {
- 'value': '2'
- },
- ],
- },
- }
- param = self._make_one(name='foo', array_type='INT64', values=[1, 2])
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_wo_name(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'INT64',
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {
- 'value': '1',
- },
- {
- 'value': '2'
- },
- ],
- },
- }
- klass = self._get_target_class()
- param = klass.positional(array_type='INT64', values=[1, 2])
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_unknown_type(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'UNKNOWN',
- },
- },
- 'parameterValue': {
- 'arrayValues': [
- {
- 'value': 'unknown',
- }
- ],
- },
- }
- klass = self._get_target_class()
- param = klass.positional(array_type='UNKNOWN', values=['unknown'])
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_record_type(self):
- from google.cloud.bigquery.query import StructQueryParameter
-
- EXPECTED = {
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'foo', 'type': {'type': 'STRING'}},
- {'name': 'bar', 'type': {'type': 'INT64'}},
- ],
- },
- },
- 'parameterValue': {
- 'arrayValues': [{
- 'structValues': {
- 'foo': {'value': 'Foo'},
- 'bar': {'value': '123'},
- }
- }]
- },
- }
- one = _make_subparam('foo', 'STRING', 'Foo')
- another = _make_subparam('bar', 'INT64', 123)
- struct = StructQueryParameter.positional(one, another)
- klass = self._get_target_class()
- param = klass.positional(array_type='RECORD', values=[struct])
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test___eq___wrong_type(self):
- field = self._make_one('test', 'STRING', ['value'])
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___eq___name_mismatch(self):
- field = self._make_one('field', 'STRING', ['value'])
- other = self._make_one('other', 'STRING', ['value'])
- self.assertNotEqual(field, other)
-
- def test___eq___field_type_mismatch(self):
- field = self._make_one('test', 'STRING', [])
- other = self._make_one('test', 'INT64', [])
- self.assertNotEqual(field, other)
-
- def test___eq___value_mismatch(self):
- field = self._make_one('test', 'STRING', ['hello'])
- other = self._make_one('test', 'STRING', ['hello', 'world'])
- self.assertNotEqual(field, other)
-
- def test___eq___hit(self):
- field = self._make_one('test', 'STRING', ['gotcha'])
- other = self._make_one('test', 'STRING', ['gotcha'])
- self.assertEqual(field, other)
-
- def test___ne___wrong_type(self):
- field = self._make_one('toast', 'INT64', [13])
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___ne___same_value(self):
- field1 = self._make_one('test', 'INT64', [12])
- field2 = self._make_one('test', 'INT64', [12])
- # unittest ``assertEqual`` uses ``==`` not ``!=``.
- comparison_val = (field1 != field2)
- self.assertFalse(comparison_val)
-
- def test___ne___different_values(self):
- field1 = self._make_one('test', 'INT64', [11])
- field2 = self._make_one('test', 'INT64', [12])
- self.assertNotEqual(field1, field2)
-
- def test___repr__(self):
- field1 = self._make_one('field1', 'STRING', ['value'])
- expected = "ArrayQueryParameter('field1', 'STRING', ['value'])"
- self.assertEqual(repr(field1), expected)
-
-
-class Test_StructQueryParameter(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.query import StructQueryParameter
-
- return StructQueryParameter
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor(self):
- sub_1 = _make_subparam('bar', 'INT64', 123)
- sub_2 = _make_subparam('baz', 'STRING', 'abc')
- param = self._make_one('foo', sub_1, sub_2)
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
- self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
-
- def test___eq__(self):
- sub_1 = _make_subparam('bar', 'INT64', 123)
- sub_2 = _make_subparam('baz', 'STRING', 'abc')
- sub_3 = _make_subparam('baz', 'STRING', 'def')
- sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0)
- param = self._make_one('foo', sub_1, sub_2)
- self.assertEqual(param, param)
- self.assertNotEqual(param, object())
- alias = self._make_one('bar', sub_1, sub_2)
- self.assertNotEqual(param, alias)
- wrong_type = self._make_one('foo', sub_1_float, sub_2)
- self.assertNotEqual(param, wrong_type)
- wrong_val = self._make_one('foo', sub_2, sub_3)
- self.assertNotEqual(param, wrong_val)
-
- def test_positional(self):
- sub_1 = _make_subparam('bar', 'INT64', 123)
- sub_2 = _make_subparam('baz', 'STRING', 'abc')
- klass = self._get_target_class()
- param = klass.positional(sub_1, sub_2)
- self.assertEqual(param.name, None)
- self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
- self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
-
- def test_from_api_repr_w_name(self):
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'INT64'}},
- {'name': 'baz', 'type': {'type': 'STRING'}},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': 123},
- 'baz': {'value': 'abc'},
- },
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
- self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
-
- def test_from_api_repr_wo_name(self):
- RESOURCE = {
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'INT64'}},
- {'name': 'baz', 'type': {'type': 'STRING'}},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': 123},
- 'baz': {'value': 'abc'},
- },
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(param.name, None)
- self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
- self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
-
- def test_from_api_repr_w_nested_array(self):
- from google.cloud.bigquery.query import ArrayQueryParameter
-
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'STRING'}},
- {'name': 'baz', 'type': {
- 'type': 'ARRAY',
- 'arrayType': {'type': 'INT64'},
- }},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': 'abc'},
- 'baz': {'arrayValues': [
- {'value': '123'},
- {'value': '456'},
- ]},
- },
- },
- }
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
- self.assertEqual(
- param,
- self._make_one(
- 'foo',
- _make_subparam('bar', 'STRING', 'abc'),
- ArrayQueryParameter('baz', 'INT64', [123, 456])))
-
- def test_from_api_repr_w_nested_struct(self):
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'STRING'}},
- {'name': 'baz', 'type': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'qux', 'type': {'type': 'INT64'}},
- {'name': 'spam', 'type': {'type': 'BOOL'}},
- ],
- }},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': 'abc'},
- 'baz': {'structValues': {
- 'qux': {'value': '123'},
- 'spam': {'value': 'true'},
- }},
- },
- },
- }
-
- klass = self._get_target_class()
- param = klass.from_api_repr(RESOURCE)
-
- expected = self._make_one(
- 'foo',
- _make_subparam('bar', 'STRING', 'abc'),
- self._make_one(
- 'baz',
- _make_subparam('qux', 'INT64', 123),
- _make_subparam('spam', 'BOOL', True)))
- self.assertEqual(param.name, 'foo')
- self.assertEqual(param.struct_types, expected.struct_types)
- self.assertEqual(param.struct_values, expected.struct_values)
-
- def test_to_api_repr_w_name(self):
- EXPECTED = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'INT64'}},
- {'name': 'baz', 'type': {'type': 'STRING'}},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': '123'},
- 'baz': {'value': 'abc'},
- },
- },
- }
- sub_1 = _make_subparam('bar', 'INT64', 123)
- sub_2 = _make_subparam('baz', 'STRING', 'abc')
- param = self._make_one('foo', sub_1, sub_2)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_wo_name(self):
- EXPECTED = {
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'INT64'}},
- {'name': 'baz', 'type': {'type': 'STRING'}},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': '123'},
- 'baz': {'value': 'abc'},
- },
- },
- }
- sub_1 = _make_subparam('bar', 'INT64', 123)
- sub_2 = _make_subparam('baz', 'STRING', 'abc')
- klass = self._get_target_class()
- param = klass.positional(sub_1, sub_2)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_nested_array(self):
- from google.cloud.bigquery.query import ArrayQueryParameter
-
- EXPECTED = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'STRING'}},
- {'name': 'baz', 'type': {
- 'type': 'ARRAY',
- 'arrayType': {'type': 'INT64'},
- }},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': 'abc'},
- 'baz': {'arrayValues': [
- {'value': '123'},
- {'value': '456'},
- ]},
- },
- },
- }
- scalar = _make_subparam('bar', 'STRING', 'abc')
- array = ArrayQueryParameter('baz', 'INT64', [123, 456])
- param = self._make_one('foo', scalar, array)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test_to_api_repr_w_nested_struct(self):
- EXPECTED = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'bar', 'type': {'type': 'STRING'}},
- {'name': 'baz', 'type': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'qux', 'type': {'type': 'INT64'}},
- {'name': 'spam', 'type': {'type': 'BOOL'}},
- ],
- }},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'bar': {'value': 'abc'},
- 'baz': {'structValues': {
- 'qux': {'value': '123'},
- 'spam': {'value': 'true'},
- }},
- },
- },
- }
- scalar_1 = _make_subparam('bar', 'STRING', 'abc')
- scalar_2 = _make_subparam('qux', 'INT64', 123)
- scalar_3 = _make_subparam('spam', 'BOOL', True)
- sub = self._make_one('baz', scalar_2, scalar_3)
- param = self._make_one('foo', scalar_1, sub)
- self.assertEqual(param.to_api_repr(), EXPECTED)
-
- def test___eq___wrong_type(self):
- field = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'abc'))
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___eq___name_mismatch(self):
- field = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'abc'))
- other = self._make_one(
- 'other', _make_subparam('bar', 'STRING', 'abc'))
- self.assertNotEqual(field, other)
-
- def test___eq___field_type_mismatch(self):
- field = self._make_one(
- 'test', _make_subparam('bar', 'STRING', None))
- other = self._make_one(
- 'test', _make_subparam('bar', 'INT64', None))
- self.assertNotEqual(field, other)
-
- def test___eq___value_mismatch(self):
- field = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'hello'))
- other = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'world'))
- self.assertNotEqual(field, other)
-
- def test___eq___hit(self):
- field = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'gotcha'))
- other = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'gotcha'))
- self.assertEqual(field, other)
-
- def test___ne___wrong_type(self):
- field = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'hello'))
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___ne___same_value(self):
- field1 = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'hello'))
- field2 = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'hello'))
- # unittest ``assertEqual`` uses ``==`` not ``!=``.
- comparison_val = (field1 != field2)
- self.assertFalse(comparison_val)
-
- def test___ne___different_values(self):
- field1 = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'hello'))
- field2 = self._make_one(
- 'test', _make_subparam('bar', 'STRING', 'world'))
- self.assertNotEqual(field1, field2)
-
- def test___repr__(self):
- field1 = self._make_one(
- 'test', _make_subparam('field1', 'STRING', 'hello'))
- got = repr(field1)
- self.assertIn('StructQueryParameter', got)
- self.assertIn("'field1', 'STRING'", got)
- self.assertIn("'field1': 'hello'", got)
-
-
-class TestQueryResults(unittest.TestCase):
- PROJECT = 'project'
- JOB_ID = 'test-synchronous-query'
- TOKEN = 'TOKEN'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.query import QueryResults
-
- return QueryResults
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def _makeResource(self):
- return {
- 'jobReference': {
- 'projectId': self.PROJECT,
- 'jobId': self.JOB_ID,
- },
- }
-
- def _verifySchema(self, query, resource):
- from google.cloud.bigquery.schema import SchemaField
-
- if 'schema' in resource:
- fields = resource['schema']['fields']
- self.assertEqual(len(query.schema), len(fields))
- for found, expected in zip(query.schema, fields):
- self.assertIsInstance(found, SchemaField)
- self.assertEqual(found.name, expected['name'])
- self.assertEqual(found.field_type, expected['type'])
- self.assertEqual(found.mode, expected['mode'])
- self.assertEqual(found.description,
- expected.get('description'))
- self.assertEqual(found.fields, expected.get('fields', ()))
- else:
- self.assertEqual(query.schema, ())
-
- def test_ctor_defaults(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.cache_hit)
- self.assertIsNone(query.complete)
- self.assertIsNone(query.errors)
- self.assertIsNone(query.page_token)
- self.assertEqual(query.project, self.PROJECT)
- self.assertEqual(query.rows, [])
- self.assertEqual(query.schema, ())
- self.assertIsNone(query.total_rows)
- self.assertIsNone(query.total_bytes_processed)
-
- def test_cache_hit_missing(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.cache_hit)
-
- def test_cache_hit_present(self):
- resource = self._makeResource()
- resource['cacheHit'] = True
- query = self._make_one(resource)
- self.assertTrue(query.cache_hit)
-
- def test_complete_missing(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.complete)
-
- def test_complete_present(self):
- resource = self._makeResource()
- resource['jobComplete'] = True
- query = self._make_one(resource)
- self.assertTrue(query.complete)
-
- def test_errors_missing(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.errors)
-
- def test_errors_present(self):
- ERRORS = [
- {'reason': 'testing'},
- ]
- resource = self._makeResource()
- resource['errors'] = ERRORS
- query = self._make_one(resource)
- self.assertEqual(query.errors, ERRORS)
-
- def test_job_id_missing(self):
- with self.assertRaises(ValueError):
- self._make_one({})
-
- def test_job_id_broken_job_reference(self):
- resource = {'jobReference': {'bogus': 'BOGUS'}}
- with self.assertRaises(ValueError):
- self._make_one(resource)
-
- def test_job_id_present(self):
- resource = self._makeResource()
- resource['jobReference']['jobId'] = 'custom-job'
- query = self._make_one(resource)
- self.assertEqual(query.job_id, 'custom-job')
-
- def test_page_token_missing(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.page_token)
-
- def test_page_token_present(self):
- resource = self._makeResource()
- resource['pageToken'] = 'TOKEN'
- query = self._make_one(resource)
- self.assertEqual(query.page_token, 'TOKEN')
-
- def test_total_rows_present_integer(self):
- resource = self._makeResource()
- resource['totalRows'] = 42
- query = self._make_one(resource)
- self.assertEqual(query.total_rows, 42)
-
- def test_total_rows_present_string(self):
- resource = self._makeResource()
- resource['totalRows'] = '42'
- query = self._make_one(resource)
- self.assertEqual(query.total_rows, 42)
-
- def test_total_bytes_processed_missing(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.total_bytes_processed)
-
- def test_total_bytes_processed_present_integer(self):
- resource = self._makeResource()
- resource['totalBytesProcessed'] = 123456
- query = self._make_one(resource)
- self.assertEqual(query.total_bytes_processed, 123456)
-
- def test_total_bytes_processed_present_string(self):
- resource = self._makeResource()
- resource['totalBytesProcessed'] = '123456'
- query = self._make_one(resource)
- self.assertEqual(query.total_bytes_processed, 123456)
-
- def test_num_dml_affected_rows_missing(self):
- query = self._make_one(self._makeResource())
- self.assertIsNone(query.num_dml_affected_rows)
-
- def test_num_dml_affected_rows_present_integer(self):
- resource = self._makeResource()
- resource['numDmlAffectedRows'] = 123456
- query = self._make_one(resource)
- self.assertEqual(query.num_dml_affected_rows, 123456)
-
- def test_num_dml_affected_rows_present_string(self):
- resource = self._makeResource()
- resource['numDmlAffectedRows'] = '123456'
- query = self._make_one(resource)
- self.assertEqual(query.num_dml_affected_rows, 123456)
-
- def test_schema(self):
- query = self._make_one(self._makeResource())
- self._verifySchema(query, self._makeResource())
- resource = self._makeResource()
- resource['schema'] = {
- 'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'},
- ],
- }
- query._set_properties(resource)
- self._verifySchema(query, resource)
-
-
-class Test__query_param_from_api_repr(unittest.TestCase):
-
- @staticmethod
- def _call_fut(resource):
- from google.cloud.bigquery.query import _query_param_from_api_repr
-
- return _query_param_from_api_repr(resource)
-
- def test_w_scalar(self):
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {'type': 'INT64'},
- 'parameterValue': {'value': '123'},
- }
-
- parameter = self._call_fut(RESOURCE)
-
- self.assertIsInstance(parameter, ScalarQueryParameter)
- self.assertEqual(parameter.name, 'foo')
- self.assertEqual(parameter.type_, 'INT64')
- self.assertEqual(parameter.value, 123)
-
- def test_w_scalar_timestamp(self):
- from google.cloud._helpers import UTC
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- RESOURCE = {
- 'name': 'zoned',
- 'parameterType': {'type': 'TIMESTAMP'},
- 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'},
- }
-
- parameter = self._call_fut(RESOURCE)
-
- self.assertIsInstance(parameter, ScalarQueryParameter)
- self.assertEqual(parameter.name, 'zoned')
- self.assertEqual(parameter.type_, 'TIMESTAMP')
- self.assertEqual(
- parameter.value,
- datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC))
-
- def test_w_scalar_timestamp_micros(self):
- from google.cloud._helpers import UTC
- from google.cloud.bigquery.query import ScalarQueryParameter
-
- RESOURCE = {
- 'name': 'zoned',
- 'parameterType': {'type': 'TIMESTAMP'},
- 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'},
- }
-
- parameter = self._call_fut(RESOURCE)
-
- self.assertIsInstance(parameter, ScalarQueryParameter)
- self.assertEqual(parameter.name, 'zoned')
- self.assertEqual(parameter.type_, 'TIMESTAMP')
- self.assertEqual(
- parameter.value,
- datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC))
-
- def test_w_array(self):
- from google.cloud.bigquery.query import ArrayQueryParameter
-
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'ARRAY',
- 'arrayType': {'type': 'INT64'},
- },
- 'parameterValue': {
- 'arrayValues': [
- {'value': '123'},
- ]},
- }
-
- parameter = self._call_fut(RESOURCE)
-
- self.assertIsInstance(parameter, ArrayQueryParameter)
- self.assertEqual(parameter.name, 'foo')
- self.assertEqual(parameter.array_type, 'INT64')
- self.assertEqual(parameter.values, [123])
-
- def test_w_struct(self):
- from google.cloud.bigquery.query import StructQueryParameter
-
- RESOURCE = {
- 'name': 'foo',
- 'parameterType': {
- 'type': 'STRUCT',
- 'structTypes': [
- {'name': 'foo', 'type': {'type': 'STRING'}},
- {'name': 'bar', 'type': {'type': 'INT64'}},
- ],
- },
- 'parameterValue': {
- 'structValues': {
- 'foo': {'value': 'Foo'},
- 'bar': {'value': '123'},
- }
- },
- }
-
- parameter = self._call_fut(RESOURCE)
-
- self.assertIsInstance(parameter, StructQueryParameter)
- self.assertEqual(parameter.name, 'foo')
- self.assertEqual(
- parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'})
- self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123})
diff --git a/bigquery/tests/unit/test_schema.py b/bigquery/tests/unit/test_schema.py
deleted file mode 100644
index 84e5d30..0000000
--- a/bigquery/tests/unit/test_schema.py
+++ /dev/null
@@ -1,367 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import mock
-
-
-class TestSchemaField(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.schema import SchemaField
-
- return SchemaField
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_constructor_defaults(self):
- field = self._make_one('test', 'STRING')
- self.assertEqual(field._name, 'test')
- self.assertEqual(field._field_type, 'STRING')
- self.assertEqual(field._mode, 'NULLABLE')
- self.assertIsNone(field._description)
- self.assertEqual(field._fields, ())
-
- def test_constructor_explicit(self):
- field = self._make_one('test', 'STRING', mode='REQUIRED',
- description='Testing')
- self.assertEqual(field._name, 'test')
- self.assertEqual(field._field_type, 'STRING')
- self.assertEqual(field._mode, 'REQUIRED')
- self.assertEqual(field._description, 'Testing')
- self.assertEqual(field._fields, ())
-
- def test_constructor_subfields(self):
- sub_field1 = self._make_one('area_code', 'STRING')
- sub_field2 = self._make_one('local_number', 'STRING')
- field = self._make_one(
- 'phone_number',
- 'RECORD',
- fields=[sub_field1, sub_field2],
- )
- self.assertEqual(field._name, 'phone_number')
- self.assertEqual(field._field_type, 'RECORD')
- self.assertEqual(field._mode, 'NULLABLE')
- self.assertIsNone(field._description)
- self.assertEqual(len(field._fields), 2)
- self.assertIs(field._fields[0], sub_field1)
- self.assertIs(field._fields[1], sub_field2)
-
- def test_to_api_repr(self):
- field = self._make_one('foo', 'INTEGER', 'NULLABLE')
- self.assertEqual(field.to_api_repr(), {
- 'mode': 'nullable',
- 'name': 'foo',
- 'type': 'integer',
- })
-
- def test_to_api_repr_with_subfield(self):
- subfield = self._make_one('bar', 'INTEGER', 'NULLABLE')
- field = self._make_one('foo', 'RECORD', 'REQUIRED', fields=(subfield,))
- self.assertEqual(field.to_api_repr(), {
- 'fields': [{
- 'mode': 'nullable',
- 'name': 'bar',
- 'type': 'integer',
- }],
- 'mode': 'required',
- 'name': 'foo',
- 'type': 'record',
- })
-
- def test_from_api_repr(self):
- field = self._get_target_class().from_api_repr({
- 'fields': [{
- 'mode': 'nullable',
- 'name': 'bar',
- 'type': 'integer',
- }],
- 'mode': 'required',
- 'name': 'foo',
- 'type': 'record',
- })
- self.assertEqual(field.name, 'foo')
- self.assertEqual(field.field_type, 'RECORD')
- self.assertEqual(field.mode, 'REQUIRED')
- self.assertEqual(len(field.fields), 1)
- self.assertEqual(field.fields[0].name, 'bar')
- self.assertEqual(field.fields[0].field_type, 'INTEGER')
- self.assertEqual(field.fields[0].mode, 'NULLABLE')
-
- def test_name_property(self):
- name = 'lemon-ness'
- schema_field = self._make_one(name, 'INTEGER')
- self.assertIs(schema_field.name, name)
-
- def test_field_type_property(self):
- field_type = 'BOOLEAN'
- schema_field = self._make_one('whether', field_type)
- self.assertIs(schema_field.field_type, field_type)
-
- def test_mode_property(self):
- mode = 'REPEATED'
- schema_field = self._make_one('again', 'FLOAT', mode=mode)
- self.assertIs(schema_field.mode, mode)
-
- def test_is_nullable(self):
- mode = 'NULLABLE'
- schema_field = self._make_one('test', 'FLOAT', mode=mode)
- self.assertTrue(schema_field.is_nullable)
-
- def test_is_not_nullable(self):
- mode = 'REPEATED'
- schema_field = self._make_one('test', 'FLOAT', mode=mode)
- self.assertFalse(schema_field.is_nullable)
-
- def test_description_property(self):
- description = 'It holds some data.'
- schema_field = self._make_one(
- 'do', 'TIMESTAMP', description=description)
- self.assertIs(schema_field.description, description)
-
- def test_fields_property(self):
- sub_field1 = self._make_one('one', 'STRING')
- sub_field2 = self._make_one('fish', 'INTEGER')
- fields = (sub_field1, sub_field2)
- schema_field = self._make_one('boat', 'RECORD', fields=fields)
- self.assertIs(schema_field.fields, fields)
-
- def test___eq___wrong_type(self):
- field = self._make_one('test', 'STRING')
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___eq___name_mismatch(self):
- field = self._make_one('test', 'STRING')
- other = self._make_one('other', 'STRING')
- self.assertNotEqual(field, other)
-
- def test___eq___field_type_mismatch(self):
- field = self._make_one('test', 'STRING')
- other = self._make_one('test', 'INTEGER')
- self.assertNotEqual(field, other)
-
- def test___eq___mode_mismatch(self):
- field = self._make_one('test', 'STRING', mode='REQUIRED')
- other = self._make_one('test', 'STRING', mode='NULLABLE')
- self.assertNotEqual(field, other)
-
- def test___eq___description_mismatch(self):
- field = self._make_one('test', 'STRING', description='Testing')
- other = self._make_one('test', 'STRING', description='Other')
- self.assertNotEqual(field, other)
-
- def test___eq___fields_mismatch(self):
- sub1 = self._make_one('sub1', 'STRING')
- sub2 = self._make_one('sub2', 'STRING')
- field = self._make_one('test', 'RECORD', fields=[sub1])
- other = self._make_one('test', 'RECORD', fields=[sub2])
- self.assertNotEqual(field, other)
-
- def test___eq___hit(self):
- field = self._make_one('test', 'STRING', mode='REQUIRED',
- description='Testing')
- other = self._make_one('test', 'STRING', mode='REQUIRED',
- description='Testing')
- self.assertEqual(field, other)
-
- def test___eq___hit_case_diff_on_type(self):
- field = self._make_one('test', 'STRING', mode='REQUIRED',
- description='Testing')
- other = self._make_one('test', 'string', mode='REQUIRED',
- description='Testing')
- self.assertEqual(field, other)
-
- def test___eq___hit_w_fields(self):
- sub1 = self._make_one('sub1', 'STRING')
- sub2 = self._make_one('sub2', 'STRING')
- field = self._make_one('test', 'RECORD', fields=[sub1, sub2])
- other = self._make_one('test', 'RECORD', fields=[sub1, sub2])
- self.assertEqual(field, other)
-
- def test___ne___wrong_type(self):
- field = self._make_one('toast', 'INTEGER')
- other = object()
- self.assertNotEqual(field, other)
- self.assertEqual(field, mock.ANY)
-
- def test___ne___same_value(self):
- field1 = self._make_one('test', 'TIMESTAMP', mode='REPEATED')
- field2 = self._make_one('test', 'TIMESTAMP', mode='REPEATED')
- # unittest ``assertEqual`` uses ``==`` not ``!=``.
- comparison_val = (field1 != field2)
- self.assertFalse(comparison_val)
-
- def test___ne___different_values(self):
- field1 = self._make_one(
- 'test1', 'FLOAT', mode='REPEATED', description='Not same')
- field2 = self._make_one(
- 'test2', 'FLOAT', mode='NULLABLE', description='Knot saym')
- self.assertNotEqual(field1, field2)
-
- def test___hash__set_equality(self):
- sub1 = self._make_one('sub1', 'STRING')
- sub2 = self._make_one('sub2', 'STRING')
- field1 = self._make_one('test', 'RECORD', fields=[sub1])
- field2 = self._make_one('test', 'RECORD', fields=[sub2])
- set_one = {field1, field2}
- set_two = {field1, field2}
- self.assertEqual(set_one, set_two)
-
- def test___hash__not_equals(self):
- sub1 = self._make_one('sub1', 'STRING')
- sub2 = self._make_one('sub2', 'STRING')
- field1 = self._make_one('test', 'RECORD', fields=[sub1])
- field2 = self._make_one('test', 'RECORD', fields=[sub2])
- set_one = {field1}
- set_two = {field2}
- self.assertNotEqual(set_one, set_two)
-
- def test___repr__(self):
- field1 = self._make_one('field1', 'STRING')
- expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())"
- self.assertEqual(repr(field1), expected)
-
-
-# TODO: dedup with the same class in test_table.py.
-class _SchemaBase(object):
-
- def _verify_field(self, field, r_field):
- self.assertEqual(field.name, r_field['name'])
- self.assertEqual(field.field_type, r_field['type'])
- self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE'))
-
- def _verifySchema(self, schema, resource):
- r_fields = resource['schema']['fields']
- self.assertEqual(len(schema), len(r_fields))
-
- for field, r_field in zip(schema, r_fields):
- self._verify_field(field, r_field)
-
-
-class Test_parse_schema_resource(unittest.TestCase, _SchemaBase):
-
- def _call_fut(self, resource):
- from google.cloud.bigquery.schema import _parse_schema_resource
-
- return _parse_schema_resource(resource)
-
- def _makeResource(self):
- return {
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'},
- ]},
- }
-
- def test__parse_schema_resource_defaults(self):
- RESOURCE = self._makeResource()
- schema = self._call_fut(RESOURCE['schema'])
- self._verifySchema(schema, RESOURCE)
-
- def test__parse_schema_resource_subfields(self):
- RESOURCE = self._makeResource()
- RESOURCE['schema']['fields'].append(
- {'name': 'phone',
- 'type': 'RECORD',
- 'mode': 'REPEATED',
- 'fields': [{'name': 'type',
- 'type': 'STRING',
- 'mode': 'REQUIRED'},
- {'name': 'number',
- 'type': 'STRING',
- 'mode': 'REQUIRED'}]})
- schema = self._call_fut(RESOURCE['schema'])
- self._verifySchema(schema, RESOURCE)
-
- def test__parse_schema_resource_fields_without_mode(self):
- RESOURCE = self._makeResource()
- RESOURCE['schema']['fields'].append(
- {'name': 'phone',
- 'type': 'STRING'})
-
- schema = self._call_fut(RESOURCE['schema'])
- self._verifySchema(schema, RESOURCE)
-
-
-class Test_build_schema_resource(unittest.TestCase, _SchemaBase):
-
- def _call_fut(self, resource):
- from google.cloud.bigquery.schema import _build_schema_resource
-
- return _build_schema_resource(resource)
-
- def test_defaults(self):
- from google.cloud.bigquery.schema import SchemaField
-
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- resource = self._call_fut([full_name, age])
- self.assertEqual(len(resource), 2)
- self.assertEqual(resource[0],
- {'name': 'full_name',
- 'type': 'STRING',
- 'mode': 'REQUIRED'})
- self.assertEqual(resource[1],
- {'name': 'age',
- 'type': 'INTEGER',
- 'mode': 'REQUIRED'})
-
- def test_w_description(self):
- from google.cloud.bigquery.schema import SchemaField
-
- DESCRIPTION = 'DESCRIPTION'
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED',
- description=DESCRIPTION)
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- resource = self._call_fut([full_name, age])
- self.assertEqual(len(resource), 2)
- self.assertEqual(resource[0],
- {'name': 'full_name',
- 'type': 'STRING',
- 'mode': 'REQUIRED',
- 'description': DESCRIPTION})
- self.assertEqual(resource[1],
- {'name': 'age',
- 'type': 'INTEGER',
- 'mode': 'REQUIRED'})
-
- def test_w_subfields(self):
- from google.cloud.bigquery.schema import SchemaField
-
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- ph_type = SchemaField('type', 'STRING', 'REQUIRED')
- ph_num = SchemaField('number', 'STRING', 'REQUIRED')
- phone = SchemaField('phone', 'RECORD', mode='REPEATED',
- fields=[ph_type, ph_num])
- resource = self._call_fut([full_name, phone])
- self.assertEqual(len(resource), 2)
- self.assertEqual(resource[0],
- {'name': 'full_name',
- 'type': 'STRING',
- 'mode': 'REQUIRED'})
- self.assertEqual(resource[1],
- {'name': 'phone',
- 'type': 'RECORD',
- 'mode': 'REPEATED',
- 'fields': [{'name': 'type',
- 'type': 'STRING',
- 'mode': 'REQUIRED'},
- {'name': 'number',
- 'type': 'STRING',
- 'mode': 'REQUIRED'}]})
diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py
deleted file mode 100644
index a40ab16..0000000
--- a/bigquery/tests/unit/test_table.py
+++ /dev/null
@@ -1,753 +0,0 @@
-# Copyright 2015 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import mock
-
-from google.cloud.bigquery.dataset import DatasetReference
-
-
-class _SchemaBase(object):
-
- def _verify_field(self, field, r_field):
- self.assertEqual(field.name, r_field['name'])
- self.assertEqual(field.field_type, r_field['type'])
- self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE'))
-
- def _verifySchema(self, schema, resource):
- r_fields = resource['schema']['fields']
- self.assertEqual(len(schema), len(r_fields))
-
- for field, r_field in zip(schema, r_fields):
- self._verify_field(field, r_field)
-
-
-class TestTableReference(unittest.TestCase):
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.table import TableReference
-
- return TableReference
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def test_ctor_defaults(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset_ref = DatasetReference('project_1', 'dataset_1')
-
- table_ref = self._make_one(dataset_ref, 'table_1')
- self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id)
- self.assertEqual(table_ref.table_id, 'table_1')
-
- def test_to_api_repr(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset_ref = DatasetReference('project_1', 'dataset_1')
- table_ref = self._make_one(dataset_ref, 'table_1')
-
- resource = table_ref.to_api_repr()
-
- self.assertEqual(
- resource,
- {
- 'projectId': 'project_1',
- 'datasetId': 'dataset_1',
- 'tableId': 'table_1',
- })
-
- def test_from_api_repr(self):
- from google.cloud.bigquery.dataset import DatasetReference
- from google.cloud.bigquery.table import TableReference
- dataset_ref = DatasetReference('project_1', 'dataset_1')
- expected = self._make_one(dataset_ref, 'table_1')
-
- got = TableReference.from_api_repr(
- {
- 'projectId': 'project_1',
- 'datasetId': 'dataset_1',
- 'tableId': 'table_1',
- })
-
- self.assertEqual(expected, got)
-
- def test___eq___wrong_type(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset_ref = DatasetReference('project_1', 'dataset_1')
- table = self._make_one(dataset_ref, 'table_1')
- other = object()
- self.assertNotEqual(table, other)
- self.assertEqual(table, mock.ANY)
-
- def test___eq___project_mismatch(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset = DatasetReference('project_1', 'dataset_1')
- other_dataset = DatasetReference('project_2', 'dataset_1')
- table = self._make_one(dataset, 'table_1')
- other = self._make_one(other_dataset, 'table_1')
- self.assertNotEqual(table, other)
-
- def test___eq___dataset_mismatch(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset = DatasetReference('project_1', 'dataset_1')
- other_dataset = DatasetReference('project_1', 'dataset_2')
- table = self._make_one(dataset, 'table_1')
- other = self._make_one(other_dataset, 'table_1')
- self.assertNotEqual(table, other)
-
- def test___eq___table_mismatch(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset = DatasetReference('project_1', 'dataset_1')
- table = self._make_one(dataset, 'table_1')
- other = self._make_one(dataset, 'table_2')
- self.assertNotEqual(table, other)
-
- def test___eq___equality(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset = DatasetReference('project_1', 'dataset_1')
- table = self._make_one(dataset, 'table_1')
- other = self._make_one(dataset, 'table_1')
- self.assertEqual(table, other)
-
- def test___hash__set_equality(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset = DatasetReference('project_1', 'dataset_1')
- table1 = self._make_one(dataset, 'table1')
- table2 = self._make_one(dataset, 'table2')
- set_one = {table1, table2}
- set_two = {table1, table2}
- self.assertEqual(set_one, set_two)
-
- def test___hash__not_equals(self):
- from google.cloud.bigquery.dataset import DatasetReference
- dataset = DatasetReference('project_1', 'dataset_1')
- table1 = self._make_one(dataset, 'table1')
- table2 = self._make_one(dataset, 'table2')
- set_one = {table1}
- set_two = {table2}
- self.assertNotEqual(set_one, set_two)
-
- def test___repr__(self):
- dataset = DatasetReference('project1', 'dataset1')
- table1 = self._make_one(dataset, 'table1')
- expected = "TableReference('project1', 'dataset1', 'table1')"
- self.assertEqual(repr(table1), expected)
-
-
-class TestTable(unittest.TestCase, _SchemaBase):
-
- PROJECT = 'prahj-ekt'
- DS_ID = 'dataset-name'
- TABLE_NAME = 'table-name'
-
- @staticmethod
- def _get_target_class():
- from google.cloud.bigquery.table import Table
-
- return Table
-
- def _make_one(self, *args, **kw):
- return self._get_target_class()(*args, **kw)
-
- def _setUpConstants(self):
- import datetime
- from google.cloud._helpers import UTC
-
- self.WHEN_TS = 1437767599.006
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(
- tzinfo=UTC)
- self.ETAG = 'ETAG'
- self.TABLE_FULL_ID = '%s:%s:%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_NAME)
- self.RESOURCE_URL = 'http://example.com/path/to/resource'
- self.NUM_BYTES = 12345
- self.NUM_ROWS = 67
- self.NUM_EST_BYTES = 1234
- self.NUM_EST_ROWS = 23
-
- def _makeResource(self):
- self._setUpConstants()
- return {
- 'creationTime': self.WHEN_TS * 1000,
- 'tableReference':
- {'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_NAME},
- 'schema': {'fields': [
- {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
- {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]},
- 'etag': 'ETAG',
- 'id': self.TABLE_FULL_ID,
- 'lastModifiedTime': self.WHEN_TS * 1000,
- 'location': 'US',
- 'selfLink': self.RESOURCE_URL,
- 'numRows': self.NUM_ROWS,
- 'numBytes': self.NUM_BYTES,
- 'type': 'TABLE',
- 'streamingBuffer': {
- 'estimatedRows': str(self.NUM_EST_ROWS),
- 'estimatedBytes': str(self.NUM_EST_BYTES),
- 'oldestEntryTime': self.WHEN_TS * 1000},
- 'externalDataConfiguration': {
- 'sourceFormat': 'CSV',
- 'csvOptions': {
- 'allowJaggedRows': True,
- 'encoding': 'encoding'}},
- 'labels': {'x': 'y'},
- }
-
- def _verifyReadonlyResourceProperties(self, table, resource):
- if 'creationTime' in resource:
- self.assertEqual(table.created, self.WHEN)
- else:
- self.assertIsNone(table.created)
-
- if 'etag' in resource:
- self.assertEqual(table.etag, self.ETAG)
- else:
- self.assertIsNone(table.etag)
-
- if 'numRows' in resource:
- self.assertEqual(table.num_rows, self.NUM_ROWS)
- else:
- self.assertIsNone(table.num_rows)
-
- if 'numBytes' in resource:
- self.assertEqual(table.num_bytes, self.NUM_BYTES)
- else:
- self.assertIsNone(table.num_bytes)
-
- if 'selfLink' in resource:
- self.assertEqual(table.self_link, self.RESOURCE_URL)
- else:
- self.assertIsNone(table.self_link)
-
- if 'streamingBuffer' in resource:
- self.assertEqual(table.streaming_buffer.estimated_rows,
- self.NUM_EST_ROWS)
- self.assertEqual(table.streaming_buffer.estimated_bytes,
- self.NUM_EST_BYTES)
- self.assertEqual(table.streaming_buffer.oldest_entry_time,
- self.WHEN)
- else:
- self.assertIsNone(table.streaming_buffer)
-
- self.assertEqual(table.full_table_id, self.TABLE_FULL_ID)
- self.assertEqual(table.table_type,
- 'TABLE' if 'view' not in resource else 'VIEW')
-
- def _verifyResourceProperties(self, table, resource):
-
- self._verifyReadonlyResourceProperties(table, resource)
-
- if 'expirationTime' in resource:
- self.assertEqual(table.expires, self.EXP_TIME)
- else:
- self.assertIsNone(table.expires)
-
- self.assertEqual(table.description, resource.get('description'))
- self.assertEqual(table.friendly_name, resource.get('friendlyName'))
- self.assertEqual(table.location, resource.get('location'))
-
- if 'view' in resource:
- self.assertEqual(table.view_query, resource['view']['query'])
- self.assertEqual(
- table.view_use_legacy_sql,
- resource['view'].get('useLegacySql', True))
- else:
- self.assertIsNone(table.view_query)
- self.assertIsNone(table.view_use_legacy_sql)
-
- if 'schema' in resource:
- self._verifySchema(table.schema, resource)
- else:
- self.assertEqual(table.schema, [])
-
- if 'externalDataConfiguration' in resource:
- edc = table.external_data_configuration
- self.assertEqual(edc.source_format, 'CSV')
- self.assertEqual(edc.options.allow_jagged_rows, True)
-
- if 'labels' in resource:
- self.assertEqual(table.labels, {'x': 'y'})
- else:
- self.assertEqual(table.labels, {})
-
- def test_ctor(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
-
- self.assertEqual(table.table_id, self.TABLE_NAME)
- self.assertEqual(table.project, self.PROJECT)
- self.assertEqual(table.dataset_id, self.DS_ID)
- self.assertEqual(
- table.path,
- '/projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_NAME))
- self.assertEqual(table.schema, [])
-
- self.assertIsNone(table.created)
- self.assertIsNone(table.etag)
- self.assertIsNone(table.modified)
- self.assertIsNone(table.num_bytes)
- self.assertIsNone(table.num_rows)
- self.assertIsNone(table.self_link)
- self.assertIsNone(table.full_table_id)
- self.assertIsNone(table.table_type)
- self.assertIsNone(table.description)
- self.assertIsNone(table.expires)
- self.assertIsNone(table.friendly_name)
- self.assertIsNone(table.location)
- self.assertIsNone(table.view_query)
- self.assertIsNone(table.view_use_legacy_sql)
- self.assertIsNone(table.external_data_configuration)
- self.assertEqual(table.labels, {})
-
- def test_ctor_w_schema(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
-
- self.assertEqual(table.schema, [full_name, age])
-
- def test_num_bytes_getter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
-
- # Check with no value set.
- self.assertIsNone(table.num_bytes)
-
- num_bytes = 1337
- # Check with integer value set.
- table._properties = {'numBytes': num_bytes}
- self.assertEqual(table.num_bytes, num_bytes)
-
- # Check with a string value set.
- table._properties = {'numBytes': str(num_bytes)}
- self.assertEqual(table.num_bytes, num_bytes)
-
- # Check with invalid int value.
- table._properties = {'numBytes': 'x'}
- with self.assertRaises(ValueError):
- getattr(table, 'num_bytes')
-
- def test_num_rows_getter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
-
- # Check with no value set.
- self.assertIsNone(table.num_rows)
-
- num_rows = 42
- # Check with integer value set.
- table._properties = {'numRows': num_rows}
- self.assertEqual(table.num_rows, num_rows)
-
- # Check with a string value set.
- table._properties = {'numRows': str(num_rows)}
- self.assertEqual(table.num_rows, num_rows)
-
- # Check with invalid int value.
- table._properties = {'numRows': 'x'}
- with self.assertRaises(ValueError):
- getattr(table, 'num_rows')
-
- def test_schema_setter_non_list(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(TypeError):
- table.schema = object()
-
- def test_schema_setter_invalid_field(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- with self.assertRaises(ValueError):
- table.schema = [full_name, object()]
-
- def test_schema_setter(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table.schema = [full_name, age]
- self.assertEqual(table.schema, [full_name, age])
-
- def test_props_set_by_server(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud._helpers import _millis
-
- CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC)
- MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC)
- TABLE_FULL_ID = '%s:%s:%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_NAME)
- URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % (
- self.PROJECT, self.DS_ID, self.TABLE_NAME)
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table._properties['creationTime'] = _millis(CREATED)
- table._properties['etag'] = 'ETAG'
- table._properties['lastModifiedTime'] = _millis(MODIFIED)
- table._properties['numBytes'] = 12345
- table._properties['numRows'] = 66
- table._properties['selfLink'] = URL
- table._properties['id'] = TABLE_FULL_ID
- table._properties['type'] = 'TABLE'
-
- self.assertEqual(table.created, CREATED)
- self.assertEqual(table.etag, 'ETAG')
- self.assertEqual(table.modified, MODIFIED)
- self.assertEqual(table.num_bytes, 12345)
- self.assertEqual(table.num_rows, 66)
- self.assertEqual(table.self_link, URL)
- self.assertEqual(table.full_table_id, TABLE_FULL_ID)
- self.assertEqual(table.table_type, 'TABLE')
-
- def test_description_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.description = 12345
-
- def test_description_setter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.description = 'DESCRIPTION'
- self.assertEqual(table.description, 'DESCRIPTION')
-
- def test_expires_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.expires = object()
-
- def test_expires_setter(self):
- import datetime
- from google.cloud._helpers import UTC
-
- WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC)
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.expires = WHEN
- self.assertEqual(table.expires, WHEN)
-
- def test_friendly_name_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.friendly_name = 12345
-
- def test_friendly_name_setter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.friendly_name = 'FRIENDLY'
- self.assertEqual(table.friendly_name, 'FRIENDLY')
-
- def test_location_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.location = 12345
-
- def test_location_setter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.location = 'LOCATION'
- self.assertEqual(table.location, 'LOCATION')
-
- def test_view_query_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.view_query = 12345
-
- def test_view_query_setter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.view_query = 'select * from foo'
- self.assertEqual(table.view_query, 'select * from foo')
- self.assertEqual(table.view_use_legacy_sql, False)
-
- table.view_use_legacy_sql = True
- self.assertEqual(table.view_use_legacy_sql, True)
-
- def test_view_query_deleter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.view_query = 'select * from foo'
- del table.view_query
- self.assertIsNone(table.view_query)
- self.assertIsNone(table.view_use_legacy_sql)
-
- def test_view_use_legacy_sql_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.view_use_legacy_sql = 12345
-
- def test_view_use_legacy_sql_setter(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- table.view_use_legacy_sql = True
- table.view_query = 'select * from foo'
- self.assertEqual(table.view_use_legacy_sql, True)
- self.assertEqual(table.view_query, 'select * from foo')
-
- def test_external_data_configuration_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.external_data_configuration = 12345
-
- def test_labels_setter_bad_value(self):
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = self._make_one(table_ref)
- with self.assertRaises(ValueError):
- table.labels = 12345
-
- def test_from_api_repr_missing_identity(self):
- self._setUpConstants()
- RESOURCE = {}
- klass = self._get_target_class()
- with self.assertRaises(KeyError):
- klass.from_api_repr(RESOURCE)
-
- def test_from_api_repr_bare(self):
- self._setUpConstants()
- RESOURCE = {
- 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME),
- 'tableReference': {
- 'projectId': self.PROJECT,
- 'datasetId': self.DS_ID,
- 'tableId': self.TABLE_NAME,
- },
- 'type': 'TABLE',
- }
- klass = self._get_target_class()
- table = klass.from_api_repr(RESOURCE)
- self.assertEqual(table.table_id, self.TABLE_NAME)
- self._verifyResourceProperties(table, RESOURCE)
-
- def test_from_api_repr_w_properties(self):
- import datetime
- from google.cloud._helpers import UTC
- from google.cloud._helpers import _millis
-
- RESOURCE = self._makeResource()
- RESOURCE['view'] = {'query': 'select fullname, age from person_ages'}
- RESOURCE['type'] = 'VIEW'
- RESOURCE['location'] = 'EU'
- self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC)
- RESOURCE['expirationTime'] = _millis(self.EXP_TIME)
- klass = self._get_target_class()
- table = klass.from_api_repr(RESOURCE)
- self._verifyResourceProperties(table, RESOURCE)
-
- def test_partition_type_setter_bad_type(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- with self.assertRaises(ValueError):
- table.partitioning_type = 123
-
- def test_partition_type_setter_unknown_value(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- with self.assertRaises(ValueError):
- table.partitioning_type = "HASH"
-
- def test_partition_type_setter_w_known_value(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- self.assertIsNone(table.partitioning_type)
- table.partitioning_type = 'DAY'
- self.assertEqual(table.partitioning_type, 'DAY')
-
- def test_partition_type_setter_w_none(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- table._properties['timePartitioning'] = {'type': 'DAY'}
- table.partitioning_type = None
- self.assertIsNone(table.partitioning_type)
- self.assertFalse('timePartitioning' in table._properties)
-
- def test_partition_expiration_bad_type(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- with self.assertRaises(ValueError):
- table.partition_expiration = "NEVER"
-
- def test_partition_expiration_w_integer(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- self.assertIsNone(table.partition_expiration)
- table.partition_expiration = 100
- self.assertEqual(table.partitioning_type, "DAY")
- self.assertEqual(table.partition_expiration, 100)
-
- def test_partition_expiration_w_none(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- self.assertIsNone(table.partition_expiration)
- table._properties['timePartitioning'] = {
- 'type': 'DAY',
- 'expirationMs': 100,
- }
- table.partition_expiration = None
- self.assertEqual(table.partitioning_type, "DAY")
- self.assertIsNone(table.partition_expiration)
-
- def test_partition_expiration_w_none_no_partition_set(self):
- from google.cloud.bigquery.table import SchemaField
-
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- table = self._make_one(table_ref, schema=[full_name, age])
- self.assertIsNone(table.partition_expiration)
- table.partition_expiration = None
- self.assertIsNone(table.partitioning_type)
- self.assertIsNone(table.partition_expiration)
-
-
-class Test_row_from_mapping(unittest.TestCase, _SchemaBase):
-
- PROJECT = 'prahj-ekt'
- DS_ID = 'dataset-name'
- TABLE_NAME = 'table-name'
-
- def _call_fut(self, mapping, schema):
- from google.cloud.bigquery.table import _row_from_mapping
-
- return _row_from_mapping(mapping, schema)
-
- def test__row_from_mapping_wo_schema(self):
- from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
- MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32}
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- table = Table(table_ref)
-
- with self.assertRaises(ValueError) as exc:
- self._call_fut(MAPPING, table.schema)
-
- self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,))
-
- def test__row_from_mapping_w_invalid_schema(self):
- from google.cloud.bigquery.table import Table, SchemaField
- MAPPING = {
- 'full_name': 'Phred Phlyntstone',
- 'age': 32,
- 'colors': ['red', 'green'],
- 'bogus': 'WHATEVER',
- }
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- colors = SchemaField('colors', 'DATETIME', mode='REPEATED')
- bogus = SchemaField('joined', 'STRING', mode='BOGUS')
- table = Table(table_ref, schema=[full_name, age, colors, bogus])
-
- with self.assertRaises(ValueError) as exc:
- self._call_fut(MAPPING, table.schema)
-
- self.assertIn('Unknown field mode: BOGUS', str(exc.exception))
-
- def test__row_from_mapping_w_schema(self):
- from google.cloud.bigquery.table import Table, SchemaField
- MAPPING = {
- 'full_name': 'Phred Phlyntstone',
- 'age': 32,
- 'colors': ['red', 'green'],
- 'extra': 'IGNORED',
- }
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- table_ref = dataset.table(self.TABLE_NAME)
- full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
- age = SchemaField('age', 'INTEGER', mode='REQUIRED')
- colors = SchemaField('colors', 'DATETIME', mode='REPEATED')
- joined = SchemaField('joined', 'STRING', mode='NULLABLE')
- table = Table(table_ref, schema=[full_name, age, colors, joined])
-
- self.assertEqual(
- self._call_fut(MAPPING, table.schema),
- ('Phred Phlyntstone', 32, ['red', 'green'], None))
diff --git a/docs/bigquery/snippets.py b/docs/bigquery/snippets.py
deleted file mode 100644
index 3ae8486..0000000
--- a/docs/bigquery/snippets.py
+++ /dev/null
@@ -1,639 +0,0 @@
-# Copyright 2016 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Testable usage examples for Google BigQuery API wrapper
-
-Each example function takes a ``client`` argument (which must be an instance
-of :class:`google.cloud.bigquery.client.Client`) and uses it to perform a task
-with the API.
-
-To facilitate running the examples as system tests, each example is also passed
-a ``to_delete`` list; the function adds to the list any objects created which
-need to be deleted during teardown.
-"""
-
-import time
-
-import pytest
-import six
-
-from google.cloud import bigquery
-
-ORIGINAL_FRIENDLY_NAME = 'Original friendly name'
-ORIGINAL_DESCRIPTION = 'Original description'
-LOCALLY_CHANGED_FRIENDLY_NAME = 'Locally-changed friendly name'
-LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description'
-UPDATED_FRIENDLY_NAME = 'Updated friendly name'
-UPDATED_DESCRIPTION = 'Updated description'
-
-SCHEMA = [
- bigquery.SchemaField('full_name', 'STRING', mode='required'),
- bigquery.SchemaField('age', 'INTEGER', mode='required'),
-]
-
-ROWS = [
- ('Phred Phlyntstone', 32),
- ('Bharney Rhubble', 33),
- ('Wylma Phlyntstone', 29),
- ('Bhettye Rhubble', 27),
-]
-
-QUERY = (
- 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
- 'WHERE state = "TX"')
-
-
-@pytest.fixture(scope='module')
-def client():
- return bigquery.Client()
-
-
-@pytest.fixture
-def to_delete(client):
- doomed = []
- yield doomed
- for item in doomed:
- if isinstance(item, bigquery.Dataset):
- client.delete_dataset(item)
- elif isinstance(item, bigquery.Table):
- client.delete_table(item)
- else:
- item.delete()
-
-
-def _millis():
- return time.time() * 1000
-
-
-class _CloseOnDelete(object):
-
- def __init__(self, wrapped):
- self._wrapped = wrapped
-
- def delete(self):
- self._wrapped.close()
-
-
-def test_client_list_datasets(client):
- """List datasets for a project."""
-
- def do_something_with(_):
- pass
-
- # [START client_list_datasets]
- for dataset in client.list_datasets(): # API request(s)
- do_something_with(dataset)
- # [END client_list_datasets]
-
-
-def test_create_dataset(client, to_delete):
- """Create a dataset."""
- DATASET_ID = 'create_dataset_%d' % (_millis(),)
-
- # [START create_dataset]
- # DATASET_ID = 'dataset_ids_are_strings'
- dataset_ref = client.dataset(DATASET_ID)
- dataset = bigquery.Dataset(dataset_ref)
- dataset.description = 'my dataset'
- dataset = client.create_dataset(dataset) # API request
- # [END create_dataset]
-
- to_delete.append(dataset)
-
-
-def test_get_dataset(client, to_delete):
- """Reload a dataset's metadata."""
- DATASET_ID = 'get_dataset_%d' % (_millis(),)
- dataset_ref = client.dataset(DATASET_ID)
- dataset = bigquery.Dataset(dataset_ref)
- dataset.description = ORIGINAL_DESCRIPTION
- dataset = client.create_dataset(dataset) # API request
- to_delete.append(dataset)
-
- # [START get_dataset]
- assert dataset.description == ORIGINAL_DESCRIPTION
- dataset.description = LOCALLY_CHANGED_DESCRIPTION
- assert dataset.description == LOCALLY_CHANGED_DESCRIPTION
- dataset = client.get_dataset(dataset) # API request
- assert dataset.description == ORIGINAL_DESCRIPTION
- # [END get_dataset]
-
-
-def test_update_dataset_simple(client, to_delete):
- """Update a dataset's metadata."""
- DATASET_ID = 'update_dataset_simple_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset.description = ORIGINAL_DESCRIPTION
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START update_dataset_simple]
- assert dataset.description == ORIGINAL_DESCRIPTION
- dataset.description = UPDATED_DESCRIPTION
-
- dataset = client.update_dataset(dataset, ['description']) # API request
-
- assert dataset.description == UPDATED_DESCRIPTION
- # [END update_dataset_simple]
-
-
-def test_update_dataset_multiple_properties(client, to_delete):
- """Update a dataset's metadata."""
- DATASET_ID = 'update_dataset_multiple_properties_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset.description = ORIGINAL_DESCRIPTION
- dataset = client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START update_dataset_multiple_properties]
- assert dataset.description == ORIGINAL_DESCRIPTION
- assert dataset.default_table_expiration_ms is None
- entry = bigquery.AccessEntry(
- role='READER', entity_type='domain', entity_id='example.com')
- assert entry not in dataset.access_entries
- ONE_DAY_MS = 24 * 60 * 60 * 1000 # in milliseconds
- dataset.description = UPDATED_DESCRIPTION
- dataset.default_table_expiration_ms = ONE_DAY_MS
- entries = list(dataset.access_entries)
- entries.append(entry)
- dataset.access_entries = entries
-
- dataset = client.update_dataset(
- dataset,
- ['description', 'default_table_expiration_ms', 'access_entries']
- ) # API request
-
- assert dataset.description == UPDATED_DESCRIPTION
- assert dataset.default_table_expiration_ms == ONE_DAY_MS
- assert entry in dataset.access_entries
- # [END update_dataset_multiple_properties]
-
-
-def test_delete_dataset(client):
- """Delete a dataset."""
- DATASET_ID = 'delete_dataset_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- client.create_dataset(dataset)
-
- # [START delete_dataset]
- from google.cloud.exceptions import NotFound
-
- client.delete_dataset(dataset) # API request
-
- with pytest.raises(NotFound):
- client.get_dataset(dataset) # API request
- # [END delete_dataset]
-
-
-def test_list_dataset_tables(client, to_delete):
- """List tables within a dataset."""
- DATASET_ID = 'list_dataset_tables_dataset_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset = client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START list_dataset_tables]
- tables = list(client.list_dataset_tables(dataset)) # API request(s)
- assert len(tables) == 0
-
- table_ref = dataset.table('my_table')
- table = bigquery.Table(table_ref)
- table.view_query = QUERY
- client.create_table(table) # API request
- tables = list(client.list_dataset_tables(dataset)) # API request(s)
-
- assert len(tables) == 1
- assert tables[0].table_id == 'my_table'
- # [END list_dataset_tables]
-
- to_delete.insert(0, table)
-
-
-def test_create_table(client, to_delete):
- """Create a table."""
- DATASET_ID = 'create_table_dataset_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START create_table]
- SCHEMA = [
- bigquery.SchemaField('full_name', 'STRING', mode='required'),
- bigquery.SchemaField('age', 'INTEGER', mode='required'),
- ]
- table_ref = dataset.table('my_table')
- table = bigquery.Table(table_ref, schema=SCHEMA)
- table = client.create_table(table) # API request
-
- assert table.table_id == 'my_table'
- # [END create_table]
-
- to_delete.insert(0, table)
-
-
-def test_get_table(client, to_delete):
- """Reload a table's metadata."""
- DATASET_ID = 'get_table_dataset_%d' % (_millis(),)
- TABLE_ID = 'get_table_table_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset = client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
- table.description = ORIGINAL_DESCRIPTION
- table = client.create_table(table)
- to_delete.insert(0, table)
-
- # [START get_table]
- assert table.description == ORIGINAL_DESCRIPTION
- table.description = LOCALLY_CHANGED_DESCRIPTION
- table = client.get_table(table) # API request
- assert table.description == ORIGINAL_DESCRIPTION
- # [END get_table]
-
-
-def test_update_table_simple(client, to_delete):
- """Patch a table's metadata."""
- DATASET_ID = 'update_table_simple_dataset_%d' % (_millis(),)
- TABLE_ID = 'update_table_simple_table_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset.description = ORIGINAL_DESCRIPTION
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
- table.description = ORIGINAL_DESCRIPTION
- table = client.create_table(table)
- to_delete.insert(0, table)
-
- # [START update_table_simple]
- assert table.description == ORIGINAL_DESCRIPTION
- table.description = UPDATED_DESCRIPTION
-
- table = client.update_table(table, ['description']) # API request
-
- assert table.description == UPDATED_DESCRIPTION
- # [END update_table_simple]
-
-
-def test_update_table_multiple_properties(client, to_delete):
- """Update a table's metadata."""
- DATASET_ID = 'update_table_multiple_properties_dataset_%d' % (_millis(),)
- TABLE_ID = 'update_table_multiple_properties_table_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset.description = ORIGINAL_DESCRIPTION
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
- table.friendly_name = ORIGINAL_FRIENDLY_NAME
- table.description = ORIGINAL_DESCRIPTION
- table = client.create_table(table)
- to_delete.insert(0, table)
-
- # [START update_table_multiple_properties]
- assert table.friendly_name == ORIGINAL_FRIENDLY_NAME
- assert table.description == ORIGINAL_DESCRIPTION
-
- NEW_SCHEMA = list(table.schema)
- NEW_SCHEMA.append(bigquery.SchemaField('phone', 'STRING'))
- table.friendly_name = UPDATED_FRIENDLY_NAME
- table.description = UPDATED_DESCRIPTION
- table.schema = NEW_SCHEMA
- table = client.update_table(
- table,
- ['schema', 'friendly_name', 'description']
- ) # API request
-
- assert table.friendly_name == UPDATED_FRIENDLY_NAME
- assert table.description == UPDATED_DESCRIPTION
- assert table.schema == NEW_SCHEMA
- # [END update_table_multiple_properties]
-
-
-def test_table_create_rows(client, to_delete):
- """Insert / fetch table data."""
- DATASET_ID = 'table_create_rows_dataset_%d' % (_millis(),)
- TABLE_ID = 'table_create_rows_table_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dataset = client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
- table = client.create_table(table)
- to_delete.insert(0, table)
-
- # [START table_create_rows]
- ROWS_TO_INSERT = [
- (u'Phred Phlyntstone', 32),
- (u'Wylma Phlyntstone', 29),
- ]
-
- errors = client.create_rows(table, ROWS_TO_INSERT) # API request
-
- assert errors == []
- # [END table_create_rows]
-
-
-def test_load_table_from_file(client, to_delete):
- """Upload table data from a CSV file."""
- DATASET_ID = 'table_upload_from_file_dataset_%d' % (_millis(),)
- TABLE_ID = 'table_upload_from_file_table_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table_ref = dataset.table(TABLE_ID)
- table = bigquery.Table(table_ref, schema=SCHEMA)
- table = client.create_table(table)
- to_delete.insert(0, table)
-
- # [START load_table_from_file]
- csv_file = six.BytesIO(b"""full_name,age
-Phred Phlyntstone,32
-Wylma Phlyntstone,29
-""")
-
- table_ref = dataset.table(TABLE_ID)
- job_config = bigquery.LoadJobConfig()
- job_config.source_format = 'CSV'
- job_config.skip_leading_rows = 1
- job = client.load_table_from_file(
- csv_file, table_ref, job_config=job_config) # API request
- job.result() # Waits for table load to complete.
- # [END load_table_from_file]
-
- found_rows = []
-
- def do_something(row):
- found_rows.append(row)
-
- # [START table_list_rows]
- for row in client.list_rows(table): # API request
- do_something(row)
- # [END table_list_rows]
-
- assert len(found_rows) == 2
-
- # [START table_list_rows_iterator_properties]
- iterator = client.list_rows(table) # API request
- page = six.next(iterator.pages)
- rows = list(page)
- total = iterator.total_rows
- token = iterator.next_page_token
- # [END table_list_rows_iterator_properties]
-
- row_tuples = [r.values() for r in rows]
- assert len(rows) == total == 2
- assert token is None
- assert (u'Phred Phlyntstone', 32) in row_tuples
- assert (u'Wylma Phlyntstone', 29) in row_tuples
-
-
-def test_load_table_from_uri(client, to_delete):
- ROWS = [
- ('Phred Phlyntstone', 32),
- ('Bharney Rhubble', 33),
- ('Wylma Phlyntstone', 29),
- ('Bhettye Rhubble', 27),
- ]
- HEADER_ROW = ('Full Name', 'Age')
- bucket_name = 'gs_bq_load_test_%d' % (_millis(),)
- blob_name = 'person_ages.csv'
- bucket, blob = _write_csv_to_storage(
- bucket_name, blob_name, HEADER_ROW, ROWS)
- to_delete.extend((blob, bucket))
- DATASET_ID = 'load_table_from_uri_dataset_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START load_table_from_uri]
- table_ref = dataset.table('person_ages')
- table = bigquery.Table(table_ref)
- table.schema = [
- bigquery.SchemaField('full_name', 'STRING', mode='required'),
- bigquery.SchemaField('age', 'INTEGER', mode='required')
- ]
- client.create_table(table) # API request
- GS_URL = 'gs://{}/{}'.format(bucket_name, blob_name)
- job_id_prefix = "my_job"
- job_config = bigquery.LoadJobConfig()
- job_config.create_disposition = 'CREATE_NEVER'
- job_config.skip_leading_rows = 1
- job_config.source_format = 'CSV'
- job_config.write_disposition = 'WRITE_EMPTY'
- load_job = client.load_table_from_uri(
- GS_URL, table_ref, job_config=job_config,
- job_id_prefix=job_id_prefix) # API request
-
- assert load_job.state == 'RUNNING'
- assert load_job.job_type == 'load'
-
- load_job.result() # Waits for table load to complete.
-
- assert load_job.state == 'DONE'
- assert load_job.job_id.startswith(job_id_prefix)
- # [END load_table_from_uri]
-
- to_delete.insert(0, table)
-
-
-def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows):
- import csv
- from google.cloud._testing import _NamedTemporaryFile
- from google.cloud.storage import Client as StorageClient
-
- storage_client = StorageClient()
-
- # In the **very** rare case the bucket name is reserved, this
- # fails with a ConnectionError.
- bucket = storage_client.create_bucket(bucket_name)
-
- blob = bucket.blob(blob_name)
-
- with _NamedTemporaryFile() as temp:
- with open(temp.name, 'w') as csv_write:
- writer = csv.writer(csv_write)
- writer.writerow(header_row)
- writer.writerows(data_rows)
-
- with open(temp.name, 'rb') as csv_read:
- blob.upload_from_file(csv_read, content_type='text/csv')
-
- return bucket, blob
-
-
-def test_copy_table(client, to_delete):
- DATASET_ID = 'copy_table_dataset_%d' % (_millis(),)
- # [START copy_table]
- source_dataset = bigquery.DatasetReference(
- 'bigquery-public-data', 'samples')
- source_table_ref = source_dataset.table('shakespeare')
-
- dest_dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- dest_dataset = client.create_dataset(dest_dataset) # API request
- dest_table_ref = dest_dataset.table('destination_table')
-
- job_config = bigquery.CopyJobConfig()
- job = client.copy_table(
- source_table_ref, dest_table_ref, job_config=job_config) # API request
- job.result() # Waits for job to complete.
-
- assert job.state == 'DONE'
- dest_table = client.get_table(dest_table_ref) # API request
- assert dest_table.table_id == 'destination_table'
- # [END copy_table]
-
- to_delete.append(dest_dataset)
- to_delete.insert(0, dest_table)
-
-
-def test_extract_table(client, to_delete):
- DATASET_ID = 'export_data_dataset_%d' % (_millis(),)
- dataset = bigquery.Dataset(client.dataset(DATASET_ID))
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table_ref = dataset.table('person_ages')
- table = client.create_table(bigquery.Table(table_ref, schema=SCHEMA))
- to_delete.insert(0, table)
- client.create_rows(table, ROWS)
-
- bucket_name = 'extract_person_ages_job_%d' % (_millis(),)
- # [START extract_table]
- from google.cloud.storage import Client as StorageClient
-
- storage_client = StorageClient()
- bucket = storage_client.create_bucket(bucket_name) # API request
- destination_blob_name = 'person_ages_out.csv'
- destination = bucket.blob(destination_blob_name)
-
- destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name)
- extract_job = client.extract_table(
- table_ref, destination_uri) # API request
- extract_job.result(timeout=100) # Waits for job to complete.
-
- got = destination.download_as_string().decode('utf-8') # API request
- assert 'Bharney Rhubble' in got
- # [END extract_table]
- to_delete.append(bucket)
- to_delete.insert(0, destination)
-
-
-def test_delete_table(client, to_delete):
- """Delete a table."""
- DATASET_ID = 'delete_table_dataset_%d' % (_millis(),)
- TABLE_ID = 'delete_table_table_%d' % (_millis(),)
- dataset_ref = client.dataset(DATASET_ID)
- dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
- to_delete.append(dataset)
-
- table_ref = dataset.table(TABLE_ID)
- table = bigquery.Table(table_ref, schema=SCHEMA)
- client.create_table(table)
- # [START delete_table]
- from google.cloud.exceptions import NotFound
-
- client.delete_table(table) # API request
-
- with pytest.raises(NotFound):
- client.get_table(table) # API request
- # [END delete_table]
-
-
-def test_client_query(client):
- """Run a query"""
-
- # [START client_query]
- QUERY = (
- 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
- 'WHERE state = "TX" '
- 'LIMIT 100')
- TIMEOUT = 30 # in seconds
- query_job = client.query(QUERY) # API request - starts the query
- assert query_job.state == 'RUNNING'
-
- # Waits for the query to finish
- iterator = query_job.result(timeout=TIMEOUT)
- rows = list(iterator)
-
- assert query_job.state == 'DONE'
- assert len(rows) == 100
- row = rows[0]
- assert row[0] == row.name == row['name']
- # [END client_query]
-
-
-def test_client_query_w_param(client):
- """Run a query using a query parameter"""
-
- # [START client_query_w_param]
- QUERY_W_PARAM = (
- 'SELECT name, state '
- 'FROM `bigquery-public-data.usa_names.usa_1910_2013` '
- 'WHERE state = @state '
- 'LIMIT 100')
- TIMEOUT = 30 # in seconds
- param = bigquery.ScalarQueryParameter('state', 'STRING', 'TX')
- job_config = bigquery.QueryJobConfig()
- job_config.query_parameters = [param]
- query_job = client.query(
- QUERY_W_PARAM, job_config=job_config) # API request - starts the query
- assert query_job.state == 'RUNNING'
-
- # Waits for the query to finish
- iterator = query_job.result(timeout=TIMEOUT)
- rows = list(iterator)
-
- assert query_job.state == 'DONE'
- assert len(rows) == 100
- row = rows[0]
- assert row[0] == row.name == row['name']
- assert row.state == 'TX'
- # [END client_query_w_param]
-
-
-def test_client_query_rows(client):
- """Run a simple query."""
-
- # [START client_query_rows]
- QUERY = (
- 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
- 'WHERE state = "TX" '
- 'LIMIT 100')
- TIMEOUT = 30 # in seconds
- rows = list(client.query_rows(QUERY, timeout=TIMEOUT)) # API request
-
- assert len(rows) == 100
- row = rows[0]
- assert row[0] == row.name == row['name']
- # [END client_query_rows]
-
-
-def test_client_list_jobs(client):
- """List jobs for a project."""
-
- def do_something_with(_):
- pass
-
- # [START client_list_jobs]
- job_iterator = client.list_jobs() # API request(s)
- for job in job_iterator:
- do_something_with(job)
- # [END client_list_jobs]
-
-
-if __name__ == '__main__':
- pytest.main()
diff --git a/docs/bigquery/usage.rst b/docs/bigquery/usage.rst
deleted file mode 100644
index 9a5e7f3..0000000
--- a/docs/bigquery/usage.rst
+++ /dev/null
@@ -1,266 +0,0 @@
-BigQuery
-========
-
-.. toctree::
- :maxdepth: 2
- :hidden:
-
- client
- dataset
- job
- query
- schema
- table
-
-Authentication / Configuration
-------------------------------
-
-- Use :class:`Client <google.cloud.bigquery.client.Client>` objects to configure
- your applications.
-
-- :class:`Client <google.cloud.bigquery.client.Client>` objects hold both a ``project``
- and an authenticated connection to the BigQuery service.
-
-- The authentication credentials can be implicitly determined from the
- environment or directly via
- :meth:`from_service_account_json <google.cloud.bigquery.client.Client.from_service_account_json>`
- and
- :meth:`from_service_account_p12 <google.cloud.bigquery.client.Client.from_service_account_p12>`.
-
-- After setting :envvar:`GOOGLE_APPLICATION_CREDENTIALS` and
- :envvar:`GOOGLE_CLOUD_PROJECT` environment variables, create an instance of
- :class:`Client <google.cloud.bigquery.client.Client>`.
-
- .. code-block:: python
-
- >>> from google.cloud import bigquery
- >>> client = bigquery.Client()
-
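-Credentials can also be loaded explicitly from a service-account key file
-(a minimal sketch; the key-file path below is illustrative):
-
-.. code-block:: python
-
- >>> from google.cloud import bigquery
- >>> client = bigquery.Client.from_service_account_json(
- ... '/path/to/service-account-key.json')
-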
-
-Projects
---------
-
-A project is the top-level container in the ``BigQuery`` API: it is tied
-closely to billing, and can provide default access control across all its
-datasets. If no ``project`` is passed to the client constructor, the library
-attempts to infer a project using the environment (including explicit
-environment variables, GAE, and GCE).
-
-To override the project inferred from the environment, pass an explicit
-``project`` to the constructor, or to either of the alternative
-``classmethod`` factories:
-
-.. code-block:: python
-
- >>> from google.cloud import bigquery
- >>> client = bigquery.Client(project='PROJECT_ID')
-
-
-Project ACLs
-~~~~~~~~~~~~
-
-Each project has an access control list granting reader / writer / owner
-permission to one or more entities. This list cannot be queried or set
-via the API; it must be managed using the Google Developer Console.
-
-
-Datasets
---------
-
-A dataset represents a collection of tables, and applies several default
-policies to tables as they are created:
-
-- An access control list (ACL). When created, a dataset has an ACL
- which maps to the ACL inherited from its project.
-
-- A default table expiration period. If set, tables created within the
- dataset will use this value as their expiration period (see below).
-
-See BigQuery documentation for more information on
-`Datasets <https://cloud.google.com/bigquery/docs/datasets>`_.
-
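-For example, the default table expiration can be set before the dataset is
-created (a minimal sketch; the dataset ID and the one-day value are
-illustrative):
-
-.. code-block:: python
-
- >>> from google.cloud import bigquery
- >>> client = bigquery.Client()
- >>> dataset = bigquery.Dataset(client.dataset('my_dataset'))
- >>> dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000 # one day
- >>> dataset = client.create_dataset(dataset) # API request
-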
-
-Dataset operations
-~~~~~~~~~~~~~~~~~~
-
-List datasets for the client's project:
-
-.. literalinclude:: snippets.py
- :start-after: [START client_list_datasets]
- :end-before: [END client_list_datasets]
-
-Create a new dataset for the client's project:
-
-.. literalinclude:: snippets.py
- :start-after: [START create_dataset]
- :end-before: [END create_dataset]
-
-Refresh metadata for a dataset (to pick up changes made by another client):
-
-.. literalinclude:: snippets.py
- :start-after: [START get_dataset]
- :end-before: [END get_dataset]
-
-Update a property in a dataset's metadata:
-
-.. literalinclude:: snippets.py
- :start-after: [START update_dataset_simple]
- :end-before: [END update_dataset_simple]
-
-Update multiple properties in a dataset's metadata:
-
-.. literalinclude:: snippets.py
- :start-after: [START update_dataset_multiple_properties]
- :end-before: [END update_dataset_multiple_properties]
-
-Delete a dataset:
-
-.. literalinclude:: snippets.py
- :start-after: [START delete_dataset]
- :end-before: [END delete_dataset]
-
-
-Tables
-------
-
-Tables exist within datasets. See BigQuery documentation for more information
-on `Tables <https://cloud.google.com/bigquery/docs/tables>`_.
-
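-A table is always addressed through its dataset. A minimal sketch of building
-a table reference with the ``client`` created above (the dataset and table IDs
-are illustrative):
-
-.. code-block:: python
-
- >>> table_ref = client.dataset('my_dataset').table('my_table')
-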
-Table operations
-~~~~~~~~~~~~~~~~~~
-
-List tables for the dataset:
-
-.. literalinclude:: snippets.py
- :start-after: [START list_dataset_tables]
- :end-before: [END list_dataset_tables]
-
-Create a table:
-
-.. literalinclude:: snippets.py
- :start-after: [START create_table]
- :end-before: [END create_table]
-
-Get a table:
-
-.. literalinclude:: snippets.py
- :start-after: [START get_table]
- :end-before: [END get_table]
-
-Update a property in a table's metadata:
-
-.. literalinclude:: snippets.py
- :start-after: [START update_table_simple]
- :end-before: [END update_table_simple]
-
-Update multiple properties in a table's metadata:
-
-.. literalinclude:: snippets.py
- :start-after: [START update_table_multiple_properties]
- :end-before: [END update_table_multiple_properties]
-
-Get rows from a table's data:
-
-.. literalinclude:: snippets.py
- :start-after: [START table_list_rows]
- :end-before: [END table_list_rows]
-
-Use the iterator properties returned with row data:
-
-.. literalinclude:: snippets.py
- :start-after: [START table_list_rows_iterator_properties]
- :end-before: [END table_list_rows_iterator_properties]
-
-Insert rows into a table's data:
-
-.. literalinclude:: snippets.py
- :start-after: [START table_create_rows]
- :end-before: [END table_create_rows]
-
-Upload table data from a file:
-
-.. literalinclude:: snippets.py
- :start-after: [START load_table_from_file]
- :end-before: [END load_table_from_file]
-
-Load table data from Google Cloud Storage:
-
-.. literalinclude:: snippets.py
- :start-after: [START load_table_from_uri]
- :end-before: [END load_table_from_uri]
-
-Copy a table:
-
-.. literalinclude:: snippets.py
- :start-after: [START copy_table]
- :end-before: [END copy_table]
-
-Extract a table to Google Cloud Storage:
-
-.. literalinclude:: snippets.py
- :start-after: [START extract_table]
- :end-before: [END extract_table]
-
-Delete a table:
-
-.. literalinclude:: snippets.py
- :start-after: [START delete_table]
- :end-before: [END delete_table]
-
-
-Queries
--------
-
-Querying data
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. literalinclude:: snippets.py
- :start-after: [START client_query]
- :end-before: [END client_query]
-
-.. note::
-
- - Use of the ``timeout`` parameter is optional. The query will continue to
- run in the background even if it takes longer than the timeout allows.
-
-
-Run a query using a named query parameter
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-See BigQuery documentation for more information on
-`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.
-
-.. literalinclude:: snippets.py
- :start-after: [START client_query_w_param]
- :end-before: [END client_query_w_param]
-
-
-Querying Table Rows
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Run a query and wait for it to finish:
-
-.. literalinclude:: snippets.py
- :start-after: [START client_query_rows]
- :end-before: [END client_query_rows]
-
-.. note::
-
- - Use of the ``timeout`` parameter is optional. The query will continue to
- run in the background even if it takes longer than the timeout allows. The
- job may be retrieved later using its job ID via
- :meth:`~google.cloud.bigquery.client.Client.get_job`.
-
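-For instance, a query job started earlier can be fetched again by ID and
-waited on (a minimal sketch; ``query_job`` is assumed to come from a prior
-call to :meth:`~google.cloud.bigquery.client.Client.query`):
-
-.. code-block:: python
-
- >>> job = client.get_job(query_job.job_id) # API request
- >>> rows = list(job.result()) # Waits for the query to finish
-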
-
-List jobs for a project
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Jobs describe actions performed on data in BigQuery tables:
-
-- Load data into a table
-- Run a query against data in one or more tables
-- Extract data from a table
-- Copy a table
-
-.. literalinclude:: snippets.py
- :start-after: [START client_list_jobs]
- :end-before: [END client_list_jobs]