#!/usr/bin/env python2.7
# Copyright 2015 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

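"""Helpers for working with BigQuery via the google-api-python-client
discovery API: creating datasets and (optionally partitioned) tables,
patching table schemas, streaming rows, and running synchronous queries."""
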
import argparse
import json
import uuid
import httplib2

from apiclient import discovery
from apiclient.errors import HttpError
from oauth2client.client import GoogleCredentials

# 30 days in milliseconds
_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
NUM_RETRIES = 3


def create_big_query():
  """Authenticates with the cloud platform and returns a BigQuery service object.
  """
  creds = GoogleCredentials.get_application_default()
  return discovery.build('bigquery', 'v2', credentials=creds,
                         cache_discovery=False)


def create_dataset(big_query, project_id, dataset_id):
  is_success = True
  body = {
      'datasetReference': {
          'projectId': project_id,
          'datasetId': dataset_id
      }
  }

  try:
    dataset_req = big_query.datasets().insert(projectId=project_id, body=body)
    dataset_req.execute(num_retries=NUM_RETRIES)
  except HttpError as http_error:
    if http_error.resp.status == 409:
      print 'Warning: The dataset %s already exists' % dataset_id
    else:
      # Note: For more debugging info, print "http_error.content"
      print 'Error in creating dataset: %s. Err: %s' % (dataset_id, http_error)
      is_success = False
  return is_success


def create_table(big_query, project_id, dataset_id, table_id, table_schema,
                 description):
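  """Creates a table; table_schema is a list of
  (column name, column type, description) tuples.
  """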
  fields = [{'name': field_name,
             'type': field_type,
             'description': field_description
             } for (field_name, field_type, field_description) in table_schema]
  return create_table2(big_query, project_id, dataset_id, table_id,
                       fields, description)


def create_partitioned_table(big_query, project_id, dataset_id, table_id,
                             table_schema, description, partition_type='DAY',
                             expiration_ms=_EXPIRATION_MS):
  """Creates a partitioned table. By default, a date-partitioned table is
  created and each partition expires 30 days after it was last modified.
  """
  fields = [{'name': field_name,
             'type': field_type,
             'description': field_description
             } for (field_name, field_type, field_description) in table_schema]
  return create_table2(big_query, project_id, dataset_id, table_id,
                       fields, description, partition_type, expiration_ms)


def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
                  description, partition_type=None, expiration_ms=None):
  is_success = True

  body = {
      'description': description,
      'schema': {
          'fields': fields_schema
      },
      'tableReference': {
          'datasetId': dataset_id,
          'projectId': project_id,
          'tableId': table_id
      }
  }

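  # Optionally configure time-based partitioning; BigQuery automatically
  # drops partitions once they are older than expiration_ms.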
  if partition_type and expiration_ms:
    body['timePartitioning'] = {
        'type': partition_type,
        'expirationMs': expiration_ms
    }

  try:
    table_req = big_query.tables().insert(projectId=project_id,
                                          datasetId=dataset_id,
                                          body=body)
    res = table_req.execute(num_retries=NUM_RETRIES)
    print 'Successfully created %s "%s"' % (res['kind'], res['id'])
  except HttpError as http_error:
    if http_error.resp.status == 409:
      print 'Warning: Table %s already exists' % table_id
    else:
      print 'Error in creating table: %s. Err: %s' % (table_id, http_error)
      is_success = False
  return is_success


def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
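  """Patches an existing table, updating its schema to fields_schema."""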
  is_success = True

  body = {
      'schema': {
          'fields': fields_schema
      },
      'tableReference': {
          'datasetId': dataset_id,
          'projectId': project_id,
          'tableId': table_id
      }
  }

  try:
    table_req = big_query.tables().patch(projectId=project_id,
                                         datasetId=dataset_id,
                                         tableId=table_id,
                                         body=body)
    res = table_req.execute(num_retries=NUM_RETRIES)
    print 'Successfully patched %s "%s"' % (res['kind'], res['id'])
  except HttpError as http_error:
    print 'Error in patching table: %s. Err: %s' % (table_id, http_error)
    is_success = False
  return is_success


def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
  is_success = True
  body = {'rows': rows_list}
  try:
    insert_req = big_query.tabledata().insertAll(projectId=project_id,
                                                 datasetId=dataset_id,
                                                 tableId=table_id,
                                                 body=body)
    res = insert_req.execute(num_retries=NUM_RETRIES)
    if res.get('insertErrors', None):
      print 'Error inserting rows! Response: %s' % res
      is_success = False
  except HttpError as http_error:
    print 'Error inserting rows into the table %s: %s' % (table_id, http_error)
    is_success = False

  return is_success


def sync_query_job(big_query, project_id, query, timeout=5000):
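  """Runs a synchronous query; 'timeout' is the maximum time, in milliseconds,
  BigQuery waits for the query to complete before responding.

  Returns the query response, or None if the request failed.
  """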
  query_data = {'query': query, 'timeoutMs': timeout}
  query_job = None
  try:
    query_job = big_query.jobs().query(
        projectId=project_id,
        body=query_data).execute(num_retries=NUM_RETRIES)
  except HttpError as http_error:
    print 'Query execute job failed with error: %s' % http_error
    print http_error.content
  return query_job


# The table_schema arguments above are lists of
# (column name, column type, description) tuples.
def make_row(unique_row_id, row_values_dict):
  """row_values_dict is a dictionary mapping column names to column values."""
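  # 'insertId' lets BigQuery de-duplicate rows that are streamed more than
  # once (best-effort de-duplication on retries).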
  return {'insertId': unique_row_id, 'json': row_values_dict}
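
# A minimal usage sketch (the project, dataset, table and schema below are
# hypothetical, shown only to illustrate how these helpers fit together):
#
#   bq = create_big_query()
#   create_dataset(bq, 'my-project', 'my_dataset')
#   schema = [('job_name', 'STRING', 'Name of the job'),
#             ('result', 'STRING', 'Outcome of the run')]
#   create_table(bq, 'my-project', 'my_dataset', 'results', schema,
#                'Example results table')
#   rows = [make_row(str(uuid.uuid4()), {'job_name': 'build',
#                                        'result': 'PASSED'})]
#   insert_rows(bq, 'my-project', 'my_dataset', 'results', rows)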