Jan Tattermusch | 6d7fa55 | 2016-04-14 17:42:54 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 |
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 2 | # Copyright 2016, Google Inc. |
| 3 | # All rights reserved. |
| 4 | # |
| 5 | # Redistribution and use in source and binary forms, with or without |
| 6 | # modification, are permitted provided that the following conditions are |
| 7 | # met: |
| 8 | # |
| 9 | # * Redistributions of source code must retain the above copyright |
| 10 | # notice, this list of conditions and the following disclaimer. |
| 11 | # * Redistributions in binary form must reproduce the above |
| 12 | # copyright notice, this list of conditions and the following disclaimer |
| 13 | # in the documentation and/or other materials provided with the |
| 14 | # distribution. |
| 15 | # * Neither the name of Google Inc. nor the names of its |
| 16 | # contributors may be used to endorse or promote products derived from |
| 17 | # this software without specific prior written permission. |
| 18 | # |
| 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
Jan Tattermusch | 6d7fa55 | 2016-04-14 17:42:54 -0700 | [diff] [blame] | 31 | # Uploads performance benchmark result file to bigquery. |
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 32 | |
Jan Tattermusch | 6d7fa55 | 2016-04-14 17:42:54 -0700 | [diff] [blame] | 33 | import argparse |
Jan Tattermusch | 4843b51 | 2016-04-15 13:43:39 -0700 | [diff] [blame] | 34 | import calendar |
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 35 | import json |
| 36 | import os |
| 37 | import sys |
Jan Tattermusch | 4843b51 | 2016-04-15 13:43:39 -0700 | [diff] [blame] | 38 | import time |
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 39 | import uuid |
| 40 | |
| 41 | |
# Make the shared GCP helper module importable: big_query_utils lives in
# ../../gcp/utils relative to this script, outside the normal import path.
gcp_utils_dir = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '../../gcp/utils'))
sys.path.append(gcp_utils_dir)
import big_query_utils


# BigQuery project that receives the benchmark results.
_PROJECT_ID='grpc-testing'
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 49 | |
| 50 | |
def _upload_netperf_latency_csv_to_bigquery(dataset_id, table_id, result_file):
  """Uploads a netperf TCP_RR latency result file to BigQuery.

  Args:
    dataset_id: BigQuery dataset to upload to.
    table_id: BigQuery table to upload to (created if missing).
    result_file: path to a CSV file containing exactly three comma-separated
        latency values (50th, 90th and 99th percentile).

  Exits the process with status 1 if the upload fails.
  """
  with open(result_file, 'r') as f:
    (col1, col2, col3) = f.read().split(',')
    # Scale by 1000 to match the units used by the scenario_result schema
    # (presumably ms -> us; TODO confirm against netperf output format).
    latency50 = float(col1.strip()) * 1000
    latency90 = float(col2.strip()) * 1000
    latency99 = float(col3.strip()) * 1000

    # Wrap the three values in a minimal scenario_result-shaped record so the
    # same results table/schema can hold netperf and benchmark results.
    scenario_result = {
        'scenario': {
            'name': 'netperf_tcp_rr'
        },
        'summary': {
            'latency50': latency50,
            'latency90': latency90,
            'latency99': latency99
        }
    }

  bq = big_query_utils.create_big_query()
  _create_results_table(bq, dataset_id, table_id)

  # flatten=False: this record is already flat (no nested benchmark fields).
  if not _insert_result(bq, dataset_id, table_id, scenario_result, flatten=False):
    # print() is equivalent to the py2 print statement for a single argument
    # and keeps the script forward-compatible with Python 3.
    print('Error uploading result to bigquery.')
    sys.exit(1)
| 75 | |
| 76 | |
def _upload_scenario_result_to_bigquery(dataset_id, table_id, result_file):
  """Uploads a JSON scenario result file to BigQuery.

  Args:
    dataset_id: BigQuery dataset to upload to.
    table_id: BigQuery table to upload to (created if missing).
    result_file: path to a JSON file containing one scenario result.

  Exits the process with status 1 if the upload fails.
  """
  with open(result_file, 'r') as f:
    scenario_result = json.loads(f.read())

  bq = big_query_utils.create_big_query()
  _create_results_table(bq, dataset_id, table_id)

  if not _insert_result(bq, dataset_id, table_id, scenario_result):
    # print() is equivalent to the py2 print statement for a single argument
    # and keeps the script forward-compatible with Python 3.
    print('Error uploading result to bigquery.')
    sys.exit(1)
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 87 | |
| 88 | |
def _insert_result(bq, dataset_id, table_id, scenario_result, flatten=True):
  """Inserts one scenario result row into the given BigQuery table.

  When flatten is True the nested benchmark fields are first collapsed to
  JSON strings so they fit the flat table schema. Jenkins build metadata is
  always attached. Returns the boolean result of the insert call.
  """
  if flatten:
    _flatten_result_inplace(scenario_result)
  _populate_metadata_inplace(scenario_result)
  # Each upload gets a fresh UUID as its row insert id.
  result_row = big_query_utils.make_row(str(uuid.uuid4()), scenario_result)
  return big_query_utils.insert_rows(
      bq, _PROJECT_ID, dataset_id, table_id, [result_row])
| 99 | |
| 100 | |
def _create_results_table(bq, dataset_id, table_id):
  """Creates the results table (if needed) from the bundled JSON schema.

  Returns the result of big_query_utils.create_table2.
  """
  # Use os.path.join rather than string concatenation: when the script is run
  # from its own directory, os.path.dirname(__file__) is '' and the old
  # '' + '/scenario_result_schema.json' produced the bogus absolute path
  # '/scenario_result_schema.json'.
  schema_path = os.path.join(os.path.dirname(__file__),
                             'scenario_result_schema.json')
  with open(schema_path, 'r') as f:
    table_schema = json.loads(f.read())
  desc = 'Results of performance benchmarks.'
  return big_query_utils.create_table2(bq, _PROJECT_ID, dataset_id,
                                       table_id, table_schema, desc)
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 107 | |
| 108 | |
| 109 | def _flatten_result_inplace(scenario_result): |
| 110 | """Bigquery is not really great for handling deeply nested data |
| 111 | and repeated fields. To maintain values of some fields while keeping |
| 112 | the schema relatively simple, we artificially leave some of the fields |
| 113 | as JSON strings. |
| 114 | """ |
| 115 | scenario_result['scenario']['clientConfig'] = json.dumps(scenario_result['scenario']['clientConfig']) |
| 116 | scenario_result['scenario']['serverConfig'] = json.dumps(scenario_result['scenario']['serverConfig']) |
| 117 | scenario_result['latencies'] = json.dumps(scenario_result['latencies']) |
Yuxuan Li | ac87a46 | 2016-11-11 12:05:11 -0800 | [diff] [blame] | 118 | scenario_result['serverCpuStats'] = [] |
Yuxuan Li | d885a27 | 2016-11-09 15:46:06 -0800 | [diff] [blame] | 119 | for stats in scenario_result['serverStats']: |
Yuxuan Li | ac87a46 | 2016-11-11 12:05:11 -0800 | [diff] [blame] | 120 | scenario_result['serverCpuStats'].append(dict()) |
| 121 | scenario_result['serverCpuStats'][-1]['totalCpuTime'] = stats.pop('totalCpuTime', None) |
| 122 | scenario_result['serverCpuStats'][-1]['idleCpuTime'] = stats.pop('idleCpuTime', None) |
Jan Tattermusch | efd9803 | 2016-04-14 16:29:24 -0700 | [diff] [blame] | 123 | for stats in scenario_result['clientStats']: |
| 124 | stats['latencies'] = json.dumps(stats['latencies']) |
Craig Tiller | ed531b8 | 2016-11-01 10:27:18 -0700 | [diff] [blame] | 125 | stats.pop('requestResults', None) |
Jan Tattermusch | 88cc4e2 | 2016-04-14 16:58:50 -0700 | [diff] [blame] | 126 | scenario_result['serverCores'] = json.dumps(scenario_result['serverCores']) |
Sree Kuchibhotla | 6dbfce0 | 2016-07-15 11:05:24 -0700 | [diff] [blame] | 127 | scenario_result['clientSuccess'] = json.dumps(scenario_result['clientSuccess']) |
| 128 | scenario_result['serverSuccess'] = json.dumps(scenario_result['serverSuccess']) |
Craig Tiller | 77fbc1c | 2016-10-31 14:04:03 -0700 | [diff] [blame] | 129 | scenario_result['requestResults'] = json.dumps(scenario_result.get('requestResults', [])) |
Yuxuan Li | ac87a46 | 2016-11-11 12:05:11 -0800 | [diff] [blame] | 130 | scenario_result['serverCpuUsage'] = scenario_result['summary'].pop('serverCpuUsage', None) |
Craig Tiller | c939022 | 2016-11-01 15:47:24 -0700 | [diff] [blame] | 131 | scenario_result['summary'].pop('successfulRequestsPerSecond', None) |
| 132 | scenario_result['summary'].pop('failedRequestsPerSecond', None) |
Jan Tattermusch | 6d7fa55 | 2016-04-14 17:42:54 -0700 | [diff] [blame] | 133 | |
Yuxuan Li | 317f60b | 2016-11-09 15:08:26 -0800 | [diff] [blame] | 134 | |
Jan Tattermusch | 4843b51 | 2016-04-15 13:43:39 -0700 | [diff] [blame] | 135 | def _populate_metadata_inplace(scenario_result): |
| 136 | """Populates metadata based on environment variables set by Jenkins.""" |
| 137 | # NOTE: Grabbing the Jenkins environment variables will only work if the |
| 138 | # driver is running locally on the same machine where Jenkins has started |
| 139 | # the job. For our setup, this is currently the case, so just assume that. |
| 140 | build_number = os.getenv('BUILD_NUMBER') |
| 141 | build_url = os.getenv('BUILD_URL') |
| 142 | job_name = os.getenv('JOB_NAME') |
| 143 | git_commit = os.getenv('GIT_COMMIT') |
| 144 | # actual commit is the actual head of PR that is getting tested |
| 145 | git_actual_commit = os.getenv('ghprbActualCommit') |
| 146 | |
| 147 | utc_timestamp = str(calendar.timegm(time.gmtime())) |
| 148 | metadata = {'created': utc_timestamp} |
| 149 | |
| 150 | if build_number: |
| 151 | metadata['buildNumber'] = build_number |
| 152 | if build_url: |
| 153 | metadata['buildUrl'] = build_url |
| 154 | if job_name: |
| 155 | metadata['jobName'] = job_name |
| 156 | if git_commit: |
| 157 | metadata['gitCommit'] = git_commit |
| 158 | if git_actual_commit: |
| 159 | metadata['gitActualCommit'] = git_actual_commit |
| 160 | |
| 161 | scenario_result['metadata'] = metadata |
| 162 | |
| 163 | |
# Command-line driver: parse arguments and dispatch on the input file format.
argp = argparse.ArgumentParser(description='Upload result to big query.')
argp.add_argument('--bq_result_table', required=True, default=None, type=str,
                  help='Bigquery "dataset.table" to upload results to.')
argp.add_argument('--file_to_upload', default='scenario_result.json', type=str,
                  help='Report file to upload.')
argp.add_argument('--file_format',
                  choices=['scenario_result','netperf_latency_csv'],
                  default='scenario_result',
                  help='Format of the file to upload.')

args = argp.parse_args()

# NOTE(review): maxsplit=2 allows three parts from "a.b.c", which would make
# this 2-name unpack raise ValueError; presumably maxsplit=1 was intended —
# harmless for the documented "dataset.table" form, confirm before changing.
dataset_id, table_id = args.bq_result_table.split('.', 2)

if args.file_format == 'netperf_latency_csv':
  _upload_netperf_latency_csv_to_bigquery(dataset_id, table_id, args.file_to_upload)
else:
  _upload_scenario_result_to_bigquery(dataset_id, table_id, args.file_to_upload)
print 'Successfully uploaded %s to BigQuery.\n' % args.file_to_upload