blob: 7947b9e4b6db964d967cb541fad82c535f845611 [file] [log] [blame]
Dan Shi4df39252013-03-19 13:19:45 -07001# pylint: disable-msg=C0111
2
Chris Masone859fdec2012-01-30 08:38:09 -08003# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7__author__ = 'cmasone@chromium.org (Chris Masone)'
8
Michael Tang9afc74b2016-03-21 10:19:23 -07009# The boto module is only available/used in Moblab for validation of cloud
10# storage access. The module is not available in the test lab environment,
11# and the import error is handled.
12try:
13 import boto
14except ImportError:
15 boto = None
Chris Masone859fdec2012-01-30 08:38:09 -080016import common
Simran Basi773a86e2015-05-13 19:15:42 -070017import ConfigParser
Chris Masonea8066a92012-05-01 16:52:31 -070018import datetime
Chris Masone859fdec2012-01-30 08:38:09 -080019import logging
Simran Basi71206ef2014-08-13 13:51:18 -070020import os
Michael Tang9afc74b2016-03-21 10:19:23 -070021import re
Simran Basi71206ef2014-08-13 13:51:18 -070022import shutil
Michael Tang9afc74b2016-03-21 10:19:23 -070023import socket
Aviv Keshetd83ef442013-01-16 16:19:35 -080024
Jakob Juelich82b7d1c2014-09-15 16:10:57 -070025from autotest_lib.frontend.afe import models
Matthew Sartorid96fb9b2015-05-19 18:04:58 -070026from autotest_lib.client.common_lib import control_data
Aviv Keshetd83ef442013-01-16 16:19:35 -080027from autotest_lib.client.common_lib import error
Simran Basi71206ef2014-08-13 13:51:18 -070028from autotest_lib.client.common_lib import global_config
Alex Miller7d658cf2013-09-04 16:00:35 -070029from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070030from autotest_lib.client.common_lib import time_utils
Chris Masone859fdec2012-01-30 08:38:09 -080031from autotest_lib.client.common_lib.cros import dev_server
Gabe Black1e1c41b2015-02-04 23:55:15 -080032from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Jakob Juelich9fffe4f2014-08-14 18:07:05 -070033from autotest_lib.frontend.afe import rpc_utils
Simran Basib6ec8ae2014-04-23 12:05:08 -070034from autotest_lib.server import utils
Dan Shi36cfd832014-10-10 13:38:51 -070035from autotest_lib.server.cros import provision
Chris Masone44e4d6c2012-08-15 14:25:53 -070036from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070037from autotest_lib.server.cros.dynamic_suite import control_file_getter
Chris Masone44e4d6c2012-08-15 14:25:53 -070038from autotest_lib.server.cros.dynamic_suite import tools
Dan Shi36cfd832014-10-10 13:38:51 -070039from autotest_lib.server.cros.dynamic_suite.suite import Suite
Simran Basi71206ef2014-08-13 13:51:18 -070040from autotest_lib.server.hosts import moblab_host
Dan Shidfea3682014-08-10 23:38:40 -070041from autotest_lib.site_utils import host_history
Dan Shi193905e2014-07-25 23:33:09 -070042from autotest_lib.site_utils import job_history
Dan Shid7bb4f12015-01-06 10:53:50 -080043from autotest_lib.site_utils import server_manager_utils
Dan Shi6964fa52014-12-18 11:04:27 -080044from autotest_lib.site_utils import stable_version_utils
Simran Basi71206ef2014-08-13 13:51:18 -070045
46
# Shared global_config object used by the RPCs in this module to read
# configuration values and to locate the shadow config file.
_CONFIG = global_config.global_config
# Boto file read by _get_boto_config() and written by
# submit_wizard_config_info().
MOBLAB_BOTO_LOCATION = '/home/moblab/.boto'
Chris Masone859fdec2012-01-30 08:38:09 -080049
# Google Cloud Storage bucket url regex pattern. The pattern is used to extract
# the bucket name from the bucket URL. For example, "gs://image_bucket/google"
# should result in a bucket name "image_bucket".
# Bucket names may start with a letter or a digit and may contain dots,
# dashes and underscores, so all of those are accepted inside the name;
# otherwise a dotted name such as "my.bucket" would be cut short at the dot.
GOOGLE_STORAGE_BUCKET_URL_PATTERN = re.compile(
        r'gs://(?P<bucket>[a-zA-Z0-9][a-zA-Z0-9._-]*)/?.*')

# Constants used in JSON RPC field names.
_USE_EXISTING_BOTO_FILE = 'use_existing_boto_file'
_GS_ACCESS_KEY_ID = 'gs_access_key_id'
_GS_SECRETE_ACCESS_KEY = 'gs_secret_access_key'
_IMAGE_STORAGE_SERVER = 'image_storage_server'
_RESULT_STORAGE_SERVER = 'results_storage_server'
62
63
Chris Masonef8b53062012-05-08 22:14:18 -070064# Relevant CrosDynamicSuiteExceptions are defined in client/common_lib/error.py.
Chris Masone859fdec2012-01-30 08:38:09 -080065
66
def canonicalize_suite_name(suite_name):
    """Return the control file path for the suite named |suite_name|.

    @param suite_name: short name of the suite, e.g. 'bvt'.

    @return: the suite name prefixed with 'test_suites/control.'.
    """
    # Do not change this naming convention without updating
    # site_utils.parse_job_name.
    control_path_template = 'test_suites/control.%s'
    return control_path_template % suite_name
71
72
def formatted_now():
    """Return the current local time formatted with time_utils.TIME_FMT."""
    current_time = datetime.datetime.now()
    return current_time.strftime(time_utils.TIME_FMT)
Chris Masoneaa10f8e2012-05-15 13:34:21 -070075
76
def _get_control_file_contents_by_name(build, ds, suite_name):
    """Return control file contents for |suite_name|.

    Query the dev server at |ds| for the control file |suite_name|, included
    in |build|.

    @param build: unique name by which to refer to the image from now on.
    @param ds: a dev_server.DevServer instance to fetch control file with.
    @param suite_name: canonicalized suite name, e.g. test_suites/control.bvt.
    @raises ControlFileNotFound if a unique suite control file doesn't exist.
    @raises NoControlFileList if we can't list the control files at all.
    @raises ControlFileEmpty if the control file exists on the server, but
                             can't be read.
    @raises ControlFileMalformed if the control file contains non-ascii
                                 characters.

    @return the contents of the desired control file.
    """
    getter = control_file_getter.DevServerGetter.create(build, ds)
    # The stat name is built from the dev server name (dots replaced so they
    # do not add extra levels to the stat path) and the last dot-separated
    # component of the suite name.
    server_tag = ds.get_server_name(ds.url()).replace('.', '_')
    timer = autotest_stats.Timer('control_files.parse.%s.%s' %
                                 (server_tag, suite_name.rsplit('.')[-1]))
    # Get the control file for the suite.
    try:
        with timer:
            control_file_in = getter.get_control_file_contents_by_name(
                    suite_name)
    except error.CrosDynamicSuiteException as e:
        # Re-raise the same exception type with the build added for context.
        raise type(e)("%s while testing %s." % (e, build))
    if not control_file_in:
        raise error.ControlFileEmpty(
                "Fetching %s returned no data." % suite_name)
    # Force control files to only contain ascii characters.
    try:
        control_file_in.encode('ascii')
    except UnicodeError as e:
        # UnicodeError covers both UnicodeDecodeError (byte strings are
        # implicitly decoded before encoding) and UnicodeEncodeError (unicode
        # strings), so non-ascii contents are reported for either input type.
        raise error.ControlFileMalformed(str(e))

    return control_file_in
115
116
def _stage_build_artifacts(build, hostname=None):
    """
    Stage the 'test_suites' artifact of |build| on a dev server.

    @param build image we want to stage.
    @param hostname hostname of a dut may run test on. This is to help to locate
        a devserver closer to duts if needed. Default is None.

    @raises StageControlFileFailure: if the dev server raises a
                                     DevServerException while staging.

    @return: tuple (ds, timings): the dev server returned by
             dev_server.resolve for this build, and a dictionary holding the
             staging start/end times.
    """
    ds = dev_server.resolve(build, hostname=hostname)
    timings = {constants.DOWNLOAD_STARTED_TIME: formatted_now()}
    # Dots in the server name are replaced before it is used in the stat name.
    server_tag = ds.get_server_name(ds.url()).replace('.', '_')
    timer = autotest_stats.Timer('control_files.stage.%s' % server_tag)
    try:
        with timer:
            ds.stage_artifacts(image=build, artifacts=['test_suites'])
    except dev_server.DevServerException as e:
        failure_message = "Failed to stage %s: %s" % (build, e)
        raise error.StageControlFileFailure(failure_message)
    timings[constants.PAYLOAD_FINISHED_TIME] = formatted_now()
    return (ds, timings)
147
148
@rpc_utils.route_rpc_to_master
def create_suite_job(name='', board='', pool='', control_file='',
                     check_hosts=True, num=None, file_bugs=False, timeout=24,
                     timeout_mins=None, priority=priorities.Priority.DEFAULT,
                     suite_args=None, wait_for_results=True, job_retry=False,
                     max_retries=None, max_runtime_mins=None, suite_min_duts=0,
                     offload_failures_only=False, builds=None,
                     test_source_build=None, run_prod_code=False,
                     delay_minutes=0, **kwargs):
    """
    Create a job to run a test suite on the given device with the given image.

    When the timeout specified in the control file is reached, the
    job is guaranteed to have completed and results will be available.

    @param name: The test name if control_file is supplied, otherwise the name
                 of the test suite to run, e.g. 'bvt'.
    @param board: the kind of device to run the tests on.
    @param builds: the builds to install e.g.
                   {'cros-version:': 'x86-alex-release/R18-1655.0.0',
                    'fwrw-version:': 'x86-alex-firmware/R36-5771.50.0',
                    'fwro-version:': 'x86-alex-firmware/R36-5771.49.0'}
                   None (the default) is treated as an empty dictionary.
    @param test_source_build: Build that contains the server-side test code.
    @param pool: Specify the pool of machines to use for scheduling
            purposes.
    @param control_file: contents of the suite control file. If empty, the
                         control file is looked up by suite name.
    @param check_hosts: require appropriate live hosts to exist in the lab.
    @param num: Specify the number of machines to schedule across (integer).
                Leave unspecified or use None to use default sharding factor.
    @param file_bugs: File a bug on each test failure in this suite.
    @param timeout: The max lifetime of this suite, in hours.
    @param timeout_mins: The max lifetime of this suite, in minutes. Takes
                         priority over timeout.
    @param priority: Integer denoting priority. Higher is more important.
    @param suite_args: Optional arguments which will be parsed by the suite
                       control file. Used by control.test_that_wrapper to
                       determine which tests to run.
    @param wait_for_results: Set to False to run the suite job without waiting
                             for test jobs to finish. Default is True.
    @param job_retry: Set to True to enable job-level retry. Default is False.
    @param max_retries: Integer, maximum job retries allowed at suite level.
                        None for no max.
    @param max_runtime_mins: Maximum amount of time a job can be running in
                             minutes.
    @param suite_min_duts: Integer. Scheduler will prioritize getting the
                           minimum number of machines for the suite when it is
                           competing with another suite that has a higher
                           priority but already got minimum machines it needs.
    @param offload_failures_only: Only enable gs_offloading for failed jobs.
    @param run_prod_code: If True, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers. If False, the control files and test
                          code for this suite run will be retrieved from the
                          build artifacts.
    @param delay_minutes: Delay the creation of test jobs for a given number of
                          minutes.
    @param kwargs: extra keyword args. NOT USED.

    @raises SuiteArgumentException: if num is neither an int nor None.
    @raises ControlFileNotFound: if a unique suite control file doesn't exist.
    @raises NoControlFileList: if we can't list the control files at all.
    @raises StageControlFileFailure: If the dev server throws 500 while
                                     staging test_suites.
    @raises ControlFileEmpty: if the control file exists on the server, but
                              can't be read.

    @return: the job ID of the suite (the value returned by
             rpc_utils.create_job_common).
    """
    if type(num) is not int and num is not None:
        raise error.SuiteArgumentException('Ill specified num argument %r. '
                                           'Must be an integer or None.' % num)
    if num == 0:
        logging.warning("Can't run on 0 hosts; using default.")
        num = None

    # The default is None rather than {} so that one dictionary object is not
    # shared between calls.
    if builds is None:
        builds = {}

    # Default test source build to CrOS build if it's not specified and
    # run_prod_code is set to False.
    if not run_prod_code:
        test_source_build = Suite.get_test_source_build(
                builds, test_source_build=test_source_build)

    # If 'prefer_local_devserver' is True in global setting, and both board
    # and pool are specified, pick a dut in the given board and pool, and
    # use that to help to pick a devserver in the same subnet of the duts
    # to be used to run tests.
    if dev_server.PREFER_LOCAL_DEVSERVER and pool and board:
        sample_dut = rpc_utils.get_sample_dut(board, pool)
    else:
        sample_dut = None

    suite_name = canonicalize_suite_name(name)
    if run_prod_code:
        ds = dev_server.resolve(test_source_build, hostname=sample_dut)
        keyvals = {}
        # Read the suite control file from the local installation instead of
        # from the build artifacts.
        getter = control_file_getter.FileSystemGetter(
                [_CONFIG.get_config_value('SCHEDULER',
                                          'drone_installation_directory')])
        control_file = getter.get_control_file_contents_by_name(suite_name)
    else:
        (ds, keyvals) = _stage_build_artifacts(
                test_source_build, hostname=sample_dut)
    keyvals[constants.SUITE_MIN_DUTS_KEY] = suite_min_duts

    if not control_file:
        # No control file was supplied so look it up from the build artifacts.
        control_file = _get_control_file_contents_by_name(test_source_build,
                                                          ds, suite_name)
    # Do not change this naming convention without updating
    # site_utils.parse_job_name.
    if not run_prod_code:
        name = '%s-%s' % (test_source_build, suite_name)
    else:
        # If run_prod_code is True, test_source_build is not set, use the
        # first build in the builds list for the suite job name.
        name = '%s-%s' % (list(builds.values())[0], suite_name)

    timeout_mins = timeout_mins or timeout * 60
    max_runtime_mins = max_runtime_mins or timeout * 60

    if not board:
        board = utils.ParseBuildName(builds[provision.CROS_VERSION_PREFIX])[0]

    # Prepend builds and board to the control file.
    inject_dict = {'board': board,
                   # `build` is needed for suites like AU to stage image inside
                   # suite control file.
                   'build': test_source_build,
                   'builds': builds,
                   'check_hosts': check_hosts,
                   'pool': pool,
                   'num': num,
                   'file_bugs': file_bugs,
                   'timeout': timeout,
                   'timeout_mins': timeout_mins,
                   'devserver_url': ds.url(),
                   'priority': priority,
                   'suite_args' : suite_args,
                   'wait_for_results': wait_for_results,
                   'job_retry': job_retry,
                   'max_retries': max_retries,
                   'max_runtime_mins': max_runtime_mins,
                   'offload_failures_only': offload_failures_only,
                   'test_source_build': test_source_build,
                   'run_prod_code': run_prod_code,
                   'delay_minutes': delay_minutes,
                   }

    control_file = tools.inject_vars(inject_dict, control_file)

    return rpc_utils.create_job_common(name,
                                       priority=priority,
                                       timeout_mins=timeout_mins,
                                       max_runtime_mins=max_runtime_mins,
                                       control_type='Server',
                                       control_file=control_file,
                                       hostless=True,
                                       keyvals=keyvals)
Simran Basi71206ef2014-08-13 13:51:18 -0700306
307
308# TODO: hide the following rpcs under is_moblab
def moblab_only(func):
    """Ensure moblab specific functions only run on Moblab devices.

    @param func: the function to guard.

    @return: a wrapper that raises error.RPCException when utils.is_moblab()
             is false, and otherwise calls |func| with the same arguments.
    """
    # Imported locally so the decorator does not depend on the module's
    # import block.
    import functools

    # functools.wraps keeps the wrapped function's name and docstring on the
    # wrapper instead of exposing every guarded RPC as 'verify'.
    @functools.wraps(func)
    def verify(*args, **kwargs):
        if not utils.is_moblab():
            # Format the message here; passing the name as a second
            # constructor argument would leave the '%s' unformatted.
            raise error.RPCException(
                    'RPC: %s can only run on Moblab Systems!' % func.__name__)
        return func(*args, **kwargs)
    return verify
317
318
@moblab_only
def get_config_values():
    """Returns all config values known to the global config object.

    Config values are grouped by sections, and each section is composed of
    a list of name value pairs.

    @return: the section-to-items mapping, passed through
             rpc_utils.prepare_for_serialization.
    """
    config_values = dict(
            (section, _CONFIG.config.items(section))
            for section in _CONFIG.get_sections())
    return rpc_utils.prepare_for_serialization(config_values)
Simran Basi71206ef2014-08-13 13:51:18 -0700331
332
def _write_config_file(config_file, config_values, overwrite=False):
    """Writes out a configuration file.

    Nothing is written when |config_values| is falsy.

    @param config_file: The name of the configuration file.
    @param config_values: The ConfigParser object.
    @param overwrite: Flag on if overwriting is allowed.

    @raises RPCException: if the file name is empty, or the file already
                          exists and overwriting is not allowed.
    """
    if not config_file:
        raise error.RPCException('Empty config file name.')
    if not overwrite:
        if os.path.exists(config_file):
            raise error.RPCException('Config file already exists.')

    if not config_values:
        return
    with open(config_file, 'w') as output:
        config_values.write(output)
348
349
def _read_original_config():
    """Reads the original configuration without the shadow config.

    @return: A configuration object, see global_config_class.
    """
    unshadowed_config = global_config.global_config_class()
    # An empty shadow file name is passed so no shadow overrides are used.
    unshadowed_config.set_config_files(shadow_file='')
    return unshadowed_config
358
359
def _read_raw_config(config_file):
    """Reads the raw configuration from a configuration file.

    @param config_file: The path of the configuration file.

    @return: A ConfigParser.RawConfigParser object loaded from the file.
    """
    parser = ConfigParser.RawConfigParser()
    parser.read(config_file)
    return parser
370
371
def _get_shadow_config_from_partial_update(config_values):
    """Finds out the new shadow configuration based on a partial update.

    Since the input is only a partial config, we should not lose the config
    data inside the existing shadow config file. We also need to distinguish
    if the input config info overrides with a new value or reverts back to
    an original value.

    @param config_values: See get_moblab_settings().

    @return: The new shadow configuration as ConfigParser object.
    """
    original_config = _read_original_config()
    shadow = _read_raw_config(_CONFIG.shadow_file)
    for section, name_value_pairs in config_values.iteritems():
        for key, value in name_value_pairs:
            original_value = original_config.get_config_value(
                    section, key, default='', allow_blank=True)
            if original_value != value:
                # The value differs from the original: record the override.
                if not shadow.has_section(section):
                    shadow.add_section(section)
                shadow.set(section, key, value)
                continue
            # The value matches the original: drop any existing override.
            if shadow.has_option(section, key):
                shadow.remove_option(section, key)
    return shadow
397
398
def _update_partial_config(config_values):
    """Updates the shadow configuration file with a partial config update.

    @param config_values: See get_moblab_settings().
    """
    # The shadow file is rewritten in place, so overwriting is allowed.
    _write_config_file(
            _CONFIG.shadow_file,
            _get_shadow_config_from_partial_update(config_values),
            True)
406
407
@moblab_only
def update_config_handler(config_values):
    """Update config values and override shadow config.

    The shadow config is rebuilt from scratch with only the values that
    differ from the original configuration, then the machine is rebooted.

    @param config_values: See get_moblab_settings().

    @raises RPCException: if the shadow config file does not exist.
    """
    original_config = _read_original_config()
    new_shadow = ConfigParser.RawConfigParser()
    for section, name_value_pairs in config_values.iteritems():
        for key, value in name_value_pairs:
            original_value = original_config.get_config_value(
                    section, key, default='', allow_blank=True)
            if original_value != value:
                if not new_shadow.has_section(section):
                    new_shadow.add_section(section)
                new_shadow.set(section, key, value)

    shadow_path = _CONFIG.shadow_file
    if not (shadow_path and os.path.exists(shadow_path)):
        raise error.RPCException('Shadow config file does not exist.')
    _write_config_file(shadow_path, new_shadow, True)

    # TODO (sbasi) crbug.com/403916 - Remove the reboot command and
    # instead restart the services that rely on the config values.
    os.system('sudo reboot')
432
433
@moblab_only
def reset_config_settings():
    """Empty the shadow config file, then reboot the machine."""
    # Opening the file in 'w' mode truncates it; nothing is written.
    open(_CONFIG.shadow_file, 'w').close()
    os.system('sudo reboot')
439
440
@moblab_only
def set_boto_key(boto_key):
    """Update the boto_key file.

    @param boto_key: File name of boto_key uploaded through handle_file_upload.

    @raises RPCException: if |boto_key| does not exist.
    """
    if os.path.exists(boto_key):
        shutil.copyfile(boto_key, moblab_host.MOBLAB_BOTO_LOCATION)
        return
    raise error.RPCException('Boto key: %s does not exist!' % boto_key)
Dan Shi193905e2014-07-25 23:33:09 -0700450
451
@moblab_only
def set_launch_control_key(launch_control_key):
    """Update the launch_control_key file.

    @param launch_control_key: File name of launch_control_key uploaded through
            handle_file_upload.

    @raises RPCException: if |launch_control_key| does not exist.
    """
    key_is_present = os.path.exists(launch_control_key)
    if not key_is_present:
        message = ('Launch Control key: %s does not exist!' %
                   launch_control_key)
        raise error.RPCException(message)
    destination = moblab_host.MOBLAB_LAUNCH_CONTROL_KEY_LOCATION
    shutil.copyfile(launch_control_key, destination)
    # Restart the devserver service.
    os.system('sudo restart moblab-devserver-init')
466
467
Michael Tang9afc74b2016-03-21 10:19:23 -0700468###########Moblab Config Wizard RPCs #######################
def _get_public_ip_address(socket_handle):
    """Gets the public IP address.

    Connects |socket_handle| to the Google DNS server (8.8.8.8, port 53) with
    a one second timeout and returns the local address the connection is
    bound to.

    @param socket_handle: a socket object. The caller keeps ownership; it is
                          not closed here.

    @return: the local ip address as string, or None if the socket has no
             name or a socket error occurred.
    """
    try:
        socket_handle.settimeout(1)
        socket_handle.connect(('8.8.8.8', 53))
        socket_name = socket_handle.getsockname()
        if socket_name is not None:
            logging.info('Got socket name from UDP socket.')
            return socket_name[0]
        # logging.warning is used instead of the deprecated logging.warn
        # alias, matching the rest of this module.
        logging.warning('Created UDP socket but with no socket_name.')
    except socket.error:
        logging.warning('Could not get socket name from UDP socket.')
    return None
490
491
def _get_network_info():
    """Gets the network information.

    TCP socket is used to test the connectivity. If there is no connectivity,
    try to get the public IP with UDP socket. Each socket is closed once it
    has been used.

    @return: a tuple as (public_ip_address, connected_to_internet).
    """
    tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        ip = _get_public_ip_address(tcp_socket)
    finally:
        # Close explicitly so the descriptor is not leaked on every RPC call.
        tcp_socket.close()
    if ip is not None:
        logging.info('Established TCP connection with well known server.')
        return (ip, True)

    udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        return (_get_public_ip_address(udp_socket), False)
    finally:
        udp_socket.close()
507
508
@moblab_only
def get_network_info():
    """Returns the server ip addresses and the server connectivity.

    The result holds 'server_ips' (a list with the detected ip address,
    present only when an address was found) and 'is_connected' (the
    connectivity flag).

    @return: the network info dictionary, passed through
             rpc_utils.prepare_for_serialization.
    """
    ip_address, is_connected = _get_network_info()
    network_info = {}
    if ip_address is not None:
        network_info['server_ips'] = [ip_address]
    network_info['is_connected'] = is_connected

    return rpc_utils.prepare_for_serialization(network_info)
522 return rpc_utils.prepare_for_serialization(network_info)
523
524
# Gets the boto configuration.
def _get_boto_config():
    """Reads the boto configuration from the file at MOBLAB_BOTO_LOCATION.

    @return: Boto configuration as ConfigParser object.
    """
    parser = ConfigParser.ConfigParser()
    parser.read(MOBLAB_BOTO_LOCATION)
    return parser
534
535
@moblab_only
def get_cloud_storage_info():
    """RPC handler to get the cloud storage access information.

    The result holds the image and result storage servers from the 'CROS'
    config section (when not None), a flag telling whether the boto file has
    any section, and the access key id and secret from the boto file's
    'Credentials' section when present.

    @return: the cloud storage info dictionary, passed through
             rpc_utils.prepare_for_serialization.
    """
    cloud_storage_info = {}
    for option in (_IMAGE_STORAGE_SERVER, _RESULT_STORAGE_SERVER):
        value = _CONFIG.get_config_value('CROS', option)
        if value is not None:
            cloud_storage_info[option] = value

    boto_config = _get_boto_config()
    sections = boto_config.sections()

    # A boto file with at least one section counts as an existing boto file.
    cloud_storage_info[_USE_EXISTING_BOTO_FILE] = bool(sections)
    if 'Credentials' in sections:
        options = boto_config.options('Credentials')
        for option in (_GS_ACCESS_KEY_ID, _GS_SECRETE_ACCESS_KEY):
            if option in options:
                cloud_storage_info[option] = boto_config.get('Credentials',
                                                             option)

    return rpc_utils.prepare_for_serialization(cloud_storage_info)
564 return rpc_utils.prepare_for_serialization(cloud_storage_info)
565
566
def _get_bucket_name_from_url(bucket_url):
    """Gets the bucket name from a bucket url.

    @param bucket_url: the bucket url string.

    @return: the bucket name captured by GOOGLE_STORAGE_BUCKET_URL_PATTERN,
             or None if the url is empty or does not match the pattern.
    """
    if not bucket_url:
        return None
    match = GOOGLE_STORAGE_BUCKET_URL_PATTERN.match(bucket_url)
    if not match:
        return None
    return match.group('bucket')
577
578
def _is_valid_boto_key(key_id, key_secret):
    """Checks if the boto key is valid.

    The key is considered valid when listing all buckets with it does not
    raise a GSResponseError.

    @param key_id: The boto key id string.
    @param key_secret: The boto key string.

    @return: A tuple as (valid_boolean, details_string).
    """
    if not key_id or not key_secret:
        return (False, "Empty key id or secret.")
    if boto is None:
        # boto is set to None at import time when the module is missing;
        # report that instead of failing with an AttributeError.
        return (False, "The boto module is not available.")
    conn = boto.connect_gs(key_id, key_secret)
    try:
        # Only success or failure of the listing matters; the result itself
        # is not used.
        conn.get_all_buckets()
        return (True, None)
    except boto.exception.GSResponseError:
        return (False, "The boto access key is not valid")
    finally:
        conn.close()
598
599
def _is_valid_bucket(key_id, key_secret, bucket_name):
    """Checks if a bucket is valid and accessible.

    @param key_id: The boto key id string.
    @param key_secret: The boto key string.
    @param bucket_name: The bucket name string.

    @return: A tuple as (valid_boolean, details_string).
    """
    if not key_id or not key_secret or not bucket_name:
        return (False, "Server error: invalid argument")
    if boto is None:
        # boto is set to None at import time when the module is missing;
        # report that instead of failing with an AttributeError.
        return (False, "The boto module is not available.")
    conn = boto.connect_gs(key_id, key_secret)
    try:
        bucket = conn.lookup(bucket_name)
    finally:
        # Close the connection even if the lookup raises.
        conn.close()
    if bucket:
        return (True, None)
    return (False, "Bucket %s does not exist." % bucket_name)
617
618
def _is_valid_bucket_url(key_id, key_secret, bucket_url):
    """Validates the bucket url is accessible.

    @param key_id: The boto key id string.
    @param key_secret: The boto key string.
    @param bucket_url: The bucket url string.

    @return: A tuple as (valid_boolean, details_string).
    """
    bucket_name = _get_bucket_name_from_url(bucket_url)
    if not bucket_name:
        # No bucket name could be extracted from the url.
        return (False, "Bucket url %s is not valid" % bucket_url)
    return _is_valid_bucket(key_id, key_secret, bucket_name)
632
633
def _validate_cloud_storage_info(cloud_storage_info):
    """Checks if the cloud storage information is valid.

    When the existing boto file is to be used, no credentials are supplied in
    |cloud_storage_info|, so nothing is validated and the info is reported as
    valid. Otherwise the supplied key is checked first, then the image
    storage bucket url, then the result storage bucket url; validation stops
    at the first failure.

    @param cloud_storage_info: The JSON RPC object for cloud storage info.

    @return: A tuple as (valid_boolean, details_string).
    """
    if cloud_storage_info[_USE_EXISTING_BOTO_FILE]:
        # key_id and key_secret only exist when new credentials are supplied;
        # the bucket checks need them, so they cannot run in this case.
        return (True, None)

    key_id = cloud_storage_info[_GS_ACCESS_KEY_ID]
    key_secret = cloud_storage_info[_GS_SECRETE_ACCESS_KEY]
    valid, details = _is_valid_boto_key(key_id, key_secret)

    if valid:
        valid, details = _is_valid_bucket_url(
                key_id, key_secret, cloud_storage_info[_IMAGE_STORAGE_SERVER])

    if valid:
        valid, details = _is_valid_bucket_url(
                key_id, key_secret, cloud_storage_info[_RESULT_STORAGE_SERVER])
    return (valid, details)
656
657
def _create_operation_status_response(is_ok, details):
    """Helper method to create an operation status response.

    @param is_ok: Boolean for if the operation is ok.
    @param details: A detailed string; left out of the response when falsy.

    @return: the status dictionary, passed through
             rpc_utils.prepare_for_serialization.
    """
    response = {'status_ok': is_ok}
    if details:
        response['status_details'] = details
    return rpc_utils.prepare_for_serialization(response)
670
671
@moblab_only
def validate_cloud_storage_info(cloud_storage_info):
    """RPC handler to check if the cloud storage info is valid.

    @param cloud_storage_info: The JSON RPC object for cloud storage info.

    @return: A serialized JSON RPC object holding the validation status.
    """
    is_valid, message = _validate_cloud_storage_info(cloud_storage_info)
    return _create_operation_status_response(is_valid, message)
678
679
@moblab_only
def submit_wizard_config_info(cloud_storage_info):
    """RPC handler to submit the cloud storage info.

    The info is validated first; on failure the failing status is returned
    and no configuration is written. On success the image and result storage
    servers are stored in the CROS config section and, unless the existing
    boto file is to be used, a new boto credentials file is written.

    @param cloud_storage_info: The JSON RPC object for cloud storage info.

    @return: A serialized JSON RPC object holding the operation status.
    """
    is_valid, message = _validate_cloud_storage_info(cloud_storage_info)
    if not is_valid:
        return _create_operation_status_response(is_valid, message)

    storage_servers = [
        (_IMAGE_STORAGE_SERVER, cloud_storage_info[_IMAGE_STORAGE_SERVER]),
        (_RESULT_STORAGE_SERVER, cloud_storage_info[_RESULT_STORAGE_SERVER]),
    ]
    _update_partial_config({'CROS': storage_servers})

    if not cloud_storage_info[_USE_EXISTING_BOTO_FILE]:
        credentials = ConfigParser.RawConfigParser()
        credentials.add_section('Credentials')
        for option in (_GS_ACCESS_KEY_ID, _GS_SECRETE_ACCESS_KEY):
            credentials.set('Credentials', option, cloud_storage_info[option])
        _write_config_file(MOBLAB_BOTO_LOCATION, credentials, True)

    # Re-parse the config file; presumably so the values written above take
    # effect for later reads.
    _CONFIG.parse_config_file()

    return _create_operation_status_response(True, None)
706
707
def get_job_history(**filter_data):
    """Get history of the job, including the special tasks executed for the job

    @param filter_data: filter for the call, should at least include
                        {'job_id': [job id]}
    @returns: JSON string of the job's history, including the information such
              as the hosts run the job and the special tasks executed before
              and after the job.
    @raises KeyError: If `job_id` is missing from filter_data.
    """
    info = job_history.get_job_info(filter_data['job_id'])
    history = info.get_history()
    return rpc_utils.prepare_for_serialization(history)
720
721
def get_host_history(start_time, end_time, hosts=None, board=None, pool=None):
    """Get history of a list of host.

    The return is a JSON string of host history for each host, for example,
    {'172.22.33.51': [{'status': 'Resetting',
                       'start_time': '2014-08-07 10:02:16',
                       'end_time': '2014-08-07 10:03:16',
                       'log_url': 'http://autotest/reset-546546/debug',
                       'dbg_str': 'Task: Special Task 19441991 (host ...)'},
                      {'status': 'Running',
                       'start_time': '2014-08-07 10:03:18',
                       'end_time': '2014-08-07 10:13:00',
                       'log_url': 'http://autotest/reset-546546/debug',
                       'dbg_str': 'HQE: 15305005, for job: 14995562'}
                     ]
    }

    @param start_time: start time to search for history, can be string value or
                       epoch time.
    @param end_time: end time to search for history, can be string value or
                     epoch time.
    @param hosts: A list of hosts to search for history. Default is None.
    @param board: board type of hosts. Default is None.
    @param pool: pool type of hosts. Default is None.
    @returns: JSON string of the host history.
    """
    history_details = host_history.get_history_details(
            start_time=start_time, end_time=end_time,
            hosts=hosts, board=board, pool=pool,
            process_pool_size=4)
    return rpc_utils.prepare_for_serialization(history_details)
Jakob Juelich59cfe542014-09-02 16:37:46 -0700752
753
def shard_heartbeat(shard_hostname, jobs=(), hqes=(), known_job_ids=(),
                    known_host_ids=(), known_host_statuses=()):
    """Receive updates for job statuses from shards and assign hosts and jobs.

    @param shard_hostname: Hostname of the calling shard
    @param jobs: Jobs in serialized form that should be updated with newer
                 status from a shard.
    @param hqes: Hostqueueentries in serialized form that should be updated with
                 newer status from a shard. Note that for every hostqueueentry
                 the corresponding job must be in jobs.
    @param known_job_ids: List of ids of jobs the shard already has.
    @param known_host_ids: List of ids of hosts the shard already has.
    @param known_host_statuses: List of statuses of hosts the shard already has.
                                Must have the same length as known_host_ids;
                                entries are matched up by position.

    @returns: Serialized representations of hosts, jobs, suite job keyvals
              and their dependencies to be inserted into a shard's database.
    """
    # The following alternatives to sending host and job ids in every heartbeat
    # have been considered:
    # 1. Sending the highest known job and host ids. This would work for jobs:
    #    Newer jobs always have larger ids. Also, if a job is not assigned to a
    #    particular shard during a heartbeat, it never will be assigned to this
    #    shard later.
    #    This is not true for hosts though: A host that is leased won't be sent
    #    to the shard now, but might be sent in a future heartbeat. This means
    #    sometimes hosts should be transferred that have a lower id than the
    #    maximum host id the shard knows.
    # 2. Send the number of jobs/hosts the shard knows to the master in each
    #    heartbeat. Compare these to the number of records that already have
    #    the shard_id set to this shard. In the normal case, they should match.
    #    In case they don't, resend all entities of that type.
    #    This would work well for hosts, because there aren't that many.
    #    Resending all jobs is quite a big overhead though.
    #    Also, this approach might run into edge cases when entities are
    #    ever deleted.
    # 3. Mixtures of the above: Use 1 for jobs and 2 for hosts.
    #    Using two different approaches isn't consistent and might cause
    #    confusion. Also the issues with the case of deletions might still
    #    occur.
    #
    # The overhead of sending all job and host ids in every heartbeat is low:
    # At peaks one board has about 1200 created but unfinished jobs.
    # See the numbers here: http://goo.gl/gQCGWH
    # Assuming that job id's have 6 digits and that json serialization takes a
    # comma and a space as overhead, the traffic per id sent is about 8 bytes.
    # If 5000 ids need to be sent, this means 40 kilobytes of traffic.
    # A NOT IN query with 5000 ids took about 30ms in tests made.
    # These numbers seem low enough to outweigh the disadvantages of the
    # solutions described above.
    with autotest_stats.Timer('shard_heartbeat'):
        shard = rpc_utils.retrieve_shard(shard_hostname=shard_hostname)
        rpc_utils.persist_records_sent_from_shard(shard, jobs, hqes)

        # Bring the master's view of each known host in line with the status
        # the shard reported, saving only the hosts that actually changed.
        assert len(known_host_ids) == len(known_host_statuses)
        for host_id, reported_status in zip(known_host_ids,
                                            known_host_statuses):
            host = models.Host.objects.get(pk=host_id)
            if host.status != reported_status:
                host.status = reported_status
                host.save()

        new_hosts, new_jobs, new_keyvals = rpc_utils.find_records_for_shard(
                shard, known_job_ids=known_job_ids,
                known_host_ids=known_host_ids)
        return {
            'hosts': [h.serialize() for h in new_hosts],
            'jobs': [j.serialize() for j in new_jobs],
            'suite_keyvals': [keyval.serialize() for keyval in new_keyvals],
        }
Jakob Juelich82b7d1c2014-09-15 16:10:57 -0700822
823
def get_shards(**filter_data):
    """Return the shards matching the given filters.

    @param filter_data: Filters passed on to the shard query.

    @returns A sequence of nested dictionaries of shard information. Each
             dictionary also carries a 'labels' entry with the names of the
             labels attached to that shard.
    """
    shard_models = models.Shard.query_objects(filter_data)
    shard_dicts = rpc_utils.prepare_rows_as_nested_dicts(shard_models, ())
    for shard_dict, shard_model in zip(shard_dicts, shard_models):
        label_names = []
        for label in shard_model.labels.all():
            label_names.append(label.name)
        shard_dict['labels'] = label_names

    return shard_dicts
835
836
def add_shard(hostname, labels):
    """Add a shard and start running jobs on it.

    @param hostname: The hostname of the shard to be added; needs to be unique.
    @param labels: Board labels separated by a comma. Jobs of one of the labels
                   will be assigned to the shard.

    @return: The id of the newly created shard.

    @raises error.RPCException: If label provided doesn't start with `board:`
    @raises model_logic.ValidationError: If a shard with the given hostname
            already exists.
    @raises models.Label.DoesNotExist: If the label specified doesn't exist.
    """
    board_labels = []
    for label_name in labels.split(','):
        if not label_name.startswith('board:'):
            raise error.RPCException('Sharding only supports for `board:.*` '
                                     'labels.')
        # Look every label up before creating anything, so that no shard is
        # created when one of the labels doesn't exist.
        board_labels.append(models.Label.smart_get(label_name))

    new_shard = models.Shard.add_object(hostname=hostname)
    for board_label in board_labels:
        new_shard.labels.add(board_label)
    return new_shard.id
862
863
def delete_shard(hostname):
    """Delete a shard and reclaim all resources from it.

    This claims back all assigned hosts from the shard. To ensure all DUTs are
    in a sane state, a Reboot task with highest priority is scheduled for them.
    This reboots the DUTs and then all left tasks continue to run in drone of
    the master.

    The procedure for deleting a shard:
        * Lock all unlocked hosts on that shard.
        * Remove shard information.
        * Assign a reboot task with highest priority to these hosts.
        * Unlock these hosts, then, the reboot tasks run in front of all other
        tasks.

    The status of jobs that haven't been reported to be finished yet, will be
    lost. The master scheduler will pick up the jobs and execute them.

    @param hostname: Hostname of the shard to delete.
    """
    shard = rpc_utils.retrieve_shard(shard_hostname=hostname)
    unlocked_hosts = models.Host.objects.filter(shard=shard, locked=False)
    hostnames_to_lock = [host.hostname for host in unlocked_hosts]

    # TODO(beeps): Power off shard
    # For ChromeOS hosts, a reboot test with the highest priority is added to
    # the DUT. After a reboot it should be guaranteed that no processes from
    # prior tests that were run by a shard are still running on.

    # Lock all unlocked hosts.
    models.Host.objects.filter(hostname__in=hostnames_to_lock).update(
            locked=True, lock_time=datetime.datetime.now())

    # Remove shard information.
    models.Host.objects.filter(shard=shard).update(shard=None)
    models.Job.objects.filter(shard=shard).update(shard=None)
    shard.labels.clear()
    shard.delete()

    # Assign a reboot task with highest priority: Super.
    reboot_test = models.Test.objects.get(name='platform_BootPerfServer:shard')
    reboot_control_file = utils.read_file(
            os.path.join(common.autotest_dir, reboot_test.path))
    if hostnames_to_lock:
        rpc_utils.create_job_common(
                'reboot_dut_for_shard_deletion',
                priority=priorities.Priority.SUPER,
                control_type='Server',
                control_file=reboot_control_file,
                hosts=hostnames_to_lock)

    # Unlock these shard-related hosts.
    models.Host.objects.filter(hostname__in=hostnames_to_lock).update(
            locked=False, lock_time=None)
916
Dan Shi6964fa52014-12-18 11:04:27 -0800917
def get_servers(hostname=None, role=None, status=None):
    """Get the details of servers with matching hostname, role and status.

    @param hostname: FQDN of the server.
    @param role: Name of the server role, e.g., drone, scheduler. Default to
                 None to match any role.
    @param status: Status of the server, e.g., primary, backup, repair_required.
                   Default to None to match any server status.

    @raises error.RPCException: If server database is not used.
    @return: A list holding the result of get_details() for each matching
             server.
    """
    if not server_manager_utils.use_server_db():
        raise error.RPCException('Server database is not enabled. Please try '
                                 'retrieve servers from global config.')
    matching_servers = server_manager_utils.get_servers(
            hostname=hostname, role=role, status=status)
    details = []
    for server in matching_servers:
        details.append(server.get_details())
    return details
936
937
@rpc_utils.route_rpc_to_master
def get_stable_version(board=stable_version_utils.DEFAULT, android=False):
    """Look up the stable version for the given board.

    @param board: Name of the board.
    @param android: If True, the given board is an Android-based device. If
                    False, assume it's a Chrome OS-based device.

    @return: Stable version of the given board. Return global configure value
             of CROS.stable_cros_version if stable_versions table does not have
             entry of board DEFAULT.
    """
    version = stable_version_utils.get(board=board, android=android)
    return version
Dan Shi25e1fd42014-12-19 14:36:42 -0800951
952
@rpc_utils.route_rpc_to_master
def get_all_stable_versions():
    """Look up the stable versions of all boards.

    @return: A dictionary of board:version.
    """
    versions = stable_version_utils.get_all()
    return versions
960
961
@rpc_utils.route_rpc_to_master
def set_stable_version(version, board=stable_version_utils.DEFAULT):
    """Set the stable version used for the given board.

    @param version: The new value of stable version for given board.
    @param board: Name of the board, default to value `DEFAULT`.
    """
    stable_version_utils.set(board=board, version=version)
970
971
@rpc_utils.route_rpc_to_master
def delete_stable_version(board):
    """Delete the stable version entry of the given board.

    Removes the board's entry from the afe_stable_versions table, so the
    default stable version will be used for that board.

    @param board: Name of the board.
    """
    stable_version_utils.delete(board=board)
Matthew Sartorid96fb9b2015-05-19 18:04:58 -0700982
983
def get_tests_by_build(build, ignore_invalid_tests=False):
    """Get the tests that are available for the specified build.

    @param build: unique name by which to refer to the image.
    @param ignore_invalid_tests: If True, control files that fail to parse are
                                 logged and left out of the result. If False,
                                 the parse error is re-raised.

    @return: A sorted list of all tests that are in the build specified.

    @raises ValueError: If the build cannot be resolved on a devserver.
    @raises error.StageControlFileFailure: If the test_suites artifact cannot
            be staged.
    """
    # Stage the test artifacts.
    try:
        ds = dev_server.ImageServer.resolve(build)
        build = ds.translate(build)
    except dev_server.DevServerException as e:
        raise ValueError('Could not resolve build %s: %s' % (build, e))

    try:
        ds.stage_artifacts(image=build, artifacts=['test_suites'])
    except dev_server.DevServerException as e:
        raise error.StageControlFileFailure(
                'Failed to stage %s: %s' % (build, e))

    # Collect the control files specified in this build
    cfile_getter = control_file_getter.DevServerGetter.create(build, ds)
    control_file_list = cfile_getter.get_control_file_list()

    # The keys list represents attributes in the control_obj that
    # are required by the AFE
    keys = ['author', 'doc', 'name', 'time', 'test_type', 'experimental',
            'test_category', 'test_class', 'dependencies', 'run_verify',
            'sync_count', 'job_retries', 'retries', 'path']

    test_objects = []
    _id = 0
    for control_file_path in control_file_list:
        # Read and parse the control file
        control_file = cfile_getter.get_control_file_contents(
                control_file_path)
        try:
            control_obj = control_data.parse_control_string(control_file)
        except Exception:
            logging.info('Failed to parse congtrol file: %s', control_file_path)
            if not ignore_invalid_tests:
                raise
            # There is no parsed object for this file. Skip it, otherwise the
            # code below would use an unbound control_obj or the one left over
            # from the previous control file.
            continue

        # Extract the values needed for the AFE from the control_obj,
        # falling back to an empty string for missing attributes.
        test_object = dict(
                (key, getattr(control_obj, key, '')) for key in keys)

        # Unfortunately, the AFE expects different key-names for certain
        # values, these must be corrected to avoid the risk of tests
        # being omitted by the AFE.
        # The 'id' is an additional value used in the AFE.
        # The control_data parsing does not reference 'run_reset', but it
        # is also used in the AFE and defaults to True.
        test_object['id'] = _id
        test_object['run_reset'] = True
        test_object['description'] = test_object.get('doc', '')
        test_object['test_time'] = test_object.get('time', 0)
        test_object['test_retry'] = test_object.get('retries', 0)

        # Fix the test name to be consistent with the current presentation
        # of test names in the AFE: the name of the directory holding the
        # control file, followed by the dot-separated parts of the file name
        # after its first component, joined with ':'.
        testpath, subname = os.path.split(control_file_path)
        testname = os.path.basename(testpath)
        subname = subname.split('.')[1:]
        if subname:
            testname = '%s:%s' % (testname, ':'.join(subname))

        test_object['name'] = testname

        # Correct the test path as parse_control_string sets an empty string.
        test_object['path'] = control_file_path

        _id += 1
        test_objects.append(test_object)

    test_objects = sorted(test_objects, key=lambda x: x.get('name'))
    return rpc_utils.prepare_for_serialization(test_objects)