jamesren | c394022 | 2010-02-19 21:57:37 +0000 | [diff] [blame] | 1 | import common |
| 2 | import os |
| 3 | from autotest_lib.frontend.afe import models as afe_models, model_logic |
jamesren | 3e9f609 | 2010-03-11 21:32:10 +0000 | [diff] [blame] | 4 | from autotest_lib.frontend.planner import models, model_attributes |
jamesren | 9a6f5f6 | 2010-05-05 22:55:54 +0000 | [diff] [blame] | 5 | from autotest_lib.frontend.planner import failure_actions, control_file |
jamesren | 6275824 | 2010-04-28 18:08:25 +0000 | [diff] [blame] | 6 | from autotest_lib.frontend.tko import models as tko_models |
jamesren | 0cde1eb | 2010-04-09 20:45:49 +0000 | [diff] [blame] | 7 | from autotest_lib.client.common_lib import global_config, utils, global_config |
jamesren | c394022 | 2010-02-19 21:57:37 +0000 | [diff] [blame] | 8 | |
| 9 | |
# Prefix prepended to a plan's name to form the name of its host label
PLANNER_LABEL_PREFIX = 'planner_'
# Name of the atomic group that every planner host label is attached to
PLANNER_ATOMIC_GROUP_NAME = 'planner_global_atomic_group'
# 'hostname' value from the 'SERVER' section of the global config; read once,
# when this module is imported
SERVER = global_config.global_config.get_config_value('SERVER', 'hostname')
# Cache used by lazy_load(): maps a file path to the contents read from it
LAZY_LOADED_FILES = {}
| 14 | |
| 15 | |
def create_plan_label(plan):
    """
    Creates the host label that gets applied to the hosts of a plan

    The label is named PLANNER_LABEL_PREFIX + plan.name and is attached to the
    planner atomic group, which is created if it does not exist yet and
    re-validated if it had been marked invalid.

    @param plan: the plan to create the label for; only plan.name is read
    @return the newly saved AFE Label
    @raises model_logic.ValidationError if a valid label with that name
            already exists
    """
    atomic_group = afe_models.AtomicGroup.objects.get_or_create(
            name=PLANNER_ATOMIC_GROUP_NAME)[0]
    if atomic_group.invalid:
        atomic_group.invalid = False
        atomic_group.save()

    label_name = PLANNER_LABEL_PREFIX + plan.name
    existing_labels = afe_models.Label.valid_objects.filter(name=label_name)
    if existing_labels:
        raise model_logic.ValidationError('Label %s already exists, '
                                          'cannot start plan' % label_name)

    new_label = afe_models.Label(name=label_name, atomic_group=atomic_group)
    new_label.save()
    return new_label
| 34 | |
| 35 | |
def start_plan(plan, label):
    """
    Takes the necessary steps to start a test plan in Autotest

    Builds the execution engine control file for the plan and queues it as an
    AFE job of control type SERVER, without any hosts.

    @param plan: the plan to start; plan.id and plan.name are read
    @param label: the plan's host label; label.name is read
    """
    engine_timeout = global_config.global_config.get_config_value(
            'PLANNER', 'execution_engine_timeout')
    engine_control = _get_execution_engine_control(
            server=SERVER,
            plan_id=plan.id,
            label_name=label.name,
            owner=afe_models.User.current_user().login)

    job_options = dict(
            name=plan.name + '_execution_engine',
            priority=afe_models.Job.Priority.MEDIUM,
            control_file=engine_control,
            control_type=afe_models.Job.ControlType.SERVER,
            synch_count=None,
            # The same configured value is used for both the timeout and the
            # maximum runtime of the execution engine job
            timeout=engine_timeout,
            max_runtime_hrs=engine_timeout,
            run_verify=False,
            reboot_before=False,
            reboot_after=False,
            dependencies=())

    no_hosts = ()
    engine_job = afe_models.Job.create(
            owner=afe_models.User.current_user().login,
            options=job_options, hosts=no_hosts)
    engine_job.queue(hosts=no_hosts)
jamesren | c394022 | 2010-02-19 21:57:37 +0000 | [diff] [blame] | 61 | |
| 62 | |
def _get_execution_engine_control(server, plan_id, label_name, owner):
    """
    Gets the control file to run the execution engine

    The template is loaded (through lazy_load) from the file
    execution_engine_control.srv in this module's directory, and filled in
    with %-style named substitution.

    @return the template with the server, plan_id, label_name and owner
            values substituted in
    """
    template_path = os.path.join(os.path.dirname(__file__),
                                 'execution_engine_control.srv')
    template = lazy_load(template_path)
    substitutions = {'server': server,
                     'plan_id': plan_id,
                     'label_name': label_name,
                     'owner': owner}
    return template % substitutions
jamesren | c394022 | 2010-02-19 21:57:37 +0000 | [diff] [blame] | 71 | |
| 72 | |
def lazy_load(path):
    """
    Returns the contents of the file at the given path, reading it only once

    The first request for a path reads the file and stores the result in
    LAZY_LOADED_FILES; later requests for the same path are served from there.

    @param path: path of the file to load; also used as the cache key
    @return the cached file contents
    """
    try:
        return LAZY_LOADED_FILES[path]
    except KeyError:
        # First request for this path: read it and remember the result
        contents = utils.read_file(path)
        LAZY_LOADED_FILES[path] = contents
        return contents
jamesren | 3e9f609 | 2010-03-11 21:32:10 +0000 | [diff] [blame] | 81 | |
| 82 | |
def update_hosts_table(plan):
    """
    Synchronizes the planner Hosts of a plan with the plan's host labels

    Every AFE host found under one of the plan's host labels gets a planner
    Host entry (newly created entries are flagged added_by_label). Planner
    Hosts that were added by a label but are no longer found under any of the
    plan's labels are deleted.

    @param plan: the plan whose hosts should be refreshed
    """
    current_host_ids = set()

    for host_label in plan.host_labels.all():
        for afe_host in host_label.host_set.all():
            planner_host, is_new = models.Host.objects.get_or_create(
                    plan=plan, host=afe_host)
            if is_new:
                planner_host.added_by_label = True
                planner_host.save()

            current_host_ids.add(planner_host.host.id)

    # Only label-added hosts are candidates for removal
    label_added = models.Host.objects.filter(plan=plan, added_by_label=True)
    stale_hosts = label_added.exclude(host__id__in=current_host_ids)
    stale_hosts.delete()
| 105 | |
| 106 | |
def compute_next_test_config(plan, host):
    """
    Gets the next test config that should be run for this plan and host

    Test configs are examined in 'execution_order'. The first one that has no
    HostQueueEntry for this host, and that the host is not skipping, is
    returned.

    @param plan: the plan being executed
    @param host: the planner host to find work for
    @return the next test config, or None if the host is blocked, still has an
            incomplete HostQueueEntry, or has nothing left to run. In the
            last case the host is also marked complete and saved.
    """
    if host.blocked:
        return None

    for config in plan.testconfig_set.order_by('execution_order'):
        planner_jobs = plan.job_set.filter(test_config=config)
        entries = afe_models.HostQueueEntry.objects.filter(
                job__id__in=planner_jobs.values_list('afe_job', flat=True),
                host=host.host)

        if not (entries or bool(config.skipped_hosts.filter(host=host))):
            # Nothing was scheduled on this host for this config, and the
            # host is not skipping it
            return config

        if any(not entry.complete for entry in entries):
            # HostQueueEntry still active for this host,
            # should not run another test
            return None

    # All HQEs related to this host are complete
    host.complete = True
    host.save()
    return None
| 135 | |
| 136 | |
def check_for_completion(plan):
    """
    Marks the plan as complete if none of its hosts is incomplete

    When no planner Host of the plan has complete=False, plan.complete is set
    to True and the plan is saved. Otherwise the plan is left untouched.

    @param plan: the plan to check
    """
    unfinished_hosts = models.Host.objects.filter(plan=plan, complete=False)
    if unfinished_hosts:
        return

    plan.complete = True
    plan.save()
| 144 | |
| 145 | |
def compute_test_run_status(status):
    """
    Converts a TKO test status to a Planner test run status

    @param status: the TKO status word
    @return TestRunStatus.PASSED for 'GOOD', TestRunStatus.ACTIVE for
            'RUNNING', and TestRunStatus.FAILED for anything else
    """
    run_status = model_attributes.TestRunStatus
    if status == 'GOOD':
        converted = run_status.PASSED
    elif status == 'RUNNING':
        converted = run_status.ACTIVE
    else:
        converted = run_status.FAILED
    return converted
| 156 | |
| 157 | |
def add_test_run(plan, planner_job, tko_test, hostname, status):
    """
    Adds a TKO test to the Planner Test Run tables

    Looks up (or creates) the TestRun identified by the plan, planner job, TKO
    test and planner host, then stores the given status on it.

    @param plan: the plan the test ran under
    @param planner_job: the planner job that ran the test
    @param tko_test: the TKO test to record
    @param hostname: hostname of the AFE host the test ran on
    @param status: the Planner test run status to store
    """
    afe_host = afe_models.Host.objects.get(hostname=hostname)
    plan_host = models.Host.objects.get(plan=plan, host=afe_host)

    lookup = dict(plan=plan, test_job=planner_job, tko_test=tko_test,
                  host=plan_host)
    run = models.TestRun.objects.get_or_create(**lookup)[0]
    run.status = status
    run.save()
jamesren | 4be631f | 2010-04-08 23:01:22 +0000 | [diff] [blame] | 171 | |
| 172 | |
def process_failure(failure_id, host_action, test_action, labels, keyvals,
                    bugs, reason, invalidate):
    """
    Triages a single failed test run

    Applies the host and test actions, then records the given labels, job
    keyvals, bugs and failure reason, and finally marks the test run as seen
    and triaged.

    @param failure_id: id of the planner TestRun being triaged
    @param host_action: action to take on the test run's host
    @param test_action: action to take on the test run's planner job
    @param labels: names of the TKO test labels to attach to the TKO test
    @param keyvals: dict of job keyvals to set, or None for no keyvals. When
            a key already exists, its previous value is kept under
            'original_' + key
    @param bugs: external bug ids to associate with the test run
    @param reason: new failure reason, or None to leave the reason alone. The
            previous reason is kept in an 'original_reason' test attribute
    @param invalidate: value to store in the test run's 'invalidated' field
    """
    keyvals = {} if keyvals is None else keyvals

    test_run = models.TestRun.objects.get(id=failure_id)

    _process_host_action(test_run.host, host_action)
    _process_test_action(test_run.test_job, test_action)

    # Attach the test labels, creating any that do not exist yet
    for label_name in labels:
        test_label = tko_models.TestLabel.objects.get_or_create(
                name=label_name)[0]
        test_run.tko_test.testlabel_set.add(test_label)

    # Set the job keyvals
    for key, new_value in keyvals.iteritems():
        entry, is_new = tko_models.JobKeyval.objects.get_or_create(
                job=test_run.tko_test.job, key=key)
        if not is_new:
            # Preserve the value that is about to be overwritten
            tko_models.JobKeyval.objects.create(
                    job=test_run.tko_test.job,
                    key='original_' + key,
                    value=entry.value)
        entry.value = new_value
        entry.save()

    # Associate the bugs, creating any that do not exist yet
    for external_uid in bugs:
        bug_entry = models.Bug.objects.get_or_create(
                external_uid=external_uid)[0]
        test_run.bugs.add(bug_entry)

    # Replace the failure reason, keeping the previous one as an attribute
    if reason is not None:
        tko_models.TestAttribute.objects.create(
                test=test_run.tko_test,
                attribute='original_reason',
                value=test_run.tko_test.reason)
        test_run.tko_test.reason = reason
        test_run.tko_test.save()

    # Set 'invalidated', 'seen', and 'triaged'
    test_run.invalidated = invalidate
    test_run.seen = True
    test_run.triaged = True
    test_run.save()
| 218 | |
| 219 | |
def _site_process_host_action_dummy(host, action):
    """
    Fallback used when no site-specific host action handler is available

    Always returns False, i.e. the action was not handled, so that the caller
    falls through to the default processing.
    """
    return False
| 222 | |
| 223 | |
def _process_host_action(host, action):
    """
    Takes the specified action on the host

    A site-specific handler (site_process_host_action from site_rpc_utils,
    falling back to a dummy that handles nothing) gets the first chance to
    process the action. If it does not handle it, BLOCK and UNBLOCK are
    applied to host.blocked and the host is saved.

    @param host: the planner host to act on
    @param action: one of the values of failure_actions.HostAction
    @raises ValueError if action is not a known host action
    @raises NotImplementedError for REINSTALL, which is not implemented yet
    """
    HostAction = failure_actions.HostAction
    if action not in HostAction.values:
        raise ValueError('Unexpected host action %s' % action)

    site_process = utils.import_site_function(
            __file__, 'autotest_lib.frontend.planner.site_rpc_utils',
            'site_process_host_action', _site_process_host_action_dummy)

    if not site_process(host, action):
        # site_process_host_action returns True if and only if it matched a
        # site-specific processing option
        if action == HostAction.BLOCK:
            host.blocked = True
        elif action == HostAction.UNBLOCK:
            host.blocked = False
        else:
            assert action == HostAction.REINSTALL
            # NotImplemented is the comparison sentinel, not an exception
            # class: calling it fails with TypeError. NotImplementedError is
            # the exception that was meant here.
            raise NotImplementedError('TODO: implement reinstall')

        host.save()
| 248 | |
| 249 | |
def _process_test_action(planner_job, action):
    """
    Takes the specified action for this planner job

    SKIP leaves the job untouched; RERUN sets requires_rerun on the job and
    saves it.

    @param planner_job: the planner job to act on
    @param action: one of the values of failure_actions.TestAction
    @raises ValueError if action is not a known test action
    """
    test_actions = failure_actions.TestAction
    if action not in test_actions.values:
        raise ValueError('Unexpected test action %s' % action)

    if action == test_actions.SKIP:
        # Skipping requires no change to the job
        return

    assert action == test_actions.RERUN
    planner_job.requires_rerun = True
    planner_job.save()
jamesren | 9a6f5f6 | 2010-05-05 22:55:54 +0000 | [diff] [blame] | 265 | |
| 266 | |
def set_additional_parameters(plan, additional_parameters):
    """
    Stores the additional parameters of a plan

    One AdditionalParameter is created per entry, with application_order set
    to the entry's position in the sequence, plus one AdditionalParameterValue
    per key of the entry's 'param_values' (the value is stored as its repr()).

    @param plan: the plan the parameters belong to
    @param additional_parameters: sequence of dicts with the keys
            'hostname_regex', 'param_type' and 'param_values'. Nothing is
            done if it is None or empty.
    """
    for order, spec in enumerate(additional_parameters or ()):
        # Read every expected key before creating anything
        regex = spec['hostname_regex']
        kind = spec['param_type']
        values = spec['param_values']

        parameter = models.AdditionalParameter.objects.create(
                plan=plan, hostname_regex=regex,
                param_type=kind, application_order=order)

        for key, value in values.iteritems():
            models.AdditionalParameterValue.objects.create(
                    additional_parameter=parameter,
                    key=key, value=repr(value))
| 284 | |
| 285 | |
def _additional_wrap_arguments_dummy(plan, hostname):
    """
    Fallback used when no site-specific additional_wrap_arguments is available

    @return a new, empty dict: no extra arguments for wrapping control files
    """
    return dict()
| 288 | |
| 289 | |
def get_wrap_arguments(plan, hostname, param_type):
    """
    Gets the additional parameter values that apply to a host

    @param plan: the plan the host belongs to
    @param hostname: hostname to find the applicable parameter for
    @param param_type: the type of additional parameter to look up
    @return a dict built from the (key, value) pairs of the applicable
            AdditionalParameter, or an empty dict if none applies
    """
    find_applicable = (
            models.AdditionalParameter.find_applicable_additional_parameter)
    applicable = find_applicable(plan=plan, hostname=hostname,
                                 param_type=param_type)
    if applicable:
        pairs = applicable.additionalparametervalue_set.values_list(
                'key', 'value')
        return dict(pairs)

    return {}
| 300 | |
| 301 | |
def wrap_control_file(plan, hostname, run_verify, test_config):
    """
    Wraps the control file of a test config for a host of the plan

    The wrapping is delegated to control_file.wrap_control_file. It receives
    the VERIFY additional parameters that apply to the host, plus any keyword
    arguments contributed by the site-specific additional_wrap_arguments hook
    (none when the hook is not available).

    @param plan: the plan being executed
    @param hostname: hostname the control file is wrapped for
    @param run_verify: whether verify should run; passed on negated, as
            skip_verify
    @param test_config: the test config providing the control file contents
            and the is_server flag
    @return whatever control_file.wrap_control_file returns
    """
    site_hook = utils.import_site_function(
            __file__, 'autotest_lib.frontend.planner.site_rpc_utils',
            'additional_wrap_arguments', _additional_wrap_arguments_dummy)
    extra_arguments = site_hook(plan, hostname)

    verify_type = model_attributes.AdditionalParameterType.VERIFY
    verify_arguments = get_wrap_arguments(plan, hostname, verify_type)

    return control_file.wrap_control_file(
            control_file=test_config.control_file.contents,
            is_server=test_config.is_server,
            skip_verify=(not run_verify),
            verify_params=verify_arguments,
            **extra_arguments)
jamesren | 2e48bcb | 2010-05-18 20:41:36 +0000 | [diff] [blame^] | 320 | |
| 321 | |
def compute_passed(host):
    """
    Tells whether the host can be considered to have passed its test plan

    Every test config of the host's plan, except those that skip this host,
    must have at least one planner job with no test run whose TKO status is
    something other than 'GOOD'.

    @param host: the planner host to evaluate
    @return None if the host is not complete yet, False if some test config
            has no such job, True otherwise
    """
    if not host.complete:
        return None

    applicable_configs = host.plan.testconfig_set.exclude(
            skipped_hosts=host.host)
    for config in applicable_configs:
        has_good_job = any(
                not job.testrun_set.exclude(tko_test__status__word='GOOD')
                for job in config.job_set.all())
        if not has_good_job:
            # None of this test config's jobs was free of bad test runs
            return False

    return True