#!/usr/bin/python
#
# Copyright 2015 Google Inc. All Rights Reserved.
4
5import argparse
6import getpass
7import os
8import sys
9import traceback
10
11from utils import logger
12from utils import machines
13from utils import misc
14
15
class AFELockException(Exception):
  """Base class for all exceptions raised by this module."""
18
19
class MachineNotPingable(AFELockException):
  """Raised when a machine does not respond to ping."""
22
23
class MissingHostInfo(AFELockException):
  """Raised when no info about a machine can be found on the machine servers."""
26
27
class UpdateNonLocalMachine(AFELockException):
  """Raised when user requests to add/remove a ChromeOS HW Lab machine."""
30
31
class DuplicateAdd(AFELockException):
  """Raised when user requests to add a machine that's already on the server."""
34
35
class UpdateServerError(AFELockException):
  """Raised when an attempt to add/remove a local-server machine fails."""
38
39
class LockingError(AFELockException):
  """Raised when a server fails to lock/unlock a machine as requested."""
42
43
class DontOwnLock(AFELockException):
  """Raised when user attempts to unlock a machine locked by someone else."""
  # This should not be raised if the user specified '--force'
47
48
class NoAFEServer(AFELockException):
  """Raised when the autotest server cannot be found or accessed."""
51
52
class AFEAccessError(AFELockException):
  """Raised when info about a lab machine cannot be read from the lab server."""
55
56
class AFELockManager(object):
  """Class for locking/unlocking machines via Autotest Front End servers.

  This class contains methods for checking the locked status of machines
  on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
  has methods for adding/removing machines from the local server, and for
  changing the lock status of machines on either server.  For the ChromeOS
  HW Lab, it only allows access to the toolchain team lab machines, as
  defined in toolchain-utils/crosperf/default_remotes.  By default it will
  look for a local server on chrotomation2.mtv.corp.google.com, but an
  alternative local AFE server can be supplied, if desired.

  !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
  thread/process of a program.  If you launch threads and try to call it
  from a thread, you will get an error.  This has to do with restrictions
  in the Python virtual machine (and signal handling) and cannot be changed.
  """

  LOCAL_SERVER = 'chrotomation2.mtv.corp.google.com'

  def __init__(self, remotes, force_option, chromeos_root, local_server,
               local=True, log=None):
    """Initializes an AFELockManager object.

    Args:
      remotes: A list of machine names or ip addresses to be managed.  Names
        and ip addresses should be represented as strings.  If the list is
        empty (or None), the lock manager will get all known machines.
      force_option: A Boolean indicating whether or not to force an unlock of
        a machine that was locked by someone else.
      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
      local_server: A string containing the name or ip address of the machine
        that is running an AFE server, which is to be used for managing
        machines that are not in the ChromeOS HW lab.  If empty/None,
        AFELockManager.LOCAL_SERVER is used.
      local: A Boolean indicating whether or not to use/allow a local AFE
        server to be used (see local_server argument).
      log: If not None, this is the logger object to be used for writing out
        informational output messages.  It is expected to be an instance of
        Logger class from utils/logger.py.

    Raises:
      MachineNotPingable: If a local server is requested but does not respond
        to ping.
    """
    self.chromeos_root = chromeos_root
    self.user = getpass.getuser()
    self.logger = log or logger.GetLogger()
    autotest_path = os.path.join(chromeos_root,
                                 'src/third_party/autotest/files')

    # Only append entries that are missing, so that creating several managers
    # does not keep growing sys.path with duplicates.
    for path in (chromeos_root, autotest_path,
                 os.path.join(autotest_path, 'server', 'cros')):
      if path not in sys.path:
        sys.path.append(path)

    # We have to wait to do these imports until the paths above have
    # been fixed.
    from client import setup_modules
    setup_modules.setup(base_path=autotest_path,
                        root_module_name='autotest_lib')

    from dynamic_suite import frontend_wrappers

    self.afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                             delay_sec=10,
                                             debug=False,
                                             server='cautotest')
    if not local:
      self.local_afe = None
    else:
      server = local_server or AFELockManager.LOCAL_SERVER
      # Make sure local server is pingable.
      error_msg = ('Local autotest server machine %s not responding to ping.'
                   % server)
      self.CheckMachine(server, error_msg)
      self.local_afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                     delay_sec=10,
                                                     debug=False,
                                                     server=server)
    self.local = local
    self.machines = self._UniqueMachines(remotes)
    self.force = force_option
    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
    if not self.machines:
      self.machines = self.toolchain_lab_machines + self.GetAllNonlabMachines()

  @staticmethod
  def _UniqueMachines(remotes):
    """Returns remotes with duplicates dropped, keeping first-seen order.

    Args:
      remotes: An iterable of machine names, or None.

    Returns:
      A new list with each distinct name once; empty if remotes is None/empty.
    """
    seen = set()
    unique = []
    for remote in remotes or []:
      if remote not in seen:
        seen.add(remote)
        unique.append(remote)
    return unique

  def CheckMachine(self, machine, error_msg):
    """Verifies that machine is responding to ping.

    Args:
      machine: String containing the name or ip address of machine to check.
      error_msg: Message to use for the exception if ping fails.

    Raises:
      MachineNotPingable:  If machine is not responding to 'ping'
    """
    if not machines.MachineIsPingable(machine, logging_level='none'):
      raise MachineNotPingable(error_msg)

  def MachineIsKnown(self, machine):
    """Checks to see if either AFE server knows the given machine.

    Args:
      machine: String containing name or ip address of machine to check.

    Returns:
      Boolean indicating if the machine is in the list of known machines for
        either AFE server.
    """
    if machine in self.toolchain_lab_machines:
      return True
    # The local server is only consulted when one is configured.
    return bool(self.local_afe) and machine in self.GetAllNonlabMachines()

  @staticmethod
  def _ParseDefaultRemotes(lines):
    """Extracts machine names from 'board: machine machine ...' lines.

    Everything after the first ':' on a line is treated as a
    whitespace-separated list of machine names.  Lines without a ':' (for
    example blank lines) contribute nothing instead of raising.

    Args:
      lines: An iterable of text lines.

    Returns:
      A list of machine names in the order they appear.
    """
    machine_list = []
    for line in lines:
      _, _, remotes = line.partition(':')
      machine_list.extend(remotes.split())
    return machine_list

  def GetAllToolchainLabMachines(self):
    """Gets a list of all the toolchain machines in the ChromeOS HW lab.

    The names are read from crosperf/default_remotes, located relative to the
    directory containing this file.

    Returns:
      A list of names of the toolchain machines in the ChromeOS HW lab.
    """
    machines_file = os.path.join(os.path.dirname(__file__),
                                 'crosperf', 'default_remotes')
    with open(machines_file, 'r') as input_file:
      return self._ParseDefaultRemotes(input_file)

  def GetAllNonlabMachines(self):
    """Gets a list of all known machines on the local AFE server.

    Returns:
      A list of the names of the machines on the local AFE server; an empty
      list if there is no local AFE server.
    """
    non_lab_machines = []
    if self.local_afe:
      non_lab_machines = self.local_afe.get_hostnames()
    return non_lab_machines

  def PrintStatusHeader(self, is_lab_machine):
    """Prints the status header lines for machines.

    Args:
      is_lab_machine: Boolean indicating whether to print HW Lab header or
        local machine header (different spacing).
    """
    # Single-argument print(...) produces identical output under Python 2
    # (print statement with a parenthesized expression) and Python 3.
    if is_lab_machine:
      print('\nMachine (Board)\t\t\t\tStatus')
      print('---------------\t\t\t\t------\n')
    else:
      print('\nMachine (Board)\t\tStatus')
      print('---------------\t\t------\n')

  def RemoveLocalMachine(self, m):
    """Removes a machine from the local AFE server.

    Does nothing if there is no local AFE server.

    Args:
      m: The machine to remove.

    Raises:
      MissingHostInfo:  Can't find machine to be removed.
    """
    if self.local_afe:
      host_info = self.local_afe.get_hosts(hostname=m)
      if not host_info:
        raise MissingHostInfo('Cannot find/delete machine %s.' % m)
      host_info[0].delete()

  def AddLocalMachine(self, m):
    """Adds a machine to the local AFE server.

    Does nothing if there is no local AFE server.

    Args:
      m: The machine to be added.

    Raises:
      MachineNotPingable: If the machine does not respond to ping.
    """
    if self.local_afe:
      error_msg = 'Machine %s is not responding to ping.' % m
      self.CheckMachine(m, error_msg)
      self.local_afe.create_host(m)

  def AddMachinesToLocalServer(self):
    """Adds one or more machines to the local AFE server.

    Verify that the requested machines are legal to add to the local server,
    i.e. that they are not ChromeOS HW lab machines, and they are not already
    on the local server.  Call AddLocalMachine for each valid machine.

    Raises:
      DuplicateAdd:  Attempt to add a machine that is already on the server.
      UpdateNonLocalMachine:  Attempt to add a ChromeOS HW lab machine.
      UpdateServerError:  Something went wrong while attempting to add a
        machine, or there is no local AFE server to add it to.
    """
    for m in self.machines:
      if m in self.toolchain_lab_machines:
        raise UpdateNonLocalMachine('Machine %s is already in the ChromeOS HW '
                                    'Lab.  Cannot add it to local server.' % m)
      # Without this guard the get_hosts call below would fail with an
      # AttributeError on None.
      if not self.local_afe:
        raise UpdateServerError('Cannot add %s: no local AFE server is '
                                'available.' % m)
      host_info = self.local_afe.get_hosts(hostname=m)
      if host_info:
        raise DuplicateAdd('Machine %s is already on the local server.' % m)
      try:
        self.AddLocalMachine(m)
        self.logger.LogOutput('Successfully added %s to local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError('Error occurred while attempting to add %s. %s'
                                % (m, str(e)))

  def RemoveMachinesFromLocalServer(self):
    """Removes one or more machines from the local AFE server.

    Verify that the requested machines are legal to remove from the local
    server, i.e. that they are not ChromeOS HW lab machines.  Call
    RemoveLocalMachine for each valid machine.

    Raises:
      UpdateNonLocalMachine:  Attempt to remove a ChromeOS HW lab machine.
      UpdateServerError:  Something went wrong while attempting to remove a
        machine, or there is no local AFE server to remove it from.
    """
    for m in self.machines:
      if m in self.toolchain_lab_machines:
        raise UpdateNonLocalMachine('Machine %s is in the ChromeOS HW Lab. '
                                    'This script cannot remove lab machines.'
                                    % m)
      # RemoveLocalMachine is a silent no-op without a local server; fail
      # loudly rather than logging a removal that never happened.
      if not self.local_afe:
        raise UpdateServerError('Cannot remove %s: no local AFE server is '
                                'available.' % m)
      try:
        self.RemoveLocalMachine(m)
        self.logger.LogOutput('Successfully removed %s from local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError('Error occurred while attempting to remove %s '
                                '(%s).' % (m, str(e)))

  @staticmethod
  def _FormatMachineState(name, state):
    """Builds the one-line status text for a machine.

    Args:
      name: The machine name to show.
      state: The machine's state dictionary; reads 'locked' and 'board', and
        also 'locked_by' and 'lock_time' when the machine is locked.

    Returns:
      The status line, without a trailing newline.
    """
    if state['locked']:
      return ('%s (%s)\tlocked by %s since %s' %
              (name, state['board'], state['locked_by'], state['lock_time']))
    return '%s (%s)\tunlocked' % (name, state['board'])

  def ListMachineStates(self, machine_states):
    """Gets and prints the current status for a list of machines.

    Prints out the current status for all of the machines in machine_states:
    first the toolchain lab machines (under the lab header), then all other
    machines (under the local header).

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
    """
    local_machines = []
    printed_hdr = False
    for m in machine_states:
      # State keys may lack the '.cros' suffix used in the lab machine list,
      # so check both spellings.
      if (m in self.toolchain_lab_machines or
          m + '.cros' in self.toolchain_lab_machines):
        if not printed_hdr:
          self.PrintStatusHeader(True)
          printed_hdr = True
        print(self._FormatMachineState(m, machine_states[m]))
      else:
        local_machines.append(m)

    if local_machines:
      self.PrintStatusHeader(False)
      for m in local_machines:
        print(self._FormatMachineState(m, machine_states[m]))

  def UpdateLockInAFE(self, should_lock_machine, machine):
    """Calls an AFE server to lock/unlock a machine.

    Toolchain lab machines are updated on the main AFE server, using the name
    up to the first '.'; all other machines are updated on the local server.

    Args:
      should_lock_machine: Boolean indicating whether to lock the machine (True)
        or unlock the machine (False).
      machine: The machine to update.

    Raises:
      LockingError:  An error occurred while attempting to update the machine
        state.
    """
    action = 'lock' if should_lock_machine else 'unlock'
    kwargs = {'locked': should_lock_machine,
              'lock_reason': 'toolchain user request (%s)' % self.user}

    if machine in self.toolchain_lab_machines:
      m = machine.split('.')[0]
      afe_server = self.afe
    else:
      m = machine
      afe_server = self.local_afe

    # Give a readable reason instead of an AttributeError on None when the
    # required server is not available.
    if afe_server is None:
      raise LockingError('Unable to %s machine %s. %s' %
                         (action, m, 'No AFE server available for it.'))

    try:
      afe_server.run('modify_hosts',
                     host_filter_data={'hostname__in': [m]},
                     update_data=kwargs)
    except Exception as e:
      traceback.print_exc()
      raise LockingError('Unable to %s machine %s. %s' % (action, m, str(e)))

  def UpdateMachines(self, lock_machines):
    """Sets the locked state of the machines to the requested value.

    The machines updated are the ones in self.machines (specified when the
    class object was initialized).

    Args:
      lock_machines: Boolean indicating whether to lock the machines (True) or
        unlock the machines (False).

    Returns:
      A list of the machines whose state was successfully updated.

    Raises:
      LockingError:  Propagated from UpdateLockInAFE on the first failure.
    """
    updated_machines = []
    for m in self.machines:
      self.UpdateLockInAFE(lock_machines, m)

      # Since we returned from self.UpdateLockInAFE we assume the request
      # succeeded.
      if lock_machines:
        self.logger.LogOutput('Locked machine(s) %s.' % m)
      else:
        self.logger.LogOutput('Unlocked machine(s) %s.' % m)
      updated_machines.append(m)

    return updated_machines

  def _InternalRemoveMachine(self, machine):
    """Remove machine from internal list of machines.

    Args:
      machine: Name of machine to be removed from internal list.
    """
    # Check to see if machine is lab machine and if so, make sure it has
    # ".cros" on the end.
    cros_machine = machine
    if 'rack' in machine and 'row' in machine and '.cros' not in machine:
      cros_machine = machine + '.cros'

    # Drop both spellings of the name.
    self.machines = [m for m in self.machines
                     if m not in (cros_machine, machine)]

  def CheckMachineLocks(self, machine_states, cmd):
    """Check that every machine in requested list is in the proper state.

    If the cmd is 'unlock': machines that are already unlocked are dropped
    from self.machines with a warning, and a machine locked by another user
    raises DontOwnLock.
    If the cmd is 'lock': machines that are already locked are dropped from
    self.machines with a warning.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
      cmd:  'lock' or 'unlock'.  The user-requested action for the machines.

    Raises:
      DontOwnLock: The lock on a requested machine is owned by someone else.
    """
    # items() works on both Python 2 and Python 3 dictionaries.
    for k, state in machine_states.items():
      if cmd == 'unlock':
        if not state['locked']:
          self.logger.LogWarning('Attempt to unlock already unlocked machine '
                                 '(%s).' % k)
          self._InternalRemoveMachine(k)
        elif state['locked_by'] != self.user:
          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
                            'else (%s).' % (k, state['locked_by']))
      elif cmd == 'lock':
        if state['locked']:
          self.logger.LogWarning('Attempt to lock already locked machine '
                                 '(%s)' % k)
          self._InternalRemoveMachine(k)

  def HasAFEServer(self, local):
    """Verifies that the AFELockManager has appropriate AFE server.

    Args:
      local: Boolean indicating whether we are checking for the local server
        (True) or for the global server (False).

    Returns:
      A boolean indicating if the AFELockManager has the requested AFE server.
    """
    server = self.local_afe if local else self.afe
    return server is not None

  def GetMachineStates(self, cmd=''):
    """Gets the current state of all the requested machines.

    Gets the current state of all the requested machines, both from the HW lab
    server and from the local server.  Stores the data in a dictionary keyed
    by the hostname reported by the server (or by the requested name, with an
    empty state, when the machine is unknown and cmd is 'add').

    Args:
      cmd: The command for which we are getting the machine states. This is
        important because if one of the requested machines is missing we raise
        an exception, unless the requested command is 'add'.

    Returns:
      A dictionary of machine states for all the machines in the AFELockManager
      object.  Each known machine's state has the keys 'board', 'locked',
      'locked_by' and 'lock_time'.

    Raises:
      NoAFEServer:  Cannot find the HW Lab or local AFE server.
      AFEAccessError:  An error occurred when querying the server about a
        machine.
    """
    if not self.HasAFEServer(False):
      raise NoAFEServer('Error: Cannot connect to main AFE server.')

    if self.local and not self.HasAFEServer(True):
      raise NoAFEServer('Error: Cannot connect to local AFE server.')

    # Named machine_states (not 'machines') so the imported 'machines' module
    # is not shadowed.
    machine_states = {}
    for m in self.machines:
      if m in self.toolchain_lab_machines:
        mod_host = m.split('.')[0]
        host_info = self.afe.get_hosts(hostname=mod_host)
        if not host_info:
          raise AFEAccessError('Unable to get information about %s from main'
                               ' autotest server.' % m)
      else:
        # With the local server disabled there is nowhere to look up a
        # non-lab machine.
        if self.local_afe is None:
          raise NoAFEServer('Error: %s is not a toolchain lab machine and no '
                            'local AFE server is in use.' % m)
        host_info = self.local_afe.get_hosts(hostname=m)
        if not host_info and cmd != 'add':
          raise AFEAccessError('Unable to get information about %s from '
                               'local autotest server.' % m)

      if not host_info:
        # Only reachable for cmd == 'add': the machine is not on the server.
        machine_states[m] = {}
        continue

      host = host_info[0]
      locked = host.locked
      machine_states[host.hostname] = {
          'board': host.platform if host.platform else '??',
          'locked': locked,
          'locked_by': host.locked_by if locked else '',
          'lock_time': host.lock_time if locked else '',
      }
    return machine_states
509
510
def Main(argv):
  """Parses the options, initializes the lock manager and dispatches.

  The selected operation is validated first, so that omitting it is reported
  as a missing operation rather than as missing machines.

  Args:
    argv: The options with which this script was invoked (without the
      program name).

  Returns:
    0 unless an exception is raised.  Invalid options make argparse exit the
    process with an error message.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument('--list', dest='cmd', action='store_const',
                      const='status',
                      help='List current status of all known machines.')
  parser.add_argument('--lock', dest='cmd', action='store_const',
                      const='lock', help='Lock given machine(s).')
  parser.add_argument('--unlock', dest='cmd', action='store_const',
                      const='unlock', help='Unlock given machine(s).')
  parser.add_argument('--status', dest='cmd', action='store_const',
                      const='status',
                      help='List current status of given machine(s).')
  parser.add_argument('--add_machine', dest='cmd', action='store_const',
                      const='add',
                      help='Add machine to local machine server.')
  parser.add_argument('--remove_machine', dest='cmd',
                      action='store_const', const='remove',
                      help='Remove machine from the local machine server.')
  parser.add_argument('--nolocal', dest='local',
                      action='store_false', default=True,
                      help='Do not try to use local machine server.')
  parser.add_argument('--remote', dest='remote',
                      help='machines on which to operate')
  parser.add_argument('--chromeos_root', dest='chromeos_root', required=True,
                      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument('--local_server', dest='local_server', default=None,
                      help='Alternate local autotest server to use.')
  parser.add_argument('--force', dest='force', action='store_true',
                      default=False,
                      help='Force lock/unlock of machines, even if not'
                      ' current lock owner.')

  options = parser.parse_args(argv)
  cmd = options.cmd

  # Check for the operation before checking for machines: with neither given,
  # the missing operation is the more useful thing to report.
  if not cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock,'
                 ' --add_machine, --remove_machine).')

  # Only 'status' may run without an explicit machine list.
  if not options.remote and cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  # --remote is a single whitespace-separated string of machine names.
  machine_list = options.remote.split() if options.remote else []

  lock_manager = AFELockManager(machine_list, options.force,
                                options.chromeos_root, options.local_server,
                                options.local)

  machine_states = lock_manager.GetMachineStates(cmd=cmd)

  if cmd == 'status':
    lock_manager.ListMachineStates(machine_states)

  elif cmd in ('lock', 'unlock'):
    # --force skips the ownership/state checks.
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(cmd == 'lock')

  elif cmd == 'add':
    lock_manager.AddMachinesToLocalServer()

  elif cmd == 'remove':
    lock_manager.RemoveMachinesFromLocalServer()

  return 0
596
597
# Run Main only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))