blob: 54219cd44336697cc00d692902c7ad529ed4a1d9 [file] [log] [blame]
cmticee5bc63b2015-05-27 16:59:37 -07001#!/usr/bin/python
2#
# Copyright 2015 Google Inc. All Rights Reserved.
4
5import argparse
6import getpass
7import os
8import sys
9import traceback
10
11from utils import logger
12from utils import machines
13from utils import misc
14
15
class AFELockException(Exception):
    """Common ancestor of every exception defined in this module."""
18
19
class MachineNotPingable(AFELockException):
    """Signals that a machine did not answer a ping request."""
22
23
class MissingHostInfo(AFELockException):
    """Signals that the machine servers have no information on a machine."""
26
27
class UpdateNonLocalMachine(AFELockException):
    """Signals a request to add or remove a ChromeOS HW Lab machine."""
30
31
class DuplicateAdd(AFELockException):
    """Signals a request to add a machine the server already knows about."""
34
35
class UpdateServerError(AFELockException):
    """Signals that adding/removing a machine on the local server failed."""
38
39
class LockingError(AFELockException):
    """Signals that the server could not lock/unlock a machine as asked."""
42
43
class DuplicateLock(AFELockException):
    """Signals an attempt to lock a machine that is already locked."""
46
47
class DuplicateUnlock(AFELockException):
    """Signals an attempt to unlock a machine that is already unlocked."""
50
51
class DontOwnLock(AFELockException):
    """Signals an attempt to unlock a machine that someone else locked."""
    # This should not be raised if the user specified '--force'
55
56
class NoAFEServer(AFELockException):
    """Signals that the autotest server cannot be found or accessed."""
59
60
class AFEAccessError(AFELockException):
    """Signals that the lab server returned no information on a machine."""
63
64
class AFELockManager(object):
    """Class for locking/unlocking machines via Autotest Front End servers.

    This class contains methods for checking the locked status of machines
    on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
    has methods for adding/removing machines from the local server, and for
    changing the lock status of machines on either server.  For the ChromeOS
    HW Lab, it only allows access to the toolchain team lab machines, as
    defined in toolchain-utils/crosperf/default_remotes.  By default it will
    look for a local server on chrotomation2.mtv.corp.google.com, but an
    alternative local AFE server can be supplied, if desired.

    !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
    thread/process of a program.  If you launch threads and try to call it
    from a thread, you will get an error.  This has to do with restrictions
    in the Python virtual machine (and signal handling) and cannot be
    changed.
    """

    # Local AFE server used when the caller does not supply one.
    LOCAL_SERVER = 'chrotomation2.mtv.corp.google.com'

    def __init__(self, remotes, force_option, chromeos_root, local_server,
                 local=True, log=None):
        """Initializes an AFELockManager object.

        Args:
            remotes: A list of machine names or ip addresses to be managed.
                Names and ip addresses should be represented as strings.  If
                the list is empty (or None), the lock manager will get all
                known machines.
            force_option: A Boolean indicating whether or not to force an
                unlock of a machine that was locked by someone else.
            chromeos_root: The ChromeOS chroot to use for the autotest
                scripts.
            local_server: A string containing the name or ip address of the
                machine that is running an AFE server, which is to be used
                for managing machines that are not in the ChromeOS HW lab.
            local: A Boolean indicating whether or not to use/allow a local
                AFE server to be used (see local_server argument).
            log: If not None, this is the logger object to be used for
                writing out informational output messages.  It is expected
                to be an instance of Logger class from utils/logger.py.

        Raises:
            MachineNotPingable: The local AFE server does not respond to
                ping (only checked when 'local' is true).
        """
        self.chromeos_root = chromeos_root
        self.user = getpass.getuser()
        self.logger = log or logger.GetLogger()
        autotest_path = os.path.join(chromeos_root,
                                     'src/third_party/autotest/files')

        # The autotest modules are found through sys.path.  Entries that are
        # already present are skipped so that creating several managers does
        # not keep growing sys.path.
        for path in (chromeos_root, autotest_path,
                     os.path.join(autotest_path, 'server', 'cros')):
            if path not in sys.path:
                sys.path.append(path)

        # We have to wait to do these imports until the paths above have
        # been fixed.
        from client import setup_modules
        setup_modules.setup(base_path=autotest_path,
                            root_module_name='autotest_lib')

        from dynamic_suite import frontend_wrappers

        self.afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                 delay_sec=10,
                                                 debug=False,
                                                 server='cautotest')
        if not local:
            self.local_afe = None
        else:
            server_name = local_server or AFELockManager.LOCAL_SERVER
            # Make sure local server is pingable.
            error_msg = ('Local autotest server machine %s not responding to '
                         'ping.' % server_name)
            self.CheckMachine(server_name, error_msg)
            self.local_afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                           delay_sec=10,
                                                           debug=False,
                                                           server=server_name)
        self.local = local
        # Drop duplicate names; tolerate None as "no machines given".
        self.machines = list(set(remotes or []))
        self.force = force_option
        self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
        if not self.machines:
            self.machines = (self.toolchain_lab_machines +
                             self.GetAllNonlabMachines())

    def CheckMachine(self, machine, error_msg):
        """Verifies that machine is responding to ping.

        Args:
            machine: String containing the name or ip address of machine to
                check.
            error_msg: Message to attach to the exception if ping fails.

        Raises:
            MachineNotPingable: If machine is not responding to 'ping'
        """
        if not machines.MachineIsPingable(machine, logging_level='none'):
            raise MachineNotPingable(error_msg)

    def MachineIsKnown(self, machine):
        """Checks to see if either AFE server knows the given machine.

        Args:
            machine: String containing name or ip address of machine to
                check.

        Returns:
            Boolean indicating if the machine is in the list of known
            machines for either AFE server.
        """
        if machine in self.toolchain_lab_machines:
            return True
        # The local server is only consulted when one is configured.
        return bool(self.local_afe) and machine in self.GetAllNonlabMachines()

    def GetAllToolchainLabMachines(self):
        """Gets a list of all the toolchain machines in the ChromeOS HW lab.

        The names are read from the 'crosperf/default_remotes' file that
        sits next to this script.  Each line is expected to look like
        '<board>: <machine> <machine> ...'.

        Returns:
            A list of names of the toolchain machines in the ChromeOS HW lab.
        """
        machines_file = os.path.join(os.path.dirname(__file__),
                                     'crosperf', 'default_remotes')
        machine_list = []
        with open(machines_file, 'r') as input_file:
            for line in input_file:
                # Only the text after the first ':' holds machine names.
                # Lines without a ':' (e.g. blank lines) carry no machines
                # and are skipped instead of aborting the whole read.
                _, separator, remotes = line.partition(':')
                if not separator:
                    continue
                machine_list.extend(remotes.split())
        return machine_list

    def GetAllNonlabMachines(self):
        """Gets a list of all known machines on the local AFE server.

        Returns:
            A list of the names of the machines on the local AFE server; an
            empty list if no local AFE server is in use.
        """
        if not self.local_afe:
            return []
        return self.local_afe.get_hostnames()

    def PrintStatusHeader(self, is_lab_machine):
        """Prints the status header lines for machines.

        Args:
            is_lab_machine: Boolean indicating whether to print HW Lab header
                or local machine header (different spacing).
        """
        if is_lab_machine:
            print('\nMachine (Board)\t\t\t\tStatus')
            print('---------------\t\t\t\t------\n')
        else:
            print('\nMachine (Board)\t\tStatus')
            print('---------------\t\t------\n')

    def RemoveLocalMachine(self, m):
        """Removes a machine from the local AFE server.

        Does nothing when no local AFE server is in use.

        Args:
            m: The machine to remove.

        Raises:
            MissingHostInfo: Can't find machine to be removed.
        """
        if not self.local_afe:
            return
        host_info = self.local_afe.get_hosts(hostname=m)
        if not host_info:
            raise MissingHostInfo('Cannot find/delete machine %s.' % m)
        host_info[0].delete()

    def AddLocalMachine(self, m):
        """Adds a machine to the local AFE server.

        Does nothing when no local AFE server is in use.

        Args:
            m: The machine to be added.

        Raises:
            MachineNotPingable: The machine does not respond to ping.
        """
        if self.local_afe:
            error_msg = 'Machine %s is not responding to ping.' % m
            self.CheckMachine(m, error_msg)
            self.local_afe.create_host(m)

    def AddMachinesToLocalServer(self):
        """Adds one or more machines to the local AFE server.

        Verify that the requested machines are legal to add to the local
        server, i.e. that they are not ChromeOS HW lab machines, and they are
        not already on the local server.  Call AddLocalMachine for each valid
        machine.

        Raises:
            DuplicateAdd: Attempt to add a machine that is already on the
                server.
            UpdateNonLocalMachine: Attempt to add a ChromeOS HW lab machine.
            NoAFEServer: There is no local AFE server to add the machine to.
            UpdateServerError: Something went wrong while attempting to add
                a machine.
        """
        for m in self.machines:
            if m in self.toolchain_lab_machines:
                raise UpdateNonLocalMachine(
                    'Machine %s is already in the ChromeOS HW '
                    'Lab. Cannot add it to local server.' % m)
            # Without this check the lookup below would fail with an
            # AttributeError on None.
            if not self.local_afe:
                raise NoAFEServer('Error: No local AFE server available; '
                                  'cannot add %s.' % m)
            host_info = self.local_afe.get_hosts(hostname=m)
            if host_info:
                raise DuplicateAdd(
                    'Machine %s is already on the local server.' % m)
            try:
                self.AddLocalMachine(m)
                self.logger.LogOutput(
                    'Successfully added %s to local server.' % m)
            except Exception as e:
                traceback.print_exc()
                raise UpdateServerError(
                    'Error occurred while attempting to add %s. %s'
                    % (m, str(e)))

    def RemoveMachinesFromLocalServer(self):
        """Removes one or more machines from the local AFE server.

        Verify that the requested machines are legal to remove from the local
        server, i.e. that they are not ChromeOS HW lab machines.  Call
        RemoveLocalMachine for each valid machine.

        Raises:
            UpdateNonLocalMachine: Attempt to remove a ChromeOS HW lab
                machine.
            NoAFEServer: There is no local AFE server to remove the machine
                from.
            UpdateServerError: Something went wrong while attempting to
                remove a machine.
        """
        for m in self.machines:
            if m in self.toolchain_lab_machines:
                raise UpdateNonLocalMachine(
                    'Machine %s is in the ChromeOS HW Lab. '
                    'This script cannot remove lab machines.'
                    % m)
            # RemoveLocalMachine is a no-op without a local server; fail
            # loudly rather than report a removal that never happened.
            if not self.local_afe:
                raise NoAFEServer('Error: No local AFE server available; '
                                  'cannot remove %s.' % m)
            try:
                self.RemoveLocalMachine(m)
                self.logger.LogOutput(
                    'Successfully removed %s from local server.' % m)
            except Exception as e:
                traceback.print_exc()
                raise UpdateServerError(
                    'Error occurred while attempting to remove %s '
                    '(%s).' % (m, str(e)))

    @staticmethod
    def _PrintMachineState(m, state):
        """Prints the one-line lock status of machine m."""
        if state['locked']:
            print('%s (%s)\tlocked by %s since %s' %
                  (m, state['board'], state['locked_by'],
                   state['lock_time']))
        else:
            print('%s (%s)\tunlocked' % (m, state['board']))

    def ListMachineStates(self, machine_states):
        """Gets and prints the current status for a list of machines.

        Prints out the current status for all of the machines in the current
        AFELockManager's list of machines (set when the object is
        initialized).  HW Lab machines are printed first, under their own
        header, followed by the machines on the local server.

        Args:
            machine_states: A dictionary of the current state of every
                machine in the current AFELockManager's list of machines.
                Normally obtained by calling
                AFELockManager::GetMachineStates.
        """
        local_machines = []
        printed_hdr = False
        for m in machine_states:
            # The state dictionary may be keyed by the short host name while
            # the lab list holds the name with a '.cros' suffix.
            cros_name = m + '.cros'
            if (m in self.toolchain_lab_machines or
                    cros_name in self.toolchain_lab_machines):
                if not printed_hdr:
                    self.PrintStatusHeader(True)
                    printed_hdr = True
                self._PrintMachineState(m, machine_states[m])
            else:
                local_machines.append(m)

        if local_machines:
            self.PrintStatusHeader(False)
            for m in local_machines:
                self._PrintMachineState(m, machine_states[m])

    def UpdateLockInAFE(self, should_lock_machine, machine):
        """Calls an AFE server to lock/unlock a machine.

        Args:
            should_lock_machine: Boolean indicating whether to lock the
                machine (True) or unlock the machine (False).
            machine: The machine to update.

        Raises:
            LockingError: An error occurred while attempting to update the
                machine state.
        """
        action = 'lock' if should_lock_machine else 'unlock'
        kwargs = {'locked': should_lock_machine}

        if machine in self.toolchain_lab_machines:
            # The lab server is queried with the name up to the first '.'.
            m = machine.split('.')[0]
            kwargs['lock_reason'] = ('toolchain user request (%s)'
                                     % self.user)
            afe_server = self.afe
        else:
            m = machine
            afe_server = self.local_afe

        try:
            afe_server.run('modify_hosts',
                           host_filter_data={'hostname__in': [m]},
                           update_data=kwargs)
        except Exception as e:
            traceback.print_exc()
            raise LockingError('Unable to %s machine %s. %s'
                               % (action, m, str(e)))

    def UpdateMachines(self, lock_machines):
        """Sets the locked state of the machines to the requested value.

        The machines updated are the ones in self.machines (specified when
        the class object was initialized).

        Args:
            lock_machines: Boolean indicating whether to lock the machines
                (True) or unlock the machines (False).

        Raises:
            LockingError: The server failed to update one of the machines.
        """
        for m in self.machines:
            self.UpdateLockInAFE(lock_machines, m)

            # Since we returned from self.UpdateLockInAFE we assume the
            # request succeeded.
            if lock_machines:
                self.logger.LogOutput('Locked machine(s) %s.' % m)
            else:
                self.logger.LogOutput('Unlocked machine(s) %s.' % m)

    def CheckMachineLocks(self, machine_states, cmd):
        """Check that every machine in requested list is in the proper state.

        If the cmd is 'unlock' verify that every machine is locked by
        requestor.  If the cmd is 'lock' verify that every machine is
        currently unlocked.

        Args:
            machine_states: A dictionary of the current state of every
                machine in the current AFELockManager's list of machines.
                Normally obtained by calling
                AFELockManager::GetMachineStates.
            cmd: 'lock' or 'unlock'.  The user-requested action for the
                machines.

        Raises:
            DuplicateLock: A machine requested to be locked is already
                locked.
            DuplicateUnlock: A machine requested to be unlocked is already
                unlocked.
            DontOwnLock: The lock on a requested machine is owned by someone
                else.
        """
        for k, state in machine_states.items():
            if cmd == 'unlock':
                if not state['locked']:
                    raise DuplicateUnlock(
                        'Attempt to unlock already unlocked machine '
                        '(%s).' % k)

                if state['locked_by'] != self.user:
                    raise DontOwnLock(
                        'Attempt to unlock machine (%s) locked by someone '
                        'else (%s).' % (k, state['locked_by']))
            elif cmd == 'lock':
                if state['locked']:
                    raise DuplicateLock(
                        'Attempt to lock already locked machine (%s)' % k)

    def HasAFEServer(self, local):
        """Verifies that the AFELockManager has appropriate AFE server.

        Args:
            local: Boolean indicating whether we are checking for the local
                server (True) or for the global server (False).

        Returns:
            A boolean indicating if the AFELockManager has the requested AFE
            server.
        """
        server = self.local_afe if local else self.afe
        return server is not None

    def GetMachineStates(self, cmd=''):
        """Gets the current state of all the requested machines.

        Gets the current state of all the requested machines, both from the
        HW lab server and from the local server.  Stores the data in a
        dictionary keyed by machine name.

        Args:
            cmd: The command for which we are getting the machine states.
                This is important because if one of the requested machines is
                missing we raise an exception, unless the requested command
                is 'add'.

        Returns:
            A dictionary of machine states for all the machines in the
            AFELockManager object.  Each state is a dictionary with the keys
            'board', 'locked', 'locked_by' and 'lock_time'; a machine that
            is unknown to the local server (only tolerated for 'add') maps
            to an empty dictionary.

        Raises:
            NoAFEServer: Cannot find the HW Lab or local AFE server.
            AFEAccessError: An error occurred when querying the server about
                a machine.
        """
        if not self.HasAFEServer(False):
            raise NoAFEServer('Error: Cannot connect to main AFE server.')

        if self.local and not self.HasAFEServer(True):
            raise NoAFEServer('Error: Cannot connect to local AFE server.')

        machine_states = {}
        for m in self.machines:
            if m in self.toolchain_lab_machines:
                mod_host = m.split('.')[0]
                host_info = self.afe.get_hosts(hostname=mod_host)
                if not host_info:
                    raise AFEAccessError(
                        'Unable to get information about %s from main'
                        ' autotest server.' % m)
            else:
                # A non-lab machine can only be looked up on the local
                # server; without one the call below would fail with an
                # AttributeError on None.
                if not self.local_afe:
                    raise NoAFEServer(
                        'Error: %s is not a toolchain lab machine and no '
                        'local AFE server is in use.' % m)
                host_info = self.local_afe.get_hosts(hostname=m)
                if not host_info and cmd != 'add':
                    raise AFEAccessError(
                        'Unable to get information about %s from '
                        'local autotest server.' % m)

            if not host_info:
                machine_states[m] = {}
                continue

            host = host_info[0]
            locked = host.locked
            # States are keyed by the host name the server reports.
            machine_states[host.hostname] = {
                'board': host.platform if host.platform else '??',
                'locked': locked,
                'locked_by': host.locked_by if locked else '',
                'lock_time': host.lock_time if locked else '',
            }
        return machine_states
494
495
def Main(argv):
    """Parses the options, creates a lock manager and runs the chosen action.

    Args:
        argv: The options with which this script was invoked (without the
            program name).

    Returns:
        0 unless an exception is raised.  Invalid command lines are reported
        through the argument parser's error(), which exits the program.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--list', dest='cmd', action='store_const',
                        const='status',
                        help='List current status of all known machines.')
    parser.add_argument('--lock', dest='cmd', action='store_const',
                        const='lock', help='Lock given machine(s).')
    parser.add_argument('--unlock', dest='cmd', action='store_const',
                        const='unlock', help='Unlock given machine(s).')
    parser.add_argument('--status', dest='cmd', action='store_const',
                        const='status',
                        help='List current status of given machine(s).')
    parser.add_argument('--add_machine', dest='cmd', action='store_const',
                        const='add',
                        help='Add machine to local machine server.')
    parser.add_argument('--remove_machine', dest='cmd',
                        action='store_const', const='remove',
                        help='Remove machine from the local machine server.')
    parser.add_argument('--nolocal', dest='local',
                        action='store_false', default=True,
                        help='Do not try to use local machine server.')
    parser.add_argument('--remote', dest='remote',
                        help='machines on which to operate')
    parser.add_argument('--chromeos_root', dest='chromeos_root',
                        required=True,
                        help='ChromeOS root to use for autotest scripts.')
    parser.add_argument('--local_server', dest='local_server', default=None,
                        help='Alternate local autotest server to use.')
    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help='Force lock/unlock of machines, even if not'
                        ' current lock owner.')

    options = parser.parse_args(argv)
    cmd = options.cmd

    # Check for a missing operation first: otherwise a command line without
    # any operation is reported as "No machines specified", which hides the
    # real problem.
    if not cmd:
        parser.error('No operation selected (--list, --status, --lock, '
                     '--unlock, --add_machine, --remove_machine).')

    # Only 'status' may run without an explicit machine list.
    if not options.remote and cmd != 'status':
        parser.error('No machines specified for operation.')

    if not os.path.isdir(options.chromeos_root):
        parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

    # --remote holds a whitespace-separated list of machine names.
    machine_list = options.remote.split() if options.remote else []

    lock_manager = AFELockManager(machine_list, options.force,
                                  options.chromeos_root, options.local_server,
                                  options.local)

    machine_states = lock_manager.GetMachineStates(cmd=cmd)

    if cmd == 'status':
        lock_manager.ListMachineStates(machine_states)

    elif cmd in ('lock', 'unlock'):
        # --force skips the check that the machines are in a state that
        # allows the requested transition.
        if not lock_manager.force:
            lock_manager.CheckMachineLocks(machine_states, cmd)
        lock_manager.UpdateMachines(cmd == 'lock')

    elif cmd == 'add':
        lock_manager.AddMachinesToLocalServer()

    elif cmd == 'remove':
        lock_manager.RemoveMachinesFromLocalServer()

    return 0
581
582
# Script entry point: run Main on the command-line arguments (program name
# stripped) and use its return value as the process exit status.
if __name__ == '__main__':
    exit_status = Main(sys.argv[1:])
    sys.exit(exit_status)