blob: 4b0326cf5e1af9f6e1da58fe96ad9d55c3b8d33e [file] [log] [blame]
MK Ryua9f73772015-07-27 17:05:44 -07001#!/usr/bin/python
2#
3# Copyright (c) 2015 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7
8"""Script to check the number of long-running processes.
9
This script counts the "gsutil" and "autoserv" processes that have been
running for more than 24 hours, and reports the counts to the stats
dashboard.
13
14This script depends on the "etimes" user-defined format of "ps".
15Goobuntu 14.04 has the version of ps that supports etimes, but not
16Goobuntu 12.04.
17"""
18
19
20import subprocess
21
Ningning Xiac0c10612016-11-23 13:15:26 -080022from autotest_lib.server import site_utils
Dan Shi5e2efb72017-02-07 11:40:23 -080023
24try:
25 from chromite.lib import metrics
26except ImportError:
27 metrics = site_utils.metrics_mock
MK Ryua9f73772015-07-27 17:05:44 -070028
29
# Program names that main() checks; check_proc() reports any program not
# listed here under the 'unknown' label.
PROGRAM_TO_CHECK_SET = {'gsutil', 'autoserv'}
MK Ryua9f73772015-07-27 17:05:44 -070031
def check_proc(prog, max_elapsed_sec):
    """Report the number of long-running processes for a given program.

    Lists every process with "ps", counts those whose command line
    contains |prog| and whose elapsed time is greater than
    |max_elapsed_sec|, and sets the 'chromeos/autotest/hung_processes'
    gauge to that count. A program that is not in PROGRAM_TO_CHECK_SET
    is reported with the 'unknown' program label.

    @param prog: Program name. Matched as a plain substring of each
                 process's command line.
    @param max_elapsed_sec: Max elapsed time in seconds. Processes that
                            have run more than this value will be caught.

    @raises subprocess.CalledProcessError: If "ps" exits with a non-zero
            status (e.g. a version of ps without "etimes" support).
    @raises OSError: If "ps" cannot be executed.
    """
    # Run ps directly with an argument list instead of a shell pipeline:
    # |prog| is never interpreted by a shell, and a failing ps raises
    # rather than being masked by the pipeline and reported as a count of 0.
    ps_output = subprocess.check_output(
            ['ps', '-eo', 'etimes,args'], universal_newlines=True)

    count = 0
    for line in ps_output.splitlines():
        fields = line.split(None, 1)
        if len(fields) != 2:
            continue
        elapsed, args = fields
        # Skips the "ELAPSED COMMAND" header row and anything unparsable.
        if not elapsed.isdigit():
            continue
        if prog in args and int(elapsed) > max_elapsed_sec:
            count += 1

    if prog not in PROGRAM_TO_CHECK_SET:
        prog = 'unknown'

    metrics.Gauge('chromeos/autotest/hung_processes').set(
            count, fields={'program': prog})
MK Ryua9f73772015-07-27 17:05:44 -070053
54
def main():
    """Run check_proc() for every program in PROGRAM_TO_CHECK_SET.

    Each program is checked against a threshold of 86400 seconds
    (24 hours), inside the 'check_hung_proc' SetupTsMonGlobalState
    context.
    """
    one_day_sec = 86400
    ts_mon_context = site_utils.SetupTsMonGlobalState(
            'check_hung_proc', short_lived=True)
    with ts_mon_context:
        for program in PROGRAM_TO_CHECK_SET:
            check_proc(program, one_day_sec)
MK Ryua9f73772015-07-27 17:05:44 -070060
61
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()