Mike Frysinger | d03e6b5 | 2019-08-03 12:49:01 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2 |
MK Ryu | a9f7377 | 2015-07-27 17:05:44 -0700 | [diff] [blame] | 2 | # |
| 3 | # Copyright (c) 2015 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | |
"""Script to check the number of long-running processes.

This script counts the "gsutil" and "autoserv" processes that have been
running for more than 24 hours, and reports the counts to the stats
dashboard.

This script depends on the "etimes" user-defined format of "ps".
The version of ps in Goobuntu 14.04 supports etimes, but the one in
Goobuntu 12.04 does not.
"""
| 18 | |
| 19 | |
| 20 | import subprocess |
| 21 | |
Ningning Xia | c0c1061 | 2016-11-23 13:15:26 -0800 | [diff] [blame] | 22 | from autotest_lib.server import site_utils |
Dan Shi | 5e2efb7 | 2017-02-07 11:40:23 -0800 | [diff] [blame] | 23 | |
| 24 | try: |
| 25 | from chromite.lib import metrics |
| 26 | except ImportError: |
| 27 | metrics = site_utils.metrics_mock |
MK Ryu | a9f7377 | 2015-07-27 17:05:44 -0700 | [diff] [blame] | 28 | |
| 29 | |
# Programs whose long-running processes are counted by main(). check_proc()
# also uses this set to decide which names may be reported verbatim in the
# 'program' metric field; any other name is reported as 'unknown'.
PROGRAM_TO_CHECK_SET = {'gsutil', 'autoserv'}
MK Ryu | a9f7377 | 2015-07-27 17:05:44 -0700 | [diff] [blame] | 31 | |
def check_proc(prog, max_elapsed_sec):
    """Check the number of long-running processes for a given program.

    Finds out the number of processes whose command line contains the
    given program name and that have run for more than the given elapsed
    time, then sends that number to the stats dashboard.

    The process table is read by running "ps" directly (no shell) and is
    filtered in Python, so |prog| is always matched as a literal substring
    of the command line; it is never interpreted by a shell or as a
    regular expression.

    @param prog: Program name.
    @param max_elapsed_sec: Max elapsed time in seconds. Processes that
                            have run more than this value will be caught.

    @raises subprocess.CalledProcessError: If "ps" exits with a non-zero
            status (instead of silently reporting a count of 0).
    """
    output = subprocess.check_output(['ps', '-eo', 'etimes,args'])
    # check_output returns bytes on Python 3 but str on Python 2.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')

    count = 0
    for line in output.splitlines():
        # First column is the elapsed time, the rest is the command line.
        columns = line.split(None, 1)
        if len(columns) != 2:
            continue
        elapsed, args = columns
        try:
            elapsed_sec = int(elapsed)
        except ValueError:
            # Header row or any other line without a numeric elapsed time.
            continue
        if elapsed_sec > max_elapsed_sec and prog in args:
            count += 1

    # Keep the metric field to a fixed set of values.
    if prog not in PROGRAM_TO_CHECK_SET:
        prog = 'unknown'

    metrics.Gauge('chromeos/autotest/hung_processes').set(
            count, fields={'program': prog}
    )
MK Ryu | a9f7377 | 2015-07-27 17:05:44 -0700 | [diff] [blame] | 53 | |
| 54 | |
def main():
    """Report the long-running process count for each monitored program."""
    # Threshold passed to check_proc(): 86400 seconds is 24 hours.
    one_day_sec = 86400
    ts_mon_state = site_utils.SetupTsMonGlobalState('check_hung_proc',
                                                    short_lived=True)
    with ts_mon_state:
        for program in PROGRAM_TO_CHECK_SET:
            check_proc(program, one_day_sec)
MK Ryu | a9f7377 | 2015-07-27 17:05:44 -0700 | [diff] [blame] | 60 | |
| 61 | |
# Only run the check when executed as a script, not when imported.
if __name__ == '__main__':
    main()