Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # |
Scott Zawalski | cb2e2b7 | 2012-04-17 12:10:05 -0400 | [diff] [blame] | 3 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 7 | """Script to archive old Autotest results to Google Storage. |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 8 | |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 9 | Uses gsutil to archive files to the configured Google Storage bucket. |
| 10 | Upon successful copy, the local results directory is deleted. |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 11 | """ |
| 12 | |
Michael Tang | 97d188c | 2016-06-25 11:18:42 -0700 | [diff] [blame] | 13 | import base64 |
Simran Basi | bd9ded0 | 2013-11-04 15:49:11 -0800 | [diff] [blame] | 14 | import datetime |
Dan Shi | faf50db | 2015-09-25 13:40:45 -0700 | [diff] [blame] | 15 | import errno |
Ningning Xia | 4211124 | 2016-06-15 14:35:58 -0700 | [diff] [blame] | 16 | import glob |
| 17 | import gzip |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 18 | import logging |
Simran Basi | a253228 | 2014-12-04 13:28:16 -0800 | [diff] [blame] | 19 | import logging.handlers |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 20 | import os |
Dan Shi | affb922 | 2015-04-15 17:05:47 -0700 | [diff] [blame] | 21 | import re |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 22 | import shutil |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 23 | import signal |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 24 | import socket |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 25 | import subprocess |
| 26 | import sys |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 27 | import tempfile |
| 28 | import time |
Scott Zawalski | cb2e2b7 | 2012-04-17 12:10:05 -0400 | [diff] [blame] | 29 | |
Simran Basi | 7d9a149 | 2012-10-25 13:51:54 -0700 | [diff] [blame] | 30 | from optparse import OptionParser |
| 31 | |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 32 | import common |
Dan Shi | 1b4c7c3 | 2015-10-05 10:38:57 -0700 | [diff] [blame] | 33 | from autotest_lib.client.common_lib import error |
Simran Basi | dd12997 | 2014-09-11 14:34:49 -0700 | [diff] [blame] | 34 | from autotest_lib.client.common_lib import utils |
Dan Shi | 1b4c7c3 | 2015-10-05 10:38:57 -0700 | [diff] [blame] | 35 | from autotest_lib.site_utils import job_directories |
Ningning Xia | 2d981ee | 2016-07-06 17:59:54 -0700 | [diff] [blame] | 36 | from autotest_lib.tko import models |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 37 | |
Alex Miller | c900b34 | 2014-06-09 16:52:07 -0700 | [diff] [blame] | 38 | try: |
| 39 | # Does not exist, nor is needed, on moblab. |
| 40 | import psutil |
| 41 | except ImportError: |
| 42 | psutil = None |
| 43 | |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 44 | import job_directories |
Michael Tang | 97d188c | 2016-06-25 11:18:42 -0700 | [diff] [blame] | 45 | import pubsub_utils |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 46 | from autotest_lib.client.common_lib import global_config |
Gabe Black | 1e1c41b | 2015-02-04 23:55:15 -0800 | [diff] [blame] | 47 | from autotest_lib.client.common_lib.cros.graphite import autotest_stats |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 48 | from autotest_lib.scheduler import email_manager |
Fang Deng | 970b6a7 | 2013-04-09 11:59:16 -0700 | [diff] [blame] | 49 | from chromite.lib import parallel |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 50 | |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 51 | |
# Global config value 'gs_offloading_enabled' under 'CROS'; defaults to
# True when the value is not configured.
GS_OFFLOADING_ENABLED = global_config.global_config.get_config_value(
        'CROS', 'gs_offloading_enabled', type=bool, default=True)

# Stats key for this host: the local hostname with every '.' replaced
# by '_', prefixed with 'gs_offloader.'.
STATS_KEY = 'gs_offloader.%s' % socket.gethostname().replace('.', '_')
# Value stored under '_type' in the metadata sent along with the
# transferred-size gauge.
METADATA_TYPE = 'result_dir_size'

# Timer keyed by STATS_KEY; used as a decorator on the offload function.
timer = autotest_stats.Timer(STATS_KEY)

# Nice setting for process, the higher the number the lower the priority.
NICENESS = 10

# Maximum number of seconds to allow for offloading a single
# directory.
OFFLOAD_TIMEOUT_SECS = 60 * 60

# Sleep time per loop.
SLEEP_TIME_SECS = 5

# Minimum number of seconds between e-mail reports.
REPORT_INTERVAL_SECS = 60 * 60

# Location of Autotest results on disk.
RESULTS_DIR = '/usr/local/autotest/results'

# Hosts sub-directory that contains cleanup, verify and repair jobs.
HOSTS_SUB_DIR = 'hosts'

# Directory and naming templates for this script's log files.
# NOTE(review): the two '%s' fields of LOG_FILENAME_FORMAT are filled in
# outside this section -- confirm their meaning at the point of use.
LOG_LOCATION = '/usr/local/autotest/logs/'
LOG_FILENAME_FORMAT = 'gs_offloader_%s_log_%s.txt'
LOG_TIMESTAMP_FORMAT = '%Y%m%d_%H%M%S'
LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'

# Global config value 'notify_email' under 'SCHEDULER'; defaults to the
# empty string when the value is not configured.
# pylint: disable=E1120
NOTIFY_ADDRESS = global_config.global_config.get_config_value(
        'SCHEDULER', 'notify_email', default='')
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 87 | |
Kevin Cheng | 686ae8c | 2015-09-09 11:56:38 -0700 | [diff] [blame] | 88 | ERROR_EMAIL_HELPER_URL = 'http://go/cros-triage-gsoffloader' |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 89 | ERROR_EMAIL_SUBJECT_FORMAT = 'GS Offloader notifications from %s' |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 90 | ERROR_EMAIL_REPORT_FORMAT = '''\ |
| 91 | gs_offloader is failing to offload results directories. |
Simran Basi | 981a927 | 2012-11-14 10:46:03 -0800 | [diff] [blame] | 92 | |
Kevin Cheng | 686ae8c | 2015-09-09 11:56:38 -0700 | [diff] [blame] | 93 | Check %s to triage the issue. |
| 94 | |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 95 | First failure Count Directory name |
| 96 | =================== ====== ============================== |
Kevin Cheng | 686ae8c | 2015-09-09 11:56:38 -0700 | [diff] [blame] | 97 | ''' % ERROR_EMAIL_HELPER_URL |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 98 | # --+----1----+---- ----+ ----+----1----+----2----+----3 |
| 99 | |
| 100 | ERROR_EMAIL_DIRECTORY_FORMAT = '%19s %5d %-1s\n' |
| 101 | ERROR_EMAIL_TIME_FORMAT = '%Y-%m-%d %H:%M:%S' |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 102 | |
# When True, get_cmd_list() builds a `gsutil rsync` command instead of
# `gsutil cp`.
USE_RSYNC_ENABLED = global_config.global_config.get_config_value(
        'CROS', 'gs_offloader_use_rsync', type=bool, default=False)

# According to https://cloud.google.com/storage/docs/bucket-naming#objectnames
# Individual characters, and inclusive (low, high) character code
# ranges, that get_sanitized_name() replaces in file and folder names.
INVALID_GS_CHARS = ['[', ']', '*', '?', '#']
INVALID_GS_CHAR_RANGE = [(0x00, 0x1F), (0x7F, 0x84), (0x86, 0xFF)]

# Maximum number of files in the folder.
MAX_FILE_COUNT = 500
# Folder names that limit_file_count() never compresses.
FOLDERS_NEVER_ZIP = ['debug', 'ssp_logs']
# When True, limit_file_count() is applied to a directory before it is
# offloaded.
LIMIT_FILE_COUNT = global_config.global_config.get_config_value(
        'CROS', 'gs_offloader_limit_file_count', type=bool, default=False)

# Use multiprocessing for gsutil uploading.
GS_OFFLOADER_MULTIPROCESSING = global_config.global_config.get_config_value(
        'CROS', 'gs_offloader_multiprocessing', type=bool, default=False)

# Glob fragment matching exactly two decimal digits.
D = '[0-9][0-9]'
# Glob pattern for the timestamp-named folders holding CTS/GTS results:
# seven two-digit groups laid out as DDDD.DD.DD_DD.DD.DD.
TIMESTAMP_PATTERN = '%s%s.%s.%s_%s.%s.%s' % (D, D, D, D, D, D, D)
# Names of the result files looked up inside a timestamp folder.
CTS_RESULT_PATTERN = 'testResult.xml'
GTS_RESULT_PATTERN = 'xtsTestResult.xml'
# Google Storage bucket URI to store results in.
DEFAULT_CTS_RESULTS_GSURI = global_config.global_config.get_config_value(
        'CROS', 'cts_results_server', default='')
# Google Storage bucket URI that receives the zipped timestamp folders.
DEFAULT_CTS_APFE_GSURI = global_config.global_config.get_config_value(
        'CROS', 'cts_apfe_server', default='')
Dan Shi | 1b4c7c3 | 2015-10-05 10:38:57 -0700 | [diff] [blame] | 129 | |
# Cloud notification (pubsub) settings.
#
# The option names must not carry a trailing ':'.  A ConfigParser-style
# option name cannot contain ':', so a key written that way can never
# match a configured value and the defaults would always be used.
# The topic is read with the default value type, the same way
# 'notify_email' is read above in this file.
_PUBSUB_ENABLED = global_config.global_config.get_config_value(
        'CROS', 'cloud_notification_enabled', type=bool, default=False)
_PUBSUB_TOPIC = global_config.global_config.get_config_value(
        'CROS', 'cloud_notification_topic', default=None)

# The message data for new test result notification.
NEW_TEST_RESULT_MESSAGE = 'NEW_TEST_RESULT'
| 137 | |
| 138 | |
class TimeoutException(Exception):
    """Signals that an offload did not finish before its deadline.

    Instances are raised from the SIGALRM handler, timeout_handler().
    """
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 142 | |
| 143 | |
def timeout_handler(_signum, _frame):
    """Turn a SIGALRM delivery into a TimeoutException.

    Both arguments are part of the signal handler calling convention
    and are not used.

    @param _signum: Number of the signal that was delivered (14 for
                    SIGALRM).
    @param _frame: Stack frame that was executing when the signal
                   arrived.

    @raise TimeoutException: Always raised, so that the code waiting
                             on the offload process is interrupted.
    """
    raise TimeoutException('Process Timed Out')
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 156 | |
| 157 | |
def get_cmd_list(multiprocessing, dir_entry, gs_path):
    """Build the gsutil command line that offloads one directory.

    With USE_RSYNC_ENABLED set, `gsutil rsync` is used and the
    destination is gs_path extended with the base name of dir_entry;
    otherwise `gsutil cp` is used with gs_path itself as destination.

    @param multiprocessing: True to pass the -m option to gsutil.
    @param dir_entry: Directory entry/path to be offloaded.
    @param gs_path: Location in google storage where the directory
                    is offloaded to.

    @return The command as an argument list suitable for Popen.
    """
    if USE_RSYNC_ENABLED:
        action = 'rsync'
        destination = os.path.join(gs_path, os.path.basename(dir_entry))
    else:
        action = 'cp'
        destination = gs_path
    prefix = ['gsutil', '-m'] if multiprocessing else ['gsutil']
    return prefix + [action, '-eR', dir_entry, destination]
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 180 | |
Jakob Juelich | 24f22c2 | 2014-09-26 11:46:11 -0700 | [diff] [blame] | 181 | |
def get_directory_size_kibibytes_cmd_list(directory):
    """Build the `du` command that reports a directory's total size.

    This lives in its own function so that unittests can mock it.

    @param directory: Path to the directory to be measured.

    @return The command as an argument list.
    """
    du_command = ['du', '-sk']
    du_command.append(directory)
    return du_command
| 187 | |
| 188 | |
def get_directory_size_kibibytes(directory):
    """Measure the total size of a directory and everything below it.

    The size is taken from the first tab-separated field printed by
    the command from get_directory_size_kibibytes_cmd_list().

    @param directory: Path to the directory

    @return Size of the directory in kibibytes, or 0 when the size
            command exits with a non-zero status.
    """
    du = subprocess.Popen(get_directory_size_kibibytes_cmd_list(directory),
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    out, err = du.communicate()

    if du.returncode == 0:
        size_field = out.split('\t', 1)[0]
        return int(size_field)

    # The size only feeds statistics, so a failure here is reported
    # and otherwise ignored rather than allowed to break the offload.
    logging.warning('Getting size of %s failed. Stderr:', directory)
    logging.warning(err)
    return 0
| 210 | |
| 211 | |
def get_sanitized_name(name):
    """Percent-encode every character of name that is invalid in GS.

    A character is invalid when it is listed in INVALID_GS_CHARS or
    its code falls into one of the INVALID_GS_CHAR_RANGE ranges.  Each
    such character is replaced by '%' followed by its two-digit
    lowercase hexadecimal code.

    @param name: Name to be processed.

    @return The name with all invalid characters replaced.
    """
    char_class = [re.escape(char) for char in INVALID_GS_CHARS]
    for low, high in INVALID_GS_CHAR_RANGE:
        char_class.append(r'\x%02x-\x%02x' % (low, high))
    invalid_re = re.compile('[%s]' % ''.join(char_class))

    def _percent_encode(match):
        return '%%%02x' % ord(match.group(0))

    return invalid_re.sub(_percent_encode, name)
Dan Shi | affb922 | 2015-04-15 17:05:47 -0700 | [diff] [blame] | 225 | |
| 226 | |
def sanitize_dir(dir_entry):
    """Replace all invalid characters in folder and file names with valid ones.

    Every file and folder below dir_entry whose name is changed by
    get_sanitized_name() is renamed to the sanitized name.  dir_entry
    itself is never renamed.  Nothing is done when dir_entry does not
    exist.

    @param dir_entry: Directory entry to be sanitized.
    """
    if not os.path.exists(dir_entry):
        return
    renames = []
    for root, dirs, files in os.walk(dir_entry):
        # The renames are applied after the walk, in top-down order, so
        # by the time an entry is renamed all folders between dir_entry
        # and that entry already carry their sanitized names.  Only the
        # part of root below dir_entry is sanitized: dir_entry is not
        # renamed, so sanitizing it as well would produce source paths
        # that do not exist.  os.walk() builds each root by appending
        # to dir_entry, which makes the plain string slice safe.
        sanitized_root = dir_entry + get_sanitized_name(
                root[len(dir_entry):])
        for name in dirs + files:
            sanitized_name = get_sanitized_name(name)
            if name != sanitized_name:
                orig_path = os.path.join(sanitized_root, name)
                rename_path = os.path.join(sanitized_root,
                                           sanitized_name)
                renames.append((orig_path, rename_path))
    for src, dest in renames:
        logging.warning('Invalid character found. Renaming %s to %s.',
                        src, dest)
        shutil.move(src, dest)
| 248 | |
| 249 | |
def _get_zippable_folders(dir_entry):
    """List the entries of dir_entry that may be compressed.

    An entry qualifies when it is not a regular file and its name is
    not listed in FOLDERS_NEVER_ZIP.

    @param dir_entry: Directory whose direct children are examined.

    @return List of paths, each one dir_entry joined with the name of
            a qualifying entry.
    """
    def _is_zippable(entry):
        entry_path = os.path.join(dir_entry, entry)
        return (not os.path.isfile(entry_path) and
                entry not in FOLDERS_NEVER_ZIP)

    return [os.path.join(dir_entry, entry)
            for entry in os.listdir(dir_entry) if _is_zippable(entry)]
Dan Shi | affb922 | 2015-04-15 17:05:47 -0700 | [diff] [blame] | 258 | |
| 259 | |
def limit_file_count(dir_entry):
    """Limit the number of files in given directory.

    The method counts the entries that `find` reports for the given
    directory.  If that count is below MAX_FILE_COUNT nothing is done;
    otherwise each zippable folder (see _get_zippable_folders) is
    replaced by a `<folder>.tgz` archive placed next to it.  Folders
    in FOLDERS_NEVER_ZIP are never compressed.

    @param dir_entry: Directory entry to be checked.
    """
    count = utils.run('find "%s" | wc -l' % dir_entry,
                      ignore_status=True).stdout.strip()
    try:
        count = int(count)
    except (ValueError, TypeError):
        # Both exception types must be listed in a tuple.  The form
        # `except ValueError, TypeError:` catches ValueError only and
        # binds the caught exception to the name TypeError.
        logging.warning('Fail to get the file count in folder %s.',
                        dir_entry)
        return
    if count < MAX_FILE_COUNT:
        return

    # For test job, zip folders in a second level, e.g. 123-debug/host1.
    # This is to allow autoserv debug folder still be accessible.
    # For special task, it does not need to dig one level deeper.
    is_special_task = re.match(job_directories.SPECIAL_TASK_PATTERN,
                               dir_entry)

    folders = _get_zippable_folders(dir_entry)
    if not is_special_task:
        subfolders = []
        for folder in folders:
            subfolders.extend(_get_zippable_folders(folder))
        folders = subfolders

    for folder in folders:
        try:
            zip_name = '%s.tgz' % folder
            utils.run('tar -cz -C "%s" -f "%s" "%s"' %
                      (os.path.dirname(folder), zip_name,
                       os.path.basename(folder)))
        except error.CmdError as e:
            # Keep the folder when it could not be archived.
            logging.error('Fail to compress folder %s. Error: %s',
                          folder, e)
            continue
        # The archive was written; the original folder is redundant.
        shutil.rmtree(folder)
Dan Shi | 1b4c7c3 | 2015-10-05 10:38:57 -0700 | [diff] [blame] | 305 | |
| 306 | |
def correct_results_folder_permission(dir_entry):
    """Give the results folder to the user running this process.

    For tests running with server-side packaging, the results folder
    is owned by root.  Owner and group are changed recursively, via
    non-interactive sudo, to the uid and gid of the current process so
    the results folder becomes accessible.  A failing command is
    logged and not raised; the group is left untouched when changing
    the owner fails.

    @param dir_entry: Path to the results folder.  Nothing is done
                      for an empty or None value.
    """
    if not dir_entry:
        return
    ownership_changes = (('chown', os.getuid()), ('chgrp', os.getgid()))
    try:
        for tool, numeric_id in ownership_changes:
            subprocess.check_call(
                    ['sudo', '-n', tool, '-R', str(numeric_id), dir_entry])
    except subprocess.CalledProcessError as e:
        logging.error('Failed to modify permission for %s: %s',
                      dir_entry, e)
Dan Shi | e4a4f9f | 2015-07-20 09:00:25 -0700 | [diff] [blame] | 326 | |
| 327 | |
def upload_testresult_files(dir_entry, multiprocessing):
    """Upload CTS and GTS test results to their separate gs buckets.

    Every direct child of dir_entry is treated as a host folder.  For
    each timestamp folder found below its cheets_CTS.* and
    cheets_GTS.* results, _upload_files() is called with the matching
    result file name.  A failing upload is logged and does not stop
    the remaining uploads.

    @param dir_entry: Path to the results folder.
    @param multiprocessing: True to turn on -m option for gsutil.
    """
    suites = (('cheets_CTS.*', CTS_RESULT_PATTERN),
              ('cheets_GTS.*', GTS_RESULT_PATTERN))
    for host in glob.glob(os.path.join(dir_entry, '*')):
        for test_glob, result_pattern in suites:
            timestamp_glob = os.path.join(host, test_glob, 'results', '*',
                                          TIMESTAMP_PATTERN)
            for path in glob.glob(timestamp_glob):
                try:
                    _upload_files(host, path, result_pattern,
                                  multiprocessing)
                except Exception as e:
                    logging.error('ERROR uploading test results %s to GS: %s',
                                  path, e)
Ningning Xia | 205a1d4 | 2016-06-21 16:46:28 -0700 | [diff] [blame] | 349 | |
Ningning Xia | 205a1d4 | 2016-06-21 16:46:28 -0700 | [diff] [blame] | 350 | |
Ningning Xia | 21922c8 | 2016-07-29 11:03:15 -0700 | [diff] [blame] | 351 | def _is_release_build(build): |
| 352 | """Check if it's a release build.""" |
| 353 | return re.match(r'(?!trybot-).*-release/.*', build) |
| 354 | |
| 355 | |
def _upload_files(host, path, result_pattern, multiprocessing):
    """Upload the CTS/GTS artifacts belonging to one timestamp folder.

    Nothing is uploaded unless the job keyval of the host folder names
    a release build.  Otherwise `<path>.zip`, when present, is uploaded
    below DEFAULT_CTS_APFE_GSURI, and every file in path matching
    result_pattern is gzipped and uploaded below
    DEFAULT_CTS_RESULTS_GSURI.  The temporary .gz file is removed again
    whether or not the upload succeeds.

    @param host: Path to the host folder holding the job keyval.
    @param path: Path to the timestamp folder, of the form
                 <job>/<host>/<package>/results/<x>/<timestamp>.
    @param result_pattern: File name (glob) of the result file inside
                           path, e.g. testResult.xml.
    @param multiprocessing: True to turn on -m option for gsutil.
    """
    keyval = models.test.parse_job_keyval(host)
    build = keyval['build']

    if not _is_release_build(build):
        # Only upload results for release builds.
        return

    parent_job_id = str(keyval['parent_job_id'])

    # The job id, package and timestamp are taken from fixed positions
    # counted from the end of the path.
    folders = path.split(os.sep)
    job_id = folders[-6]
    package = folders[-4]
    timestamp = folders[-1]

    # Path: bucket/build/parent_job_id/cheets_CTS.*/job_id_timestamp/
    # or bucket/build/parent_job_id/cheets_GTS.*/job_id_timestamp/
    cts_apfe_gs_path = os.path.join(
            DEFAULT_CTS_APFE_GSURI, build, parent_job_id,
            package, job_id + '_' + timestamp) + '/'

    # Path: bucket/cheets_CTS.*/job_id_timestamp/
    # or bucket/cheets_GTS.*/job_id_timestamp/
    test_result_gs_path = os.path.join(
            DEFAULT_CTS_RESULTS_GSURI, package,
            job_id + '_' + timestamp) + '/'

    # NOTE(review): the command lists are joined with spaces and run as
    # one string, so paths are assumed to need no shell quoting --
    # confirm against utils.run.
    for zip_file in glob.glob('%s.zip' % path):
        utils.run(' '.join(get_cmd_list(
                multiprocessing, zip_file, cts_apfe_gs_path)))
        logging.debug('Upload %s to %s ', zip_file, cts_apfe_gs_path)

    for test_result_file in glob.glob(os.path.join(path, result_pattern)):
        # gzip test_result_file(testResult.xml/xtsTestResult.xml)
        test_result_file_gz = '%s.gz' % test_result_file
        try:
            # Binary mode on both ends: the bytes are copied verbatim.
            with open(test_result_file, 'rb') as f_in:
                with gzip.open(test_result_file_gz, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            utils.run(' '.join(get_cmd_list(
                    multiprocessing, test_result_file_gz,
                    test_result_gs_path)))
            logging.debug('Zip and upload %s to %s',
                          test_result_file_gz, test_result_gs_path)
        finally:
            # Remove test_result_file_gz (testResult.xml.gz or
            # xtsTestResult.xml.gz) even when the upload failed, so no
            # stray archive is left behind in the results folder.
            if os.path.exists(test_result_file_gz):
                os.remove(test_result_file_gz)
| 400 | |
Ningning Xia | 4211124 | 2016-06-15 14:35:58 -0700 | [diff] [blame] | 401 | |
def _create_test_result_notification(gs_path):
    """Build the notification message for a newly offloaded test result.

    The message carries the base64 encoded NEW_TEST_RESULT_MESSAGE as
    its data and the storage location as the 'gcs_uri' attribute.

    @param gs_path: The test result Google Cloud Storage URI.

    @returns The notification message as a dict with the keys 'data'
             and 'attributes'.
    """
    return {
        'data': base64.b64encode(NEW_TEST_RESULT_MESSAGE),
        'attributes': {'gcs_uri': gs_path},
    }
| 416 | |
| 417 | |
| 418 | def get_offload_dir_func(gs_uri, multiprocessing, pubsub_topic=None): |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 419 | """Returns the offload directory function for the given gs_uri |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 420 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 421 | @param gs_uri: Google storage bucket uri to offload to. |
| 422 | @param multiprocessing: True to turn on -m option for gsutil. |
Michael Tang | 97d188c | 2016-06-25 11:18:42 -0700 | [diff] [blame] | 423 | @param pubsub_topic: The pubsub topic to publish notificaitons. If None, |
| 424 | pubsub is not enabled. |
J. Richard Barnette | ea78536 | 2014-03-17 16:00:53 -0700 | [diff] [blame] | 425 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 426 | @return offload_dir function to perform the offload. |
Simran Basi | dd12997 | 2014-09-11 14:34:49 -0700 | [diff] [blame] | 427 | """ |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 428 | @timer.decorate |
| 429 | def offload_dir(dir_entry, dest_path): |
| 430 | """Offload the specified directory entry to Google storage. |
Jakob Juelich | c17f311 | 2014-09-11 14:32:30 -0700 | [diff] [blame] | 431 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 432 | @param dir_entry: Directory entry to offload. |
| 433 | @param dest_path: Location in google storage where we will |
| 434 | offload the directory. |
Dan Shi | affb922 | 2015-04-15 17:05:47 -0700 | [diff] [blame] | 435 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 436 | """ |
| 437 | try: |
| 438 | counter = autotest_stats.Counter(STATS_KEY) |
| 439 | counter.increment('jobs_offload_started') |
Dan Shi | 1b4c7c3 | 2015-10-05 10:38:57 -0700 | [diff] [blame] | 440 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 441 | sanitize_dir(dir_entry) |
Ningning Xia | 4211124 | 2016-06-15 14:35:58 -0700 | [diff] [blame] | 442 | if DEFAULT_CTS_RESULTS_GSURI: |
Dan Shi | db9d053 | 2016-06-22 14:35:41 -0700 | [diff] [blame] | 443 | upload_testresult_files(dir_entry, multiprocessing) |
Simran Basi | dd12997 | 2014-09-11 14:34:49 -0700 | [diff] [blame] | 444 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 445 | if LIMIT_FILE_COUNT: |
| 446 | limit_file_count(dir_entry) |
Simran Basi | dd12997 | 2014-09-11 14:34:49 -0700 | [diff] [blame] | 447 | |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 448 | error = False |
| 449 | stdout_file = tempfile.TemporaryFile('w+') |
| 450 | stderr_file = tempfile.TemporaryFile('w+') |
| 451 | process = None |
| 452 | signal.alarm(OFFLOAD_TIMEOUT_SECS) |
| 453 | gs_path = '%s%s' % (gs_uri, dest_path) |
| 454 | process = subprocess.Popen( |
| 455 | get_cmd_list(multiprocessing, dir_entry, gs_path), |
| 456 | stdout=stdout_file, stderr=stderr_file) |
| 457 | process.wait() |
| 458 | signal.alarm(0) |
| 459 | |
| 460 | if process.returncode == 0: |
| 461 | dir_size = get_directory_size_kibibytes(dir_entry) |
| 462 | |
| 463 | counter.increment('kibibytes_transferred_total', |
| 464 | dir_size) |
| 465 | metadata = { |
| 466 | '_type': METADATA_TYPE, |
| 467 | 'size_KB': dir_size, |
Dan Shi | e27e50f | 2015-08-27 15:11:29 -0700 | [diff] [blame] | 468 | 'result_dir': dir_entry, |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 469 | 'drone': socket.gethostname().replace('.', '_') |
| 470 | } |
| 471 | autotest_stats.Gauge(STATS_KEY, metadata=metadata).send( |
| 472 | 'kibibytes_transferred', dir_size) |
| 473 | counter.increment('jobs_offloaded') |
Michael Tang | 97d188c | 2016-06-25 11:18:42 -0700 | [diff] [blame] | 474 | |
| 475 | if pubsub_topic: |
| 476 | message = _create_test_result_notification(gs_path) |
| 477 | msg_ids = pubsub_utils.publish_notifications( |
| 478 | pubsub_topic, [message]) |
| 479 | if not msg_ids: |
| 480 | error = True |
| 481 | |
| 482 | if not error: |
| 483 | shutil.rmtree(dir_entry) |
J. Richard Barnette | 2c41e1e | 2015-12-08 16:21:10 -0800 | [diff] [blame] | 484 | else: |
| 485 | error = True |
| 486 | except TimeoutException: |
| 487 | # If we finished the call to Popen(), we may need to |
| 488 | # terminate the child process. We don't bother calling |
| 489 | # process.poll(); that inherently races because the child |
| 490 | # can die any time it wants. |
| 491 | if process: |
| 492 | try: |
| 493 | process.terminate() |
| 494 | except OSError: |
| 495 | # We don't expect any error other than "No such |
| 496 | # process". |
| 497 | pass |
| 498 | logging.error('Offloading %s timed out after waiting %d ' |
| 499 | 'seconds.', dir_entry, OFFLOAD_TIMEOUT_SECS) |
| 500 | error = True |
| 501 | except OSError as e: |
| 502 | # The wrong file permission can lead call |
| 503 | # `shutil.rmtree(dir_entry)` to raise OSError with message |
| 504 | # 'Permission denied'. Details can be found in |
| 505 | # crbug.com/536151 |
| 506 | if e.errno == errno.EACCES: |
| 507 | logging.warn('Try to correct file permission of %s.', dir_entry) |
| 508 | correct_results_folder_permission(dir_entry) |
| 509 | finally: |
| 510 | signal.alarm(0) |
| 511 | if error: |
| 512 | # Rewind the log files for stdout and stderr and log |
| 513 | # their contents. |
| 514 | stdout_file.seek(0) |
| 515 | stderr_file.seek(0) |
| 516 | stderr_content = stderr_file.read() |
| 517 | logging.error('Error occurred when offloading %s:', |
| 518 | dir_entry) |
| 519 | logging.error('Stdout:\n%s \nStderr:\n%s', |
| 520 | stdout_file.read(), stderr_content) |
| 521 | # Some result files may have wrong file permission. Try |
| 522 | # to correct such error so later try can success. |
| 523 | # TODO(dshi): The code is added to correct result files |
| 524 | # with wrong file permission caused by bug 511778. After |
| 525 | # this code is pushed to lab and run for a while to |
| 526 | # clean up these files, following code and function |
| 527 | # correct_results_folder_permission can be deleted. |
| 528 | if 'CommandException: Error opening file' in stderr_content: |
| 529 | logging.warn('Try to correct file permission of %s.', |
| 530 | dir_entry) |
| 531 | correct_results_folder_permission(dir_entry) |
| 532 | stdout_file.close() |
| 533 | stderr_file.close() |
| 534 | return offload_dir |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 535 | |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 536 | |
def delete_files(dir_entry, dest_path):
    """Remove a results directory from local disk without uploading it.

    The signature mirrors offload_dir so that this function can be
    substituted for it on systems that should only delete results
    rather than offload them.

    @param dir_entry: Directory entry to delete.
    @param dest_path: NOT USED.
    """
    # Errors are deliberately not ignored: a failed removal raises to
    # the caller.
    shutil.rmtree(dir_entry, ignore_errors=False)
Simran Basi | bd9ded0 | 2013-11-04 15:49:11 -0800 | [diff] [blame] | 548 | |
| 549 | |
def report_offload_failures(joblist):
    """Send an e-mail notification describing failed offloads.

    One report line is produced per job in `joblist` from its failure
    time, failure count and job directory.  The lines are sorted,
    appended to the report header and mailed to `NOTIFY_ADDRESS`.

    @param joblist List of jobs to be reported in the message.
    """
    report_lines = []
    for job in joblist:
        failed_at = datetime.datetime.fromtimestamp(
                job.get_failure_time())
        report_lines.append(ERROR_EMAIL_DIRECTORY_FORMAT % (
                failed_at.strftime(ERROR_EMAIL_TIME_FORMAT),
                job.get_failure_count(),
                job.get_job_directory()))
    subject = ERROR_EMAIL_SUBJECT_FORMAT % socket.gethostname()
    body = ERROR_EMAIL_REPORT_FORMAT + ''.join(sorted(report_lines))
    email_manager.manager.send_email(NOTIFY_ADDRESS, subject, body)
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 569 | |
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 570 | |
def wait_for_gs_write_access(gs_uri):
    """Verify and wait until we have write access to Google Storage.

    Repeatedly uploads an empty temporary file to `gs_uri` and then
    removes it again with `gsutil rm`.  If either command exits with a
    non-zero status, sleeps for 120 seconds and retries; the function
    only returns once both commands have succeeded.

    @param gs_uri: The Google Storage URI we are trying to offload to.
    """
    # TODO (sbasi) Try to use the gsutil command to check write access.
    # Ensure we have write access to gs_uri.
    #
    # The context manager guarantees the probe file is closed (and thus
    # deleted) when we leave, including when an unexpected exception
    # such as OSError propagates out of the loop.
    with tempfile.NamedTemporaryFile() as dummy_file:
        test_cmd = get_cmd_list(False, dummy_file.name, gs_uri)
        # Both commands are invariant across retries; build them once.
        cleanup_cmd = ['gsutil', 'rm',
                       os.path.join(gs_uri,
                                    os.path.basename(dummy_file.name))]
        while True:
            try:
                subprocess.check_call(test_cmd)
                subprocess.check_call(cleanup_cmd)
                break
            except subprocess.CalledProcessError:
                logging.debug('Unable to offload to %s, sleeping.', gs_uri)
                time.sleep(120)
Simran Basi | ac0edb2 | 2015-04-23 16:15:51 -0700 | [diff] [blame] | 591 | |
| 592 | |
class Offloader(object):
    """State of the offload process.

    Contains the following member fields:
      * gs_uri:  Google Storage URI that results are offloaded to.
        Only set when not running in delete-only mode.
      * _pubsub_topic:  Pub/sub topic passed to the offload function
        for upload notifications, or None if no topic is configured.
      * _offload_func:  Function to call for each attempt to offload
        a job directory.
      * _jobdir_classes:  List of classes of job directory to be
        offloaded.
      * _processes:  Maximum number of outstanding offload processes
        to allow during an offload cycle.
      * _age_limit:  Minimum age in days at which a job may be
        offloaded.
      * _open_jobs:  a dictionary mapping directory paths to Job
        objects.
      * _next_report_time:  Earliest time that we should send e-mail
        if there are failures to be reported.
    """

    def __init__(self, options):
        """Configure the offloader from parsed command line options.

        @param options: Options object as returned by parse_options().
        """
        self._pubsub_topic = None
        if options.delete_only:
            self._offload_func = delete_files
        else:
            self.gs_uri = utils.get_offload_gsuri()
            logging.debug('Offloading to: %s', self.gs_uri)
            # An explicit -m wins; when the flag was not given at all
            # (None) fall back to the global config setting.
            multiprocessing = False
            if options.multiprocessing:
                multiprocessing = True
            elif options.multiprocessing is None:
                multiprocessing = GS_OFFLOADER_MULTIPROCESSING
            logging.info(
                    'Offloader multiprocessing is set to:%r', multiprocessing)
            # A topic given on the command line takes precedence over
            # the configured default topic.
            if options.pubsub_topic_for_job_upload:
                self._pubsub_topic = options.pubsub_topic_for_job_upload
            elif _PUBSUB_ENABLED:
                self._pubsub_topic = _PUBSUB_TOPIC
            self._offload_func = get_offload_dir_func(
                    self.gs_uri, multiprocessing, self._pubsub_topic)
        classlist = []
        if options.process_hosts_only or options.process_all:
            classlist.append(job_directories.SpecialJobDirectory)
        if not options.process_hosts_only:
            classlist.append(job_directories.RegularJobDirectory)
        self._jobdir_classes = classlist
        assert self._jobdir_classes
        self._processes = options.parallelism
        self._age_limit = options.days_old
        self._open_jobs = {}
        self._next_report_time = time.time()


    def _add_new_jobs(self):
        """Find new job directories that need offloading.

        Go through the file system looking for valid job directories
        that are currently not in `self._open_jobs`, and add them in.

        """
        new_job_count = 0
        for cls in self._jobdir_classes:
            for resultsdir in cls.get_job_directories():
                if resultsdir in self._open_jobs:
                    continue
                self._open_jobs[resultsdir] = cls(resultsdir)
                new_job_count += 1
        logging.debug('Start of offload cycle - found %d new jobs',
                      new_job_count)


    def _remove_offloaded_jobs(self):
        """Remove offloaded jobs from `self._open_jobs`."""
        removed_job_count = 0
        # Iterate over a snapshot: deleting from a dict while iterating
        # its live items view raises RuntimeError on Python 3.
        for jobkey, job in list(self._open_jobs.items()):
            if job.is_offloaded():
                del self._open_jobs[jobkey]
                removed_job_count += 1
        logging.debug('End of offload cycle - cleared %d new jobs, '
                      'carrying %d open jobs',
                      removed_job_count, len(self._open_jobs))


    def _have_reportable_errors(self):
        """Return whether any jobs need reporting via e-mail.

        @return True if there are reportable jobs in `self._open_jobs`,
                or False otherwise.
        """
        return any(job.is_reportable()
                   for job in self._open_jobs.values())


    def _update_offload_results(self):
        """Check and report status after attempting offload.

        This function processes all jobs in `self._open_jobs`, assuming
        an attempt has just been made to offload all of them.

        Any jobs that have been successfully offloaded are removed.

        If any jobs have reportable errors, and we haven't generated
        an e-mail report in the last `REPORT_INTERVAL_SECS` seconds,
        send new e-mail describing the failures.

        """
        self._remove_offloaded_jobs()
        if self._have_reportable_errors():
            # N.B. We include all jobs that have failed at least once,
            # which may include jobs that aren't otherwise reportable.
            failed_jobs = [j for j in self._open_jobs.values()
                           if j.get_failure_time()]
            logging.debug('Currently there are %d jobs with offload '
                          'failures', len(failed_jobs))
            if time.time() >= self._next_report_time:
                logging.debug('Reporting failures by e-mail')
                report_offload_failures(failed_jobs)
                self._next_report_time = (
                        time.time() + REPORT_INTERVAL_SECS)


    def offload_once(self):
        """Perform one offload cycle.

        Find all job directories for new jobs that we haven't seen
        before.  Then, attempt to offload the directories for any
        jobs that have finished running.  Offload of multiple jobs
        is done in parallel, up to `self._processes` at a time.

        After we've tried uploading all directories, go through the list
        checking the status of all uploaded directories.  If necessary,
        report failures via e-mail.

        """
        self._add_new_jobs()
        with parallel.BackgroundTaskRunner(
                self._offload_func, processes=self._processes) as queue:
            for job in self._open_jobs.values():
                job.enqueue_offload(queue, self._age_limit)
        self._update_offload_results()
Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 734 | |
| 735 | |
def parse_options():
    """Parse the command line arguments passed to gs_offloader.

    If both --all and --hosts are given, prints the help text and an
    error message and exits with status 1.

    @return The options object produced by OptionParser.parse_args();
            positional arguments are discarded.
    """
    defaults_text = 'Defaults:\n Destination: %s\n Results Path: %s' % (
            utils.DEFAULT_OFFLOAD_GSURI, RESULTS_DIR)
    parser = OptionParser('usage: %prog [options]\n' + defaults_text)

    # (flags, keyword arguments) for every supported option, registered
    # in this order.
    option_table = [
        (('-a', '--all'),
         dict(dest='process_all', action='store_true',
              help='Offload all files in the results directory.')),
        (('-s', '--hosts'),
         dict(dest='process_hosts_only', action='store_true',
              help='Offload only the special tasks result files '
                   'located in the results/hosts subdirectory')),
        (('-p', '--parallelism'),
         dict(dest='parallelism', type='int', default=1,
              help='Number of parallel workers to use.')),
        (('-o', '--delete_only'),
         dict(dest='delete_only', action='store_true',
              help='GS Offloader will only the delete the '
                   'directories and will not offload them to google '
                   'storage. NOTE: If global_config variable '
                   'CROS.gs_offloading_enabled is False, --delete_only '
                   'is automatically True.',
              default=not GS_OFFLOADING_ENABLED)),
        (('-d', '--days_old'),
         dict(dest='days_old',
              help='Minimum job age in days before a result can be '
                   'offloaded.',
              type='int', default=0)),
        (('-t', '--pubsub_topic_for_job_upload'),
         dict(dest='pubsub_topic_for_job_upload',
              help='The pubsub topic to send notifciations for '
                   'new job upload',
              action='store', type='string', default=None)),
        (('-l', '--log_size'),
         dict(dest='log_size',
              help='Limit the offloader logs to a specified '
                   'number of Mega Bytes.',
              type='int', default=0)),
        # No explicit default: the option stays None when -m is absent,
        # which Offloader uses to fall back to the global config.
        (('-m',),
         dict(dest='multiprocessing', action='store_true',
              help='Turn on -m option for gsutil. If not set, the '
                   'global config setting gs_offloader_multiprocessing '
                   'under CROS section is applied.')),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    parsed, _ = parser.parse_args()
    if not (parsed.process_all and parsed.process_hosts_only):
        return parsed

    # --all and --hosts are mutually exclusive.
    parser.print_help()
    conflict_message = ('Cannot process all files and only the hosts '
                        'subdirectory. Please remove an argument.')
    print(conflict_message)
    sys.exit(1)
    return parsed
Scott Zawalski | cb2e2b7 | 2012-04-17 12:10:05 -0400 | [diff] [blame] | 782 | |
Simran Basi | 9523eaa | 2012-06-28 17:18:45 -0700 | [diff] [blame] | 783 | |
def main():
    """Entry point of gs_offloader.

    Parses the command line, sets up file logging, lowers the process
    priority, changes into the results directory, installs the SIGALRM
    handler and then runs offload cycles forever, sleeping
    `SLEEP_TIME_SECS` between cycles.
    """
    options = parse_options()

    # The kind of directories handled becomes part of the log file name.
    offloader_type = 'jobs'
    if options.process_all:
        offloader_type = 'all'
    elif options.process_hosts_only:
        offloader_type = 'hosts'

    # With a size limit the log is rotated in place, so the file name
    # carries no timestamp.
    if options.log_size > 0:
        log_timestamp = ''
    else:
        log_timestamp = time.strftime(LOG_TIMESTAMP_FORMAT)
    log_filename = os.path.join(
            LOG_LOCATION,
            LOG_FILENAME_FORMAT % (offloader_type, log_timestamp))
    log_formatter = logging.Formatter(LOGGING_FORMAT)
    # Replace the default logging handler with a RotatingFileHandler.
    # If options.log_size is 0, the file size will not be limited.
    # Keeps one backup just in case.
    # NOTE(review): the --log_size help text says megabytes, but the
    # multiplier below is 1024 - confirm which unit is intended.
    size_limit_bytes = 1024 * options.log_size
    file_handler = logging.handlers.RotatingFileHandler(
            log_filename, maxBytes=size_limit_bytes, backupCount=1)
    file_handler.setFormatter(log_formatter)
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    root_logger.addHandler(file_handler)

    # Nice our process (carried to subprocesses) so we don't overload
    # the system.
    logging.debug('Set process to nice value: %d', NICENESS)
    os.nice(NICENESS)
    if psutil:
        this_process = psutil.Process()
        logging.debug('Set process to ionice IDLE')
        this_process.ionice(psutil.IOPRIO_CLASS_IDLE)

    # os.listdir returns relative paths, so change to where we need to
    # be to avoid an os.path.join on each loop.
    logging.debug('Offloading Autotest results in %s', RESULTS_DIR)
    os.chdir(RESULTS_DIR)

    signal.signal(signal.SIGALRM, timeout_handler)

    offloader = Offloader(options)
    # Delete-only mode never touches Google Storage, so there is no
    # write access to wait for.
    if not options.delete_only:
        wait_for_gs_write_access(offloader.gs_uri)
    while True:
        offloader.offload_once()
        time.sleep(SLEEP_TIME_SECS)


if __name__ == '__main__':
    main()