blob: a85700e95d582fea5875ee9f8294d287cf4f6e7f [file] [log] [blame]
Scott Zawalski20a9b582011-11-21 11:49:40 -08001#!/usr/bin/python
2#
Scott Zawalskicb2e2b72012-04-17 12:10:05 -04003# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Scott Zawalski20a9b582011-11-21 11:49:40 -08004# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
J. Richard Barnetteea785362014-03-17 16:00:53 -07007"""Script to archive old Autotest results to Google Storage.
Scott Zawalski20a9b582011-11-21 11:49:40 -08008
J. Richard Barnetteea785362014-03-17 16:00:53 -07009Uses gsutil to archive files to the configured Google Storage bucket.
10Upon successful copy, the local results directory is deleted.
Scott Zawalski20a9b582011-11-21 11:49:40 -080011"""
12
Allen Lib41527d2017-06-22 17:28:00 -070013import abc
Simran Basibd9ded02013-11-04 15:49:11 -080014import datetime
Dan Shifaf50db2015-09-25 13:40:45 -070015import errno
Ningning Xia42111242016-06-15 14:35:58 -070016import glob
17import gzip
Simran Basi9523eaa2012-06-28 17:18:45 -070018import logging
Simran Basia2532282014-12-04 13:28:16 -080019import logging.handlers
Scott Zawalski20a9b582011-11-21 11:49:40 -080020import os
Dan Shiaffb9222015-04-15 17:05:47 -070021import re
Scott Zawalski20a9b582011-11-21 11:49:40 -080022import shutil
Laurence Goodbyca7726d2017-02-14 17:09:07 -080023import stat
Scott Zawalski20a9b582011-11-21 11:49:40 -080024import subprocess
25import sys
Allen Lib41527d2017-06-22 17:28:00 -070026import tarfile
Simran Basi9523eaa2012-06-28 17:18:45 -070027import tempfile
28import time
Scott Zawalskicb2e2b72012-04-17 12:10:05 -040029
Simran Basi7d9a1492012-10-25 13:51:54 -070030from optparse import OptionParser
31
Simran Basi981a9272012-11-14 10:46:03 -080032import common
Dan Shi96c3bdc2017-05-24 11:34:30 -070033from autotest_lib.client.common_lib import file_utils
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080034from autotest_lib.client.common_lib import global_config
Simran Basidd129972014-09-11 14:34:49 -070035from autotest_lib.client.common_lib import utils
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080036from autotest_lib.site_utils import job_directories
Michael Tange8bc9592017-07-06 10:59:32 -070037# For unittest, the cloud_console.proto is not compiled yet.
38try:
39 from autotest_lib.site_utils import cloud_console_client
40except ImportError:
41 cloud_console_client = None
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080042from autotest_lib.tko import models
Dan Shib2751fc2017-05-16 11:05:15 -070043from autotest_lib.utils import labellib
Allen Lib41527d2017-06-22 17:28:00 -070044from autotest_lib.utils import gslib
Allen Lib41527d2017-06-22 17:28:00 -070045from chromite.lib import timeout_util
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080046
Dan Shi5e2efb72017-02-07 11:40:23 -080047# Autotest requires the psutil module from site-packages, so it must be imported
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -080048# after "import common".
Simran Basi627a75e2017-02-08 11:07:20 -080049try:
50 # Does not exist, nor is needed, on moblab.
51 import psutil
52except ImportError:
53 psutil = None
Scott Zawalski20a9b582011-11-21 11:49:40 -080054
Dan Shi5e2efb72017-02-07 11:40:23 -080055from chromite.lib import parallel
56try:
57 from chromite.lib import metrics
58 from chromite.lib import ts_mon_config
59except ImportError:
Paul Hobbscd10e482017-08-28 12:00:06 -070060 metrics = utils.metrics_mock
61 ts_mon_config = utils.metrics_mock
Dan Shi5e2efb72017-02-07 11:40:23 -080062
Scott Zawalski20a9b582011-11-21 11:49:40 -080063
Simran Basif3e305f2014-10-03 14:43:53 -070064GS_OFFLOADING_ENABLED = global_config.global_config.get_config_value(
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -080065 'CROS', 'gs_offloading_enabled', type=bool, default=True)
Simran Basif3e305f2014-10-03 14:43:53 -070066
Scott Zawalski20a9b582011-11-21 11:49:40 -080067# Nice setting for process, the higher the number the lower the priority.
68NICENESS = 10
69
J. Richard Barnetteea785362014-03-17 16:00:53 -070070# Maximum number of seconds to allow for offloading a single
71# directory.
J. Richard Barnette7e0f8592014-09-03 17:00:55 -070072OFFLOAD_TIMEOUT_SECS = 60 * 60
Simran Basi9523eaa2012-06-28 17:18:45 -070073
Simran Basi392d4a52012-12-14 10:29:44 -080074# Sleep time per loop.
75SLEEP_TIME_SECS = 5
76
J. Richard Barnetteea785362014-03-17 16:00:53 -070077# Minimum number of seconds between e-mail reports.
78REPORT_INTERVAL_SECS = 60 * 60
79
Scott Zawalski20a9b582011-11-21 11:49:40 -080080# Location of Autotest results on disk.
81RESULTS_DIR = '/usr/local/autotest/results'
Prathmesh Prabhu16f9e5c2017-01-30 17:54:40 -080082FAILED_OFFLOADS_FILE = os.path.join(RESULTS_DIR, 'FAILED_OFFLOADS')
Scott Zawalski20a9b582011-11-21 11:49:40 -080083
Prathmesh Prabhu16f9e5c2017-01-30 17:54:40 -080084FAILED_OFFLOADS_FILE_HEADER = '''
85This is the list of gs_offloader failed jobs.
86Last offloader attempt at %s failed to offload %d files.
87Check http://go/cros-triage-gsoffloader to triage the issue
88
89
90First failure Count Directory name
91=================== ====== ==============================
92'''
93# --+----1----+---- ----+ ----+----1----+----2----+----3
94
Prathmesh Prabhu80dfb1e2017-01-30 18:01:29 -080095FAILED_OFFLOADS_LINE_FORMAT = '%19s %5d %-1s\n'
96FAILED_OFFLOADS_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
Simran Basi9523eaa2012-06-28 17:18:45 -070097
Jakob Juelich24f22c22014-09-26 11:46:11 -070098USE_RSYNC_ENABLED = global_config.global_config.get_config_value(
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -080099 'CROS', 'gs_offloader_use_rsync', type=bool, default=False)
Jakob Juelich24f22c22014-09-26 11:46:11 -0700100
Dan Shi1b4c7c32015-10-05 10:38:57 -0700101LIMIT_FILE_COUNT = global_config.global_config.get_config_value(
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800102 'CROS', 'gs_offloader_limit_file_count', type=bool, default=False)
103
Michael Tang0df2eb42016-05-13 19:06:54 -0700104# Use multiprocessing for gsutil uploading.
105GS_OFFLOADER_MULTIPROCESSING = global_config.global_config.get_config_value(
106 'CROS', 'gs_offloader_multiprocessing', type=bool, default=False)
107
Ningning Xia42111242016-06-15 14:35:58 -0700108D = '[0-9][0-9]'
Michael Tang97d188c2016-06-25 11:18:42 -0700109TIMESTAMP_PATTERN = '%s%s.%s.%s_%s.%s.%s' % (D, D, D, D, D, D, D)
Ningning Xia42111242016-06-15 14:35:58 -0700110CTS_RESULT_PATTERN = 'testResult.xml'
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800111CTS_V2_RESULT_PATTERN = 'test_result.xml'
Ningning Xia42111242016-06-15 14:35:58 -0700112# Google Storage bucket URI to store results in.
113DEFAULT_CTS_RESULTS_GSURI = global_config.global_config.get_config_value(
114 'CROS', 'cts_results_server', default='')
Ningning Xia205a1d42016-06-21 16:46:28 -0700115DEFAULT_CTS_APFE_GSURI = global_config.global_config.get_config_value(
116 'CROS', 'cts_apfe_server', default='')
Rohit Makasanaea337c52018-04-11 18:03:41 -0700117DEFAULT_CTS_DELTA_RESULTS_GSURI = global_config.global_config.get_config_value(
118 'CROS', 'ctsdelta_results_server', default='')
119DEFAULT_CTS_DELTA_APFE_GSURI = global_config.global_config.get_config_value(
120 'CROS', 'ctsdelta_apfe_server', default='')
Rohit Makasana8d868c92018-06-08 11:29:50 -0700121DEFAULT_CTS_BVT_APFE_GSURI = global_config.global_config.get_config_value(
122 'CROS', 'ctsbvt_apfe_server', default='')
Dan Shi1b4c7c32015-10-05 10:38:57 -0700123
Dan Shib2751fc2017-05-16 11:05:15 -0700124# metadata type
125GS_OFFLOADER_SUCCESS_TYPE = 'gs_offloader_success'
126GS_OFFLOADER_FAILURE_TYPE = 'gs_offloader_failure'
Michael Tang97d188c2016-06-25 11:18:42 -0700127
Rohit Makasana6a7b14d2017-08-23 13:51:44 -0700128# Autotest test to collect list of CTS tests
129TEST_LIST_COLLECTOR = 'tradefed-run-collect-tests-only'
Simran Basi9523eaa2012-06-28 17:18:45 -0700130
Dan Shib2751fc2017-05-16 11:05:15 -0700131def _get_metrics_fields(dir_entry):
132 """Get metrics fields for the given test result directory, including board
133 and milestone.
134
135 @param dir_entry: Directory entry to offload.
136 @return A dictionary for the metrics data to be uploaded.
137 """
138 fields = {'board': 'unknown',
139 'milestone': 'unknown'}
140 if dir_entry:
141 # There could be multiple hosts in the job directory, use the first one
142 # available.
143 for host in glob.glob(os.path.join(dir_entry, '*')):
Dan Shi23109012017-05-28 20:23:48 -0700144 try:
145 keyval = models.test.parse_job_keyval(host)
146 except ValueError:
147 continue
Dan Shib2751fc2017-05-16 11:05:15 -0700148 build = keyval.get('build')
149 if build:
Dan Shi02dd0662017-05-23 11:24:32 -0700150 try:
151 cros_version = labellib.parse_cros_version(build)
152 fields['board'] = cros_version.board
153 fields['milestone'] = cros_version.milestone
154 break
155 except ValueError:
156 # Ignore version parsing error so it won't crash
157 # gs_offloader.
158 pass
Dan Shib2751fc2017-05-16 11:05:15 -0700159
160 return fields;
161
162
def _get_cmd_list(multiprocessing, dir_entry, gs_path):
    """Build the gsutil command line that offloads one directory.

    With USE_RSYNC_ENABLED set the command is `gsutil rsync` and the
    target is `gs_path` joined with the base name of `dir_entry`;
    otherwise it is `gsutil cp` straight to `gs_path`.

    @param multiprocessing: True to turn on -m option for gsutil.
    @param dir_entry: Directory entry/path for which we need a command
                      list to offload.
    @param gs_path: Location in google storage where we will
                    offload the directory.

    @return A command list to be executed by Popen.
    """
    if USE_RSYNC_ENABLED:
        action = 'rsync'
        destination = os.path.join(gs_path, os.path.basename(dir_entry))
    else:
        action = 'cp'
        destination = gs_path

    parallel_flag = ['-m'] if multiprocessing else []
    transfer = [action, '-eR', dir_entry, destination]
    return ['gsutil'] + parallel_flag + transfer
Simran Basi9523eaa2012-06-28 17:18:45 -0700185
Jakob Juelich24f22c22014-09-26 11:46:11 -0700186
def sanitize_dir(dirpath):
    """Prepare a directory tree for upload to Google Storage.

    The directory and its contents are renamed to escaped names, then
    FIFOs and symlinks are replaced by regular marker files.  Nothing is
    done when `dirpath` does not exist.

    @param dirpath: Directory entry to be sanitized.
    """
    # NOTE(review): the later steps keep using `dirpath` even though the
    # first step may rename it - presumably job directory names never
    # need escaping; confirm.
    if os.path.exists(dirpath):
        _escape_rename(dirpath)
        _escape_rename_dir_contents(dirpath)
        _sanitize_fifos(dirpath)
        _sanitize_symlinks(dirpath)
200
201
202def _escape_rename_dir_contents(dirpath):
203 """Recursively rename directory to escape filenames for gs upload.
204
205 @param dirpath: Directory path string.
206 """
207 for filename in os.listdir(dirpath):
208 path = os.path.join(dirpath, filename)
209 _escape_rename(path)
210 for filename in os.listdir(dirpath):
211 path = os.path.join(dirpath, filename)
212 if os.path.isdir(path):
213 _escape_rename_dir_contents(path)
214
215
def _escape_rename(path):
    """Rename one file so that its name is escaped for gs upload.

    Only the last path component is passed through gslib.escape(); the
    parent directory part of the path is kept as it is.

    @param path: File path string.
    """
    parent, name = os.path.split(path)
    escaped_path = os.path.join(parent, gslib.escape(name))
    os.rename(path, escaped_path)
225
226
227def _sanitize_fifos(dirpath):
228 """Convert fifos to regular files (fixes crbug.com/684122).
229
230 @param dirpath: Directory path string.
231 """
232 for root, _, files in os.walk(dirpath):
233 for filename in files:
234 path = os.path.join(root, filename)
235 file_stat = os.lstat(path)
Laurence Goodbyca7726d2017-02-14 17:09:07 -0800236 if stat.S_ISFIFO(file_stat.st_mode):
Allen Lib41527d2017-06-22 17:28:00 -0700237 _replace_fifo_with_file(path)
238
239
240def _replace_fifo_with_file(path):
241 """Replace a fifo with a normal file.
242
243 @param path: Fifo path string.
244 """
245 logging.debug('Removing fifo %s', path)
246 os.remove(path)
247 logging.debug('Creating marker %s', path)
248 with open(path, 'w') as f:
249 f.write('<FIFO>')
250
251
252def _sanitize_symlinks(dirpath):
253 """Convert Symlinks to regular files (fixes crbug.com/692788).
254
255 @param dirpath: Directory path string.
256 """
257 for root, _, files in os.walk(dirpath):
258 for filename in files:
259 path = os.path.join(root, filename)
260 file_stat = os.lstat(path)
261 if stat.S_ISLNK(file_stat.st_mode):
262 _replace_symlink_with_file(path)
263
264
265def _replace_symlink_with_file(path):
266 """Replace a symlink with a normal file.
267
268 @param path: Symlink path string.
269 """
270 target = os.readlink(path)
271 logging.debug('Removing symlink %s', path)
272 os.remove(path)
273 logging.debug('Creating marker %s', path)
274 with open(path, 'w') as f:
275 f.write('<symlink to %s>' % target)
276
277
278# Maximum number of files in the folder.
Shuhei Takahashia9cc41e2018-06-04 16:22:25 +0900279_MAX_FILE_COUNT = 3000
Allen Lib41527d2017-06-22 17:28:00 -0700280_FOLDERS_NEVER_ZIP = ['debug', 'ssp_logs', 'autoupdate_logs']
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800281
282
283def _get_zippable_folders(dir_entry):
284 folders_list = []
285 for folder in os.listdir(dir_entry):
286 folder_path = os.path.join(dir_entry, folder)
287 if (not os.path.isfile(folder_path) and
Allen Lib41527d2017-06-22 17:28:00 -0700288 not folder in _FOLDERS_NEVER_ZIP):
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800289 folders_list.append(folder_path)
290 return folders_list
Dan Shiaffb9222015-04-15 17:05:47 -0700291
292
def limit_file_count(dir_entry):
    """Limit the number of files in given directory.

    All files below `dir_entry` are counted.  Once the count reaches
    _MAX_FILE_COUNT, folders inside the directory are each replaced by
    a tarball, except folders named in _FOLDERS_NEVER_ZIP.

    @param dir_entry: Directory entry to be checked.
    """
    try:
        file_count = _count_files(dir_entry)
    except ValueError:
        logging.warning('Fail to get the file count in folder %s.', dir_entry)
        return
    if file_count < _MAX_FILE_COUNT:
        return

    # For test job, zip folders in a second level, e.g. 123-debug/host1.
    # This is to allow autoserv debug folder still be accessible.
    # For special task, it does not need to dig one level deeper.
    is_special_task = re.match(job_directories.SPECIAL_TASK_PATTERN,
                               dir_entry)

    to_compress = _get_zippable_folders(dir_entry)
    if not is_special_task:
        to_compress = [subfolder
                       for folder in to_compress
                       for subfolder in _get_zippable_folders(folder)]

    for folder in to_compress:
        _make_into_tarball(folder)
326
327
328def _count_files(dirpath):
329 """Count the number of files in a directory recursively.
330
331 @param dirpath: Directory path string.
332 """
333 return sum(len(files) for _path, _dirs, files in os.walk(dirpath))
334
335
336def _make_into_tarball(dirpath):
337 """Make directory into tarball.
338
339 @param dirpath: Directory path string.
340 """
341 tarpath = '%s.tgz' % dirpath
342 with tarfile.open(tarpath, 'w:gz') as tar:
343 tar.add(dirpath, arcname=os.path.basename(dirpath))
344 shutil.rmtree(dirpath)
Dan Shi1b4c7c32015-10-05 10:38:57 -0700345
346
def correct_results_folder_permission(dir_entry):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder has
    the owner of root. This must be changed to the user running the
    autoserv process, so parsing job can access the results folder.

    Ownership is handed to the current uid/gid through `sudo -n chown`,
    then the owner gets read access to everything and search access to
    every directory.  A failing command is logged, not raised.

    @param dir_entry: Path to the results folder.  Nothing is done when
                      it is empty or None.
    """
    if not dir_entry:
        return

    logging.info('Trying to correct file permission of %s.', dir_entry)
    try:
        owner = '%s:%s' % (os.getuid(), os.getgid())
        commands = (
            ['sudo', '-n', 'chown', '-R', owner, dir_entry],
            ['chmod', '-R', 'u+r', dir_entry],
            ['find', dir_entry, '-type', 'd',
             '-exec', 'chmod', 'u+x', '{}', ';'],
        )
        # Run in order; the first failing command aborts the rest.
        for command in commands:
            subprocess.check_call(command)
    except subprocess.CalledProcessError as e:
        logging.error('Failed to modify permission for %s: %s',
                      dir_entry, e)
Dan Shie4a4f9f2015-07-20 09:00:25 -0700371
372
Allen Lib41527d2017-06-22 17:28:00 -0700373def _upload_cts_testresult(dir_entry, multiprocessing):
Ningning Xia2d88eec2016-07-25 23:18:46 -0700374 """Upload test results to separate gs buckets.
Ningning Xia42111242016-06-15 14:35:58 -0700375
Ilja H. Friedelbfa63142017-01-26 00:56:29 -0800376 Upload testResult.xml.gz/test_result.xml.gz file to cts_results_bucket.
Ningning Xia205a1d42016-06-21 16:46:28 -0700377 Upload timestamp.zip to cts_apfe_bucket.
Ningning Xia8db632f2016-08-19 11:01:35 -0700378
Ningning Xia42111242016-06-15 14:35:58 -0700379 @param dir_entry: Path to the results folder.
380 @param multiprocessing: True to turn on -m option for gsutil.
381 """
Ningning Xia2d981ee2016-07-06 17:59:54 -0700382 for host in glob.glob(os.path.join(dir_entry, '*')):
Ningning Xia2d88eec2016-07-25 23:18:46 -0700383 cts_path = os.path.join(host, 'cheets_CTS.*', 'results', '*',
384 TIMESTAMP_PATTERN)
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800385 cts_v2_path = os.path.join(host, 'cheets_CTS_*', 'results', '*',
386 TIMESTAMP_PATTERN)
Rohit Makasanaea337c52018-04-11 18:03:41 -0700387 gts_v2_path = os.path.join(host, 'cheets_GTS*', 'results', '*',
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800388 TIMESTAMP_PATTERN)
Ningning Xia2d88eec2016-07-25 23:18:46 -0700389 for result_path, result_pattern in [(cts_path, CTS_RESULT_PATTERN),
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800390 (cts_v2_path, CTS_V2_RESULT_PATTERN),
391 (gts_v2_path, CTS_V2_RESULT_PATTERN)]:
Ningning Xia2d88eec2016-07-25 23:18:46 -0700392 for path in glob.glob(result_path):
Ningning Xia0c27d9b2016-08-04 14:02:39 -0700393 try:
Rohit Makasana8d868c92018-06-08 11:29:50 -0700394 # CTS results from bvt-arc suites need to be only uploaded
395 # to APFE from its designated gs bucket for early EDI
396 # entries in APFE. These results need to copied only into
397 # APFE bucket. Copying to results bucket is not required.
398 if 'bvt-arc' in path:
399 _upload_files(host, path, result_pattern,
400 multiprocessing,
401 None,
402 DEFAULT_CTS_BVT_APFE_GSURI)
403 return
404 # Non-bvt CTS results need to be uploaded to standard gs
405 # buckets.
406 _upload_files(host, path, result_pattern,
407 multiprocessing,
408 DEFAULT_CTS_RESULTS_GSURI,
409 DEFAULT_CTS_APFE_GSURI)
Rohit Makasanaea337c52018-04-11 18:03:41 -0700410 # TODO(rohitbm): make better comparison using regex.
Rohit Makasana8d868c92018-06-08 11:29:50 -0700411 # plan_follower CTS results go to plan_follower specific
412 # gs buckets apart from standard gs buckets.
Rohit Makasanaea337c52018-04-11 18:03:41 -0700413 if 'plan_follower' in path:
Rohit Makasana8d868c92018-06-08 11:29:50 -0700414 _upload_files(host, path, result_pattern,
415 multiprocessing,
Rohit Makasanaea337c52018-04-11 18:03:41 -0700416 DEFAULT_CTS_DELTA_RESULTS_GSURI,
417 DEFAULT_CTS_DELTA_APFE_GSURI)
Ningning Xia0c27d9b2016-08-04 14:02:39 -0700418 except Exception as e:
419 logging.error('ERROR uploading test results %s to GS: %s',
420 path, e)
Ningning Xia205a1d42016-06-21 16:46:28 -0700421
Ningning Xia205a1d42016-06-21 16:46:28 -0700422
Ningning Xia8db632f2016-08-19 11:01:35 -0700423def _is_valid_result(build, result_pattern, suite):
424 """Check if the result should be uploaded to CTS/GTS buckets.
425
426 @param build: Builder name.
427 @param result_pattern: XML result file pattern.
428 @param suite: Test suite name.
429
430 @returns: Bool flag indicating whether a valid result.
431 """
432 if build is None or suite is None:
433 return False
434
435 # Not valid if it's not a release build.
436 if not re.match(r'(?!trybot-).*-release/.*', build):
437 return False
438
Ilja H. Friedelad6d8792016-11-28 21:53:44 -0800439 # Not valid if it's cts result but not 'arc-cts*' or 'test_that_wrapper'
440 # suite.
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800441 result_patterns = [CTS_RESULT_PATTERN, CTS_V2_RESULT_PATTERN]
Ilja H. Friedel61a70d32017-05-20 01:43:02 -0700442 if result_pattern in result_patterns and not (
Rohit Makasana8d868c92018-06-08 11:29:50 -0700443 suite.startswith('arc-cts') or
444 suite.startswith('arc-gts') or
445 suite.startswith('bvt-arc') or
Ilja H. Friedel61a70d32017-05-20 01:43:02 -0700446 suite.startswith('test_that_wrapper')):
Ningning Xia8db632f2016-08-19 11:01:35 -0700447 return False
448
449 return True
Ningning Xia21922c82016-07-29 11:03:15 -0700450
451
def _is_test_collector(package):
    """Tell whether a test run only collects the list of CTS tests.

    @param package: Autotest package name. e.g. cheets_CTS_N.CtsGraphicsTestCase

    @return True when the TEST_LIST_COLLECTOR marker occurs in `package`,
            False otherwise.
    """
    collector_marker = TEST_LIST_COLLECTOR
    return collector_marker in package
460
461
def _upload_files(host, path, result_pattern, multiprocessing,
                  result_gs_bucket, apfe_gs_bucket):
    """Upload the CTS/GTS results found at one result path.

    Nothing is uploaded unless _is_valid_result() accepts the build and
    suite read from the job keyvals of `host`.

    @param host: Host directory of the job; its job keyvals supply the
                 build, the suite and the parent job id.
    @param path: Result directory.  The job id, the package and the
                 timestamp are taken from its 6th-last, 4th-last and
                 last path components.
    @param result_pattern: XML result file pattern looked up in `path`.
    @param multiprocessing: True to turn on -m option for gsutil.
    @param result_gs_bucket: Bucket uri for the gzipped XML result; a
                             false value skips that upload.
    @param apfe_gs_bucket: Bucket uri for the `<path>.zip` archive.

    @raises KeyError if the job keyvals have no 'parent_job_id'.
    """
    keyval = models.test.parse_job_keyval(host)
    build = keyval.get('build')
    suite = keyval.get('suite')

    if not _is_valid_result(build, result_pattern, suite):
        # No need to upload current folder, return.
        return

    parent_job_id = str(keyval['parent_job_id'])

    folders = path.split(os.sep)
    job_id = folders[-6]
    package = folders[-4]
    timestamp = folders[-1]

    # Results produced by CTS test list collector are dummy results.
    # They don't need to be copied to APFE bucket which is mainly being used
    # for CTS APFE submission.
    if not _is_test_collector(package):
        # Path: bucket/build/parent_job_id/cheets_CTS.*/job_id_timestamp/
        # or bucket/build/parent_job_id/cheets_GTS.*/job_id_timestamp/
        cts_apfe_gs_path = os.path.join(
                apfe_gs_bucket, build, parent_job_id,
                package, job_id + '_' + timestamp) + '/'

        for zip_file in glob.glob('%s.zip' % path):
            utils.run(' '.join(_get_cmd_list(
                    multiprocessing, zip_file, cts_apfe_gs_path)))
            logging.debug('Upload %s to %s ', zip_file, cts_apfe_gs_path)
    else:
        logging.debug('%s is a CTS Test collector Autotest test run.', package)
        logging.debug('Skipping CTS results upload to APFE gs:// bucket.')

    if result_gs_bucket:
        # Path: bucket/cheets_CTS.*/job_id_timestamp/
        # or bucket/cheets_GTS.*/job_id_timestamp/
        test_result_gs_path = os.path.join(
                result_gs_bucket, package, job_id + '_' + timestamp) + '/'

        for test_result_file in glob.glob(os.path.join(path, result_pattern)):
            # gzip test_result_file(testResult.xml/test_result.xml)
            test_result_file_gz = '%s.gz' % test_result_file
            try:
                # Copy bytes, not text: gzip files are binary streams.
                with open(test_result_file, 'rb') as f_in:
                    with gzip.open(test_result_file_gz, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
                utils.run(' '.join(_get_cmd_list(
                        multiprocessing, test_result_file_gz,
                        test_result_gs_path)))
                logging.debug('Zip and upload %s to %s',
                              test_result_file_gz, test_result_gs_path)
            finally:
                # Remove test_result_file_gz even when the upload
                # failed, so that no stray copy stays in the results.
                if os.path.exists(test_result_file_gz):
                    os.remove(test_result_file_gz)
Ningning Xia0c27d9b2016-08-04 14:02:39 -0700516
Ningning Xia42111242016-06-15 14:35:58 -0700517
def _emit_gs_returncode_metric(returncode):
    """Increment the gs_returncode counter based on |returncode|.

    @param returncode: Return code, anything int() accepts.  Values
                       outside of 0..255 are reported as -1.
    """
    m_gs_returncode = 'chromeos/autotest/gs_offloader/gs_returncode'
    rcode = int(returncode)
    if not 0 <= rcode <= 255:
        rcode = -1
    counter = metrics.Counter(m_gs_returncode)
    counter.increment(fields={'return_code': rcode})
525
526
def _handle_dir_os_error(dir_entry, fix_permission=False):
    """Count an OS error on a result directory, fixing permission if asked.

    @param dir_entry: Directory entry to offload.
    @param fix_permission: True to change the directory's owner to the same
                           one running gs_offloader.
    """
    if fix_permission:
        correct_results_folder_permission(dir_entry)

    # NOTE(review): the counter is incremented whether or not a fix was
    # attempted - confirm against the upstream nesting.
    fields = _get_metrics_fields(dir_entry)
    counter = metrics.Counter('chromeos/autotest/errors/gs_offloader/'
                              'wrong_permissions_count')
    counter.increment(fields=fields)
540
541
class BaseGSOffloader(object):

    """Google Storage offloader interface."""

    __metaclass__ = abc.ABCMeta

    def offload(self, dir_entry, dest_path, job_complete_time):
        """Safely offload a directory entry to Google Storage.

        Copies the contents of `dir_entry` to Google storage at
        `dest_path` by delegating to `_full_offload()`.

        On success all of `dir_entry` must have been deleted.  On
        failure `dir_entry` should be left undisturbed, so that the
        offload can be retried.

        There are only two ways in which an error is conveyed:
          * At the time of failure, enough information is written to
            the log to allow later debug, if necessary.
          * The content is not deleted.

        For robustness this method never raises: any exception coming
        out of `_full_offload()` is logged at debug level and dropped.

        @param dir_entry: Directory entry to offload.
        @param dest_path: Location in google storage where we will
                          offload the directory.
        @param job_complete_time: The complete time of the job from the AFE
                                  database.
        """
        try:
            self._full_offload(dir_entry, dest_path, job_complete_time)
        except Exception as error:
            logging.debug('Exception in offload for %s', dir_entry)
            logging.debug('Ignoring this error: %s', str(error))

    @abc.abstractmethod
    def _full_offload(self, dir_entry, dest_path, job_complete_time):
        """Offload a directory entry to Google Storage.

        Subclasses put their actual offload behavior here.  To keep
        failures debuggable, an implementation should catch all
        exceptions and perform any reasonable diagnosis or other
        handling itself.

        @param dir_entry: Directory entry to offload.
        @param dest_path: Location in google storage where we will
                          offload the directory.
        @param job_complete_time: The complete time of the job from the AFE
                                  database.
        """
593
594
595class GSOffloader(BaseGSOffloader):
596 """Google Storage Offloader."""
597
Michael Tang0f553bd2017-06-16 17:38:45 -0700598 def __init__(self, gs_uri, multiprocessing, delete_age,
599 console_client=None):
Allen Lib41527d2017-06-22 17:28:00 -0700600 """Returns the offload directory function for the given gs_uri
601
602 @param gs_uri: Google storage bucket uri to offload to.
603 @param multiprocessing: True to turn on -m option for gsutil.
Michael Tang0f553bd2017-06-16 17:38:45 -0700604 @param console_client: The cloud console client. If None,
605 cloud console APIs are not called.
Allen Lib41527d2017-06-22 17:28:00 -0700606 """
607 self._gs_uri = gs_uri
608 self._multiprocessing = multiprocessing
609 self._delete_age = delete_age
Michael Tang0f553bd2017-06-16 17:38:45 -0700610 self._console_client = console_client
Dan Shib2751fc2017-05-16 11:05:15 -0700611
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/gs_offloader/job_offload_duration')
def _full_offload(self, dir_entry, dest_path, job_complete_time):
    """Upload a directory entry to Google Storage, then prune it.

    The upload is attempted via `_try_offload()`.  If that raises
    `OSError`, `_handle_dir_os_error()` is invoked and the upload is
    attempted exactly once more.  If the upload raises `_OffloadError`,
    the error is counted and the captured gsutil output is logged;
    otherwise `_prune()` is called for the directory.

    @param dir_entry: Directory entry to offload.
    @param dest_path: Location in google storage where we will
                      offload the directory.
    @param job_complete_time: The complete time of the job from the AFE
                              database.
    """
    with tempfile.TemporaryFile('w+') as stdout_file, \
         tempfile.TemporaryFile('w+') as stderr_file:
        offload_args = (dir_entry, dest_path, stdout_file, stderr_file)
        try:
            try:
                self._try_offload(*offload_args)
            except OSError as os_error:
                # Let the helper deal with the directory error (it is
                # told whether this was a permission problem), then
                # retry the upload once, right away.
                permission_denied = (os_error.errno == errno.EACCES)
                _handle_dir_os_error(dir_entry, permission_denied)
                self._try_offload(*offload_args)
        except _OffloadError:
            error_fields = _get_metrics_fields(dir_entry)
            metrics.Counter(
                    'chromeos/autotest/errors/gs_offloader/any_error'
            ).increment(fields=error_fields)

            # Both capture files were written by the child process;
            # rewind them so their contents can be read back and logged.
            stdout_file.seek(0)
            stderr_file.seek(0)
            stderr_content = stderr_file.read()
            stdout_content = stdout_file.read()
            logging.warning('Error occurred when offloading %s:', dir_entry)
            logging.warning('Stdout:\n%s \nStderr:\n%s', stdout_content,
                            stderr_content)

            # Some result files may have wrong file permission; fix
            # them so that a later attempt can succeed.
            # TODO(dshi): This was added to clean up files affected by
            # bug 511778.  Once those files are gone, this check and
            # correct_results_folder_permission can be deleted.
            if 'CommandException: Error opening file' in stderr_content:
                correct_results_folder_permission(dir_entry)
        else:
            # Only reached when no _OffloadError was raised.
            self._prune(dir_entry, job_complete_time)
Dan Shib2751fc2017-05-16 11:05:15 -0700661
def _try_offload(self, dir_entry, dest_path,
                 stdout_file, stderr_file):
    """Make one attempt to offload a directory entry to Google storage.

    Does nothing if the directory already carries the upload marker.
    On success the directory is marked as uploaded.

    @param dir_entry: Directory entry to offload.
    @param dest_path: Location in google storage where we will
                      offload the directory.
    @param stdout_file: Open file that receives the upload command's
                        stdout.
    @param stderr_file: Open file that receives the upload command's
                        stderr.

    @raises _OffloadError: if the upload command exits non-zero, if the
                           console client fails to send the offloaded
                           message, or if the upload times out.
    """
    if _is_uploaded(dir_entry):
        return
    start_time = time.time()
    metrics_fields = _get_metrics_fields(dir_entry)
    error_obj = _OffloadError(start_time)
    # Bind `process` before entering the `try` so the timeout handler
    # below can always test it, even if a timeout were to surface from
    # one of the preparation steps that run before Popen().
    process = None
    try:
        sanitize_dir(dir_entry)
        if DEFAULT_CTS_RESULTS_GSURI:
            _upload_cts_testresult(dir_entry, self._multiprocessing)

        if LIMIT_FILE_COUNT:
            limit_file_count(dir_entry)

        with timeout_util.Timeout(OFFLOAD_TIMEOUT_SECS):
            gs_path = '%s%s' % (self._gs_uri, dest_path)
            process = subprocess.Popen(
                    _get_cmd_list(self._multiprocessing, dir_entry, gs_path),
                    stdout=stdout_file, stderr=stderr_file)
            process.wait()

        _emit_gs_returncode_metric(process.returncode)
        if process.returncode != 0:
            raise error_obj
        _emit_offload_metrics(dir_entry)

        if self._console_client:
            gcs_uri = os.path.join(gs_path,
                                   os.path.basename(dir_entry))
            if not self._console_client.send_test_job_offloaded_message(
                    gcs_uri):
                raise error_obj

        _mark_uploaded(dir_entry)
    except timeout_util.TimeoutError:
        m_timeout = 'chromeos/autotest/errors/gs_offloader/timed_out_count'
        metrics.Counter(m_timeout).increment(fields=metrics_fields)
        # If we finished the call to Popen(), we may need to
        # terminate the child process.  We don't bother calling
        # process.poll(); that inherently races because the child
        # can die any time it wants.
        if process:
            try:
                process.terminate()
            except OSError:
                # We don't expect any error other than "No such
                # process".
                pass
        logging.error('Offloading %s timed out after waiting %d '
                      'seconds.', dir_entry, OFFLOAD_TIMEOUT_SECS)
        raise error_obj
725
def _prune(self, dir_entry, job_complete_time):
    """Delete a local directory once it is both uploaded and expired.

    Expiry is decided by `job_directories.is_job_expired()` using
    `self._delete_age` and the job's completion time.

    @param dir_entry: Directory entry to prune.
    @param job_complete_time: The complete time of the job from the AFE
                              database.
    """
    if not _is_uploaded(dir_entry):
        return
    if not job_directories.is_job_expired(self._delete_age,
                                          job_complete_time):
        return
    try:
        shutil.rmtree(dir_entry)
    except OSError as os_error:
        # Wrong file permissions can make rmtree fail with
        # 'Permission denied'; see crbug.com/536151.  Hand the error to
        # the helper, then try the removal one more time.
        permission_denied = (os_error.errno == errno.EACCES)
        _handle_dir_os_error(dir_entry, permission_denied)
        shutil.rmtree(dir_entry)
Simran Basi9523eaa2012-06-28 17:18:45 -0700746
Scott Zawalski20a9b582011-11-21 11:49:40 -0800747
class _OffloadError(Exception):
    """Raised when an offload to Google Storage fails.

    The time at which the failed attempt started is kept both as the
    exception's sole argument and as the `start_time` attribute.
    """

    def __init__(self, start_time):
        """Create the error.

        @param start_time: Time at which the offload attempt started.
        """
        Exception.__init__(self, start_time)
        self.start_time = start_time
Simran Basibd9ded02013-11-04 15:49:11 -0800754
Allen Lib41527d2017-06-22 17:28:00 -0700755
756
class FakeGSOffloader(BaseGSOffloader):
    """Stand-in offloader that uploads nothing and only deletes."""

    def _full_offload(self, dir_entry, dest_path, job_complete_time):
        """Remove the directory without uploading it anywhere.

        @param dir_entry: Directory entry to delete.
        @param dest_path: Unused; accepted to match the signature of
                          the real offloader.
        @param job_complete_time: Unused; accepted to match the
                                  signature of the real offloader.
        """
        shutil.rmtree(dir_entry)
771
772
def _is_expired(job, age_limit):
    """Tell whether a job directory is old enough to be uploaded.

    A job whose `get_timestamp_if_finished()` returns a false value is
    never considered expired.

    @param job: _JobDirectory instance.
    @param age_limit: Minimum age in days at which a job may be offloaded.

    @returns False for a job without a finish timestamp, otherwise the
             result of `job_directories.is_job_expired()`.
    """
    finished_at = job.get_timestamp_if_finished()
    if finished_at:
        return job_directories.is_job_expired(age_limit, finished_at)
    return False
783
784
def _emit_offload_metrics(dirpath):
    """Record counters for one offloaded directory.

    Increments the offloaded-jobs counter by one and adds the
    directory's size, as reported by
    `file_utils.get_directory_size_kibibytes()`, to the transferred
    counter.

    @param dirpath: Offloaded directory path.
    """
    size_kib = file_utils.get_directory_size_kibibytes(dirpath)
    fields = _get_metrics_fields(dirpath)

    jobs_counter = metrics.Counter(
            'chromeos/autotest/gs_offloader/jobs_offloaded')
    jobs_counter.increment(fields=fields)

    size_counter = metrics.Counter(
            'chromeos/autotest/gs_offloader/kilobytes_transferred')
    size_counter.increment_by(size_kib, fields=fields)
Simran Basibd9ded02013-11-04 15:49:11 -0800801
802
def _is_uploaded(dirpath):
    """Tell whether the directory carries the upload marker file.

    @param dirpath: Directory path string.

    @returns True if the marker file exists as a regular file.
    """
    marker_path = _get_uploaded_marker_file(dirpath)
    return os.path.isfile(marker_path)
809
810
def _mark_uploaded(dirpath):
    """Create the upload marker file inside the directory.

    The marker is opened in append mode, so an existing marker is left
    as it is.

    @param dirpath: Directory path string.
    """
    marker_path = _get_uploaded_marker_file(dirpath)
    open(marker_path, 'a').close()
818
819
def _get_uploaded_marker_file(dirpath):
    """Build the path of the upload marker file for a directory.

    @param dirpath: Directory path string.

    @returns `dirpath` followed by '/.GS_UPLOADED'.
    """
    marker_name = '.GS_UPLOADED'
    return '%s/%s' % (dirpath, marker_name)
826
827
def _format_job_for_failure_reporting(job):
    """Render one _JobDirectory as a line for reporting / logging.

    The line is built from the time of the job's first offload attempt,
    its offload attempt count and its directory name.

    @param job: The _JobDirectory to format.

    @returns The formatted string.
    """
    first_attempt = datetime.datetime.fromtimestamp(job.first_offload_start)
    return FAILED_OFFLOADS_LINE_FORMAT % (
            first_attempt.strftime(FAILED_OFFLOADS_TIME_FORMAT),
            job.offload_count,
            job.dirname)
Prathmesh Prabhufda271a2017-01-30 17:53:12 -0800838
839
def wait_for_gs_write_access(gs_uri):
    """Verify and wait until we have write access to Google Storage.

    Repeatedly uploads a temporary file to `gs_uri` and removes it
    again with `gsutil rm`.  Returns once both commands succeed; after
    a failure it sleeps for 120 seconds and retries, with no upper
    bound on the number of attempts.

    @param gs_uri: The Google Storage URI we are trying to offload to.
    """
    # TODO (sbasi) Try to use the gsutil command to check write access.
    # The local probe file is managed by a `with` block so that it is
    # closed (and thereby removed) deterministically when we are done,
    # rather than whenever the object happens to be collected.
    with tempfile.NamedTemporaryFile() as dummy_file:
        upload_cmd = _get_cmd_list(False, dummy_file.name, gs_uri)
        remove_cmd = ['gsutil', 'rm',
                      os.path.join(gs_uri,
                                   os.path.basename(dummy_file.name))]
        while True:
            try:
                subprocess.check_call(upload_cmd)
                subprocess.check_call(remove_cmd)
                break
            except subprocess.CalledProcessError:
                logging.debug('Unable to offload to %s, sleeping.', gs_uri)
                time.sleep(120)
Simran Basiac0edb22015-04-23 16:15:51 -0700860
861
class Offloader(object):
    """State of the offload process.

    Contains the following member fields:
      * gs_uri: Google Storage URI to offload to.  Only set when
        `options.delete_only` is false.
      * _gs_offloader: Object used to offload a job directory; a
        FakeGSOffloader in delete-only mode, otherwise a GSOffloader.
      * _jobdir_classes: List of classes of job directory to be
        offloaded.
      * _processes: Maximum number of outstanding offload processes
        to allow during an offload cycle.
      * _upload_age_limit: Minimum age in days at which a job may be
        offloaded.
      * _delete_age_limit: Minimum age in days at which an offloaded
        job may be removed from local storage.
      * _open_jobs: a dictionary mapping directory paths to Job
        objects.
      * _offload_count_limit: Number of offload attempts after which a
        job is given up on.
    """

    def __init__(self, options):
        """Set up the offloader from parsed command line options.

        @param options: Parsed options, as returned by `parse_options()`.
        """
        self._upload_age_limit = options.age_to_upload
        self._delete_age_limit = options.age_to_delete
        if options.delete_only:
            self._gs_offloader = FakeGSOffloader()
        else:
            self.gs_uri = utils.get_offload_gsuri()
            logging.debug('Offloading to: %s', self.gs_uri)
            # An explicit -m wins; when the flag was not given at all
            # (None) fall back to the global config setting.
            multiprocessing = False
            if options.multiprocessing:
                multiprocessing = True
            elif options.multiprocessing is None:
                multiprocessing = GS_OFFLOADER_MULTIPROCESSING
            logging.info(
                    'Offloader multiprocessing is set to:%r', multiprocessing)
            console_client = None
            if (cloud_console_client and
                    cloud_console_client.is_cloud_notification_enabled()):
                console_client = cloud_console_client.PubSubBasedClient()
            self._gs_offloader = GSOffloader(
                    self.gs_uri, multiprocessing, self._delete_age_limit,
                    console_client)
        classlist = [
                job_directories.SwarmingJobDirectory,
        ]
        if options.process_hosts_only or options.process_all:
            classlist.append(job_directories.SpecialJobDirectory)
        if not options.process_hosts_only:
            classlist.append(job_directories.RegularJobDirectory)
        self._jobdir_classes = classlist
        assert self._jobdir_classes
        self._processes = options.parallelism
        self._open_jobs = {}
        # NOTE(review): the name looks like a misspelling of
        # `_pubsub_topic` and nothing in this class reads it.  It is
        # kept as is in case other code refers to it.
        self._pusub_topic = None
        self._offload_count_limit = 3


    def _add_new_jobs(self):
        """Find new job directories that need offloading.

        Go through the file system looking for valid job directories
        that are currently not in `self._open_jobs`, and add them in.

        """
        new_job_count = 0
        for cls in self._jobdir_classes:
            for resultsdir in cls.get_job_directories():
                if resultsdir in self._open_jobs:
                    continue
                self._open_jobs[resultsdir] = cls(resultsdir)
                new_job_count += 1
        logging.debug('Start of offload cycle - found %d new jobs',
                      new_job_count)


    def _remove_offloaded_jobs(self):
        """Remove offloaded jobs from `self._open_jobs`.

        A job is dropped when its directory no longer exists or when
        the directory is marked as uploaded.
        """
        removed_job_count = 0
        # Iterate over a snapshot: entries are deleted inside the loop,
        # which is an error when `.items()` is a live view of the dict.
        for jobkey, job in list(self._open_jobs.items()):
            if (
                    not os.path.exists(job.dirname)
                    or _is_uploaded(job.dirname)):
                del self._open_jobs[jobkey]
                removed_job_count += 1
        logging.debug('End of offload cycle - cleared %d new jobs, '
                      'carrying %d open jobs',
                      removed_job_count, len(self._open_jobs))


    def _report_failed_jobs(self):
        """Report status after attempting offload.

        This function processes all jobs in `self._open_jobs`, assuming
        an attempt has just been made to offload all of them.

        Every remaining job that has a `first_offload_start` value is
        treated as failed.  The number of failed jobs is reported as a
        metric and the list of failed jobs is written to a local file.

        """
        failed_jobs = [j for j in self._open_jobs.values() if
                       j.first_offload_start]
        self._report_failed_jobs_count(failed_jobs)
        self._log_failed_jobs_locally(failed_jobs)


    def offload_once(self):
        """Perform one offload cycle.

        Find all job directories for new jobs that we haven't seen
        before.  Then, attempt to offload the directories for any
        jobs that have finished running.  Offload of multiple jobs
        is done in parallel, up to `self._processes` at a time.

        After we've tried uploading all directories, give up on jobs
        that reached the attempt limit, drop the jobs that are done
        from the open set, and report the ones that are left as
        failures.

        """
        self._add_new_jobs()
        self._report_current_jobs_count()
        with parallel.BackgroundTaskRunner(
                self._gs_offloader.offload, processes=self._processes) as queue:
            for job in self._open_jobs.values():
                _enqueue_offload(job, queue, self._upload_age_limit)
        self._give_up_on_jobs_over_limit()
        self._remove_offloaded_jobs()
        self._report_failed_jobs()


    def _give_up_on_jobs_over_limit(self):
        """Give up on jobs that have gone over the offload limit.

        We mark them as uploaded as we won't try to offload them any more.
        """
        for job in self._open_jobs.values():
            if job.offload_count >= self._offload_count_limit:
                _mark_uploaded(job.dirname)


    def _log_failed_jobs_locally(self, failed_jobs,
                                 log_file=FAILED_OFFLOADS_FILE):
        """Updates a local file listing all the failed jobs.

        The dropped file can be used by the developers to list jobs that we have
        failed to upload.  The file is rewritten from scratch on every
        call: a header with the current time and the number of failed
        jobs, followed by the sorted, formatted job lines.

        @param failed_jobs: A list of failed _JobDirectory objects.
        @param log_file: The file to log the failed jobs to.
        """
        now = datetime.datetime.now()
        now_str = now.strftime(FAILED_OFFLOADS_TIME_FORMAT)
        formatted_jobs = [_format_job_for_failure_reporting(job)
                          for job in failed_jobs]
        formatted_jobs.sort()

        with open(log_file, 'w') as logfile:
            logfile.write(FAILED_OFFLOADS_FILE_HEADER %
                          (now_str, len(failed_jobs)))
            logfile.writelines(formatted_jobs)


    def _report_current_jobs_count(self):
        """Report the number of outstanding jobs to monarch."""
        metrics.Gauge('chromeos/autotest/gs_offloader/current_jobs_count').set(
                len(self._open_jobs))


    def _report_failed_jobs_count(self, failed_jobs):
        """Report the number of outstanding failed offload jobs to monarch.

        @param failed_jobs: List of failed jobs.
        """
        metrics.Gauge('chromeos/autotest/gs_offloader/failed_jobs_count').set(
                len(failed_jobs))
1032
1033
def _enqueue_offload(job, queue, age_limit):
    """Request offload processing for a job, if it's eligible.

    A job that has never been attempted (`offload_count` is zero) is
    only eligible once `_is_expired()` accepts it for `age_limit`; at
    that point its `first_offload_start` is set to the current time.
    A job that has been attempted before is always retried.

    For an eligible job the attempt count is incremented, and if
    `job.process_gs_instructions()` returns a true value a three item
    list is handed to `queue.put()`: the job's `dirname`, the parent
    directory of `dirname`, and the job's finish timestamp.

    @param job       _JobDirectory instance to offload.
    @param queue     If the job should be offloaded, put the offload
                     parameters into this queue for processing.
    @param age_limit Minimum age for a job to be offloaded.  A value
                     of 0 means that the job will be offloaded as
                     soon as it is finished.

    """
    never_attempted = not job.offload_count
    if never_attempted:
        if not _is_expired(job, age_limit):
            return
        job.first_offload_start = time.time()
    job.offload_count += 1
    if not job.process_gs_instructions():
        return
    finished_at = job.get_timestamp_if_finished()
    parent_dir = os.path.dirname(job.dirname)
    queue.put([job.dirname, parent_dir, finished_at])
1061
1062
def parse_options():
    """Parse the args passed into gs_offloader.

    Prints the help text plus an explanation and exits with status 1
    when --all is combined with --hosts, or when --days_old is combined
    with --age_to_upload / --age_to_delete.

    When --age_to_upload or --age_to_delete is not given, it takes the
    value of --days_old.

    @returns The parsed options object.
    """
    defaults = 'Defaults:\n Destination: %s\n Results Path: %s' % (
            utils.DEFAULT_OFFLOAD_GSURI, RESULTS_DIR)
    usage = 'usage: %prog [options]\n' + defaults
    parser = OptionParser(usage)
    parser.add_option('-a', '--all', dest='process_all',
                      action='store_true',
                      help='Offload all files in the results directory.')
    parser.add_option('-s', '--hosts', dest='process_hosts_only',
                      action='store_true',
                      help='Offload only the special tasks result files '
                      'located in the results/hosts subdirectory')
    parser.add_option('-p', '--parallelism', dest='parallelism',
                      type='int', default=1,
                      help='Number of parallel workers to use.')
    parser.add_option('-o', '--delete_only', dest='delete_only',
                      action='store_true',
                      help='GS Offloader will only the delete the '
                      'directories and will not offload them to google '
                      'storage. NOTE: If global_config variable '
                      'CROS.gs_offloading_enabled is False, --delete_only '
                      'is automatically True.',
                      default=not GS_OFFLOADING_ENABLED)
    parser.add_option('-d', '--days_old', dest='days_old',
                      help='Minimum job age in days before a result can be '
                      'offloaded.', type='int', default=0)
    parser.add_option('-l', '--log_size', dest='log_size',
                      help='Limit the offloader logs to a specified '
                      'number of Mega Bytes.', type='int', default=0)
    parser.add_option('-m', dest='multiprocessing', action='store_true',
                      help='Turn on -m option for gsutil. If not set, the '
                      'global config setting gs_offloader_multiprocessing '
                      'under CROS section is applied.')
    parser.add_option('-i', '--offload_once', dest='offload_once',
                      action='store_true',
                      help='Upload all available results and then exit.')
    parser.add_option('-y', '--normal_priority', dest='normal_priority',
                      action='store_true',
                      help='Upload using normal process priority.')
    parser.add_option('-u', '--age_to_upload', dest='age_to_upload',
                      help='Minimum job age in days before a result can be '
                      'offloaded, but not removed from local storage',
                      type='int', default=None)
    parser.add_option('-n', '--age_to_delete', dest='age_to_delete',
                      help='Minimum job age in days before a result can be '
                      'removed from local storage',
                      type='int', default=None)
    parser.add_option(
            '--metrics-file',
            help='If provided, drop metrics to this local file instead of '
                 'reporting to ts_mon',
            type=str,
            default=None,
    )

    # Positional arguments are ignored; only the options are used.
    options = parser.parse_args()[0]
    if options.process_all and options.process_hosts_only:
        parser.print_help()
        print ('Cannot process all files and only the hosts '
               'subdirectory. Please remove an argument.')
        sys.exit(1)

    if options.days_old and (options.age_to_upload or options.age_to_delete):
        parser.print_help()
        print('Use the days_old option or the age_to_* options but not both')
        sys.exit(1)

    # None means the option was not given on the command line; an
    # explicit 0 must be kept, so test identity rather than truthiness.
    if options.age_to_upload is None:
        options.age_to_upload = options.days_old
    if options.age_to_delete is None:
        options.age_to_delete = options.days_old

    return options
Scott Zawalskicb2e2b72012-04-17 12:10:05 -04001137
Simran Basi9523eaa2012-06-28 17:18:45 -07001138
def main():
    """Entry point of gs_offloader.

    Parses the options, sets up logging, optionally lowers the process
    priority, changes into the results directory and then runs offload
    cycles, either once or forever with a sleep between cycles.
    """
    options = parse_options()

    # Name of the kind of results this instance handles; used in the
    # log file name and in the monitoring service name.
    offloader_type = 'jobs'
    if options.process_all:
        offloader_type = 'all'
    elif options.process_hosts_only:
        offloader_type = 'hosts'

    _setup_logging(options, offloader_type)

    # Nice our process (carried to subprocesses) so we don't overload
    # the system.
    if not options.normal_priority:
        logging.debug('Set process to nice value: %d', NICENESS)
        os.nice(NICENESS)
    if psutil:
        this_process = psutil.Process()
        logging.debug('Set process to ionice IDLE')
        this_process.ionice(psutil.IOPRIO_CLASS_IDLE)

    # os.listdir returns relative paths, so change to where we need to
    # be to avoid an os.path.join on each loop.
    logging.debug('Offloading Autotest results in %s', RESULTS_DIR)
    os.chdir(RESULTS_DIR)

    service_name = 'gs_offloader(%s)' % offloader_type
    with ts_mon_config.SetupTsMonGlobalState(service_name, indirect=True,
                                             short_lived=False,
                                             debug_file=options.metrics_file):
        with metrics.SuccessCounter('chromeos/autotest/gs_offloader/exit'):
            offloader = Offloader(options)
            if not options.delete_only:
                wait_for_gs_write_access(offloader.gs_uri)
            # Always run one cycle; keep cycling, with a pause in
            # between, unless a single cycle was requested.
            offloader.offload_once()
            while not options.offload_once:
                time.sleep(SLEEP_TIME_SECS)
                offloader.offload_once()
Scott Zawalskicb2e2b72012-04-17 12:10:05 -04001180
1181
# Directory that receives the offloader log files.
_LOG_LOCATION = '/usr/local/autotest/logs/'
# Log file name; filled in with the offloader type and a timestamp
# (which may be empty).
_LOG_FILENAME_FORMAT = 'gs_offloader_%s_log_%s.txt'
# strftime format of the timestamp embedded in the log file name.
_LOG_TIMESTAMP_FORMAT = '%Y%m%d_%H%M%S'
# Layout of each record written to the log.
_LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
1186
1187
def _setup_logging(options, offloader_type):
    """Set up logging.

    Attaches a rotating file handler to the root logger and sets the
    root logger's level to DEBUG.

    @param options: Parsed options.  `options.log_size` is the size
                    limit of the log file in megabytes; 0 means the
                    file size is not limited.
    @param offloader_type: Type of offloader action as string.
    """
    log_filename = _get_log_filename(options, offloader_type)
    log_formatter = logging.Formatter(_LOGGING_FORMAT)
    # The --log_size option is documented in megabytes while the
    # handler expects a byte count, so scale by 1024 * 1024.  A
    # log_size of 0 yields maxBytes=0, for which the handler never
    # rolls the file over.  Keeps one backup just in case.
    max_log_bytes = 1024 * 1024 * options.log_size
    handler = logging.handlers.RotatingFileHandler(
            log_filename, maxBytes=max_log_bytes, backupCount=1)
    handler.setFormatter(log_formatter)
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)
1205
1206
def _get_log_filename(options, offloader_type):
    """Build the full path of the log file.

    When a log size limit is set, the timestamp part of the file name
    is left empty, so the name is the same on every run.  Otherwise the
    current local time is embedded in the name.

    @param options: Parsed options.
    @param offloader_type: Type of offloader action as string.

    @returns Path of the log file under `_LOG_LOCATION`.
    """
    size_limited = options.log_size > 0
    timestamp = '' if size_limited else time.strftime(_LOG_TIMESTAMP_FORMAT)
    basename = _LOG_FILENAME_FORMAT % (offloader_type, timestamp)
    return os.path.join(_LOG_LOCATION, basename)
1218 return os.path.join(_LOG_LOCATION, log_basename)
1219
1220
# Run the offloader only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()