blob: b26381de99787e7b81c044885313c521fde0bf8f [file] [log] [blame]
Scott Zawalski20a9b582011-11-21 11:49:40 -08001#!/usr/bin/python
2#
Scott Zawalskicb2e2b72012-04-17 12:10:05 -04003# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Scott Zawalski20a9b582011-11-21 11:49:40 -08004# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
J. Richard Barnetteea785362014-03-17 16:00:53 -07007"""Script to archive old Autotest results to Google Storage.
Scott Zawalski20a9b582011-11-21 11:49:40 -08008
J. Richard Barnetteea785362014-03-17 16:00:53 -07009Uses gsutil to archive files to the configured Google Storage bucket.
10Upon successful copy, the local results directory is deleted.
Scott Zawalski20a9b582011-11-21 11:49:40 -080011"""
12
Allen Lib41527d2017-06-22 17:28:00 -070013import abc
Simran Basibd9ded02013-11-04 15:49:11 -080014import datetime
Dan Shifaf50db2015-09-25 13:40:45 -070015import errno
Ningning Xia42111242016-06-15 14:35:58 -070016import glob
17import gzip
Simran Basi9523eaa2012-06-28 17:18:45 -070018import logging
Simran Basia2532282014-12-04 13:28:16 -080019import logging.handlers
Scott Zawalski20a9b582011-11-21 11:49:40 -080020import os
Dan Shiaffb9222015-04-15 17:05:47 -070021import re
Scott Zawalski20a9b582011-11-21 11:49:40 -080022import shutil
Dan Shib2751fc2017-05-16 11:05:15 -070023import socket
Laurence Goodbyca7726d2017-02-14 17:09:07 -080024import stat
Scott Zawalski20a9b582011-11-21 11:49:40 -080025import subprocess
26import sys
Allen Lib41527d2017-06-22 17:28:00 -070027import tarfile
Simran Basi9523eaa2012-06-28 17:18:45 -070028import tempfile
29import time
Scott Zawalskicb2e2b72012-04-17 12:10:05 -040030
Simran Basi7d9a1492012-10-25 13:51:54 -070031from optparse import OptionParser
32
Simran Basi981a9272012-11-14 10:46:03 -080033import common
Dan Shi96c3bdc2017-05-24 11:34:30 -070034from autotest_lib.client.common_lib import file_utils
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080035from autotest_lib.client.common_lib import global_config
Simran Basidd129972014-09-11 14:34:49 -070036from autotest_lib.client.common_lib import utils
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080037from autotest_lib.site_utils import job_directories
Michael Tange8bc9592017-07-06 10:59:32 -070038# For unittest, the cloud_console.proto is not compiled yet.
39try:
40 from autotest_lib.site_utils import cloud_console_client
41except ImportError:
42 cloud_console_client = None
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080043from autotest_lib.tko import models
Dan Shib2751fc2017-05-16 11:05:15 -070044from autotest_lib.utils import labellib
Allen Lib41527d2017-06-22 17:28:00 -070045from autotest_lib.utils import gslib
Allen Lib41527d2017-06-22 17:28:00 -070046from chromite.lib import timeout_util
Prathmesh Prabhubeb9e012017-01-30 16:18:39 -080047
Dan Shi5e2efb72017-02-07 11:40:23 -080048# Autotest requires the psutil module from site-packages, so it must be imported
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -080049# after "import common".
Simran Basi627a75e2017-02-08 11:07:20 -080050try:
51 # Does not exist, nor is needed, on moblab.
52 import psutil
53except ImportError:
54 psutil = None
Scott Zawalski20a9b582011-11-21 11:49:40 -080055
Dan Shi5e2efb72017-02-07 11:40:23 -080056from chromite.lib import parallel
57try:
58 from chromite.lib import metrics
59 from chromite.lib import ts_mon_config
60except ImportError:
Paul Hobbscd10e482017-08-28 12:00:06 -070061 metrics = utils.metrics_mock
62 ts_mon_config = utils.metrics_mock
Dan Shi5e2efb72017-02-07 11:40:23 -080063
Scott Zawalski20a9b582011-11-21 11:49:40 -080064
# Whether offloading to Google Storage is enabled; read from the
# 'gs_offloading_enabled' key of the CROS config section (default: True).
GS_OFFLOADING_ENABLED = global_config.global_config.get_config_value(
        'CROS', 'gs_offloading_enabled', type=bool, default=True)

# Nice setting for process, the higher the number the lower the priority.
NICENESS = 10

# Maximum number of seconds to allow for offloading a single
# directory.
OFFLOAD_TIMEOUT_SECS = 60 * 60

# Sleep time per loop, in seconds.
SLEEP_TIME_SECS = 5

# Minimum number of seconds between e-mail reports.
REPORT_INTERVAL_SECS = 60 * 60

# Location of Autotest results on disk.
RESULTS_DIR = '/usr/local/autotest/results'
# Path of the failed-offloads report file, located inside RESULTS_DIR.
FAILED_OFFLOADS_FILE = os.path.join(RESULTS_DIR, 'FAILED_OFFLOADS')

# Header of the failed-offloads report. It takes two format arguments:
# a string (%s, the time of the last attempt) and an integer (%d, the
# number of entries that failed to offload).
FAILED_OFFLOADS_FILE_HEADER = '''
This is the list of gs_offloader failed jobs.
Last offloader attempt at %s failed to offload %d files.
Check http://go/cros-triage-gsoffloader to triage the issue


First failure Count Directory name
=================== ====== ==============================
'''
# --+----1----+---- ----+ ----+----1----+----2----+----3

# Format of one report line (string, integer, string) and the strftime
# format used for timestamps in the report.
FAILED_OFFLOADS_LINE_FORMAT = '%19s %5d %-1s\n'
FAILED_OFFLOADS_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# When True, _get_cmd_list() builds a 'gsutil rsync' command instead of
# 'gsutil cp' (CROS/gs_offloader_use_rsync, default: False).
USE_RSYNC_ENABLED = global_config.global_config.get_config_value(
        'CROS', 'gs_offloader_use_rsync', type=bool, default=False)

# When True, limit_file_count() is applied to a directory before it is
# offloaded (CROS/gs_offloader_limit_file_count, default: False).
LIMIT_FILE_COUNT = global_config.global_config.get_config_value(
        'CROS', 'gs_offloader_limit_file_count', type=bool, default=False)

# Use multiprocessing for gsutil uploading.
GS_OFFLOADER_MULTIPROCESSING = global_config.global_config.get_config_value(
        'CROS', 'gs_offloader_multiprocessing', type=bool, default=False)

# Glob fragment matching two decimal digits; building block of
# TIMESTAMP_PATTERN below.
D = '[0-9][0-9]'
# Glob pattern for the timestamp-named result folders (seven two-digit
# groups, laid out as DDDD.DD.DD_DD.DD.DD).
TIMESTAMP_PATTERN = '%s%s.%s.%s_%s.%s.%s' % (D, D, D, D, D, D, D)
# File names of the XML result files looked up inside a result folder.
CTS_RESULT_PATTERN = 'testResult.xml'
CTS_V2_RESULT_PATTERN = 'test_result.xml'
# Google Storage bucket URI to store results in.
DEFAULT_CTS_RESULTS_GSURI = global_config.global_config.get_config_value(
        'CROS', 'cts_results_server', default='')
# Google Storage bucket URI that receives the zipped result folders
# (CROS/cts_apfe_server).
DEFAULT_CTS_APFE_GSURI = global_config.global_config.get_config_value(
        'CROS', 'cts_apfe_server', default='')

# metadata type
GS_OFFLOADER_SUCCESS_TYPE = 'gs_offloader_success'
GS_OFFLOADER_FAILURE_TYPE = 'gs_offloader_failure'

# Autotest test to collect list of CTS tests
TEST_LIST_COLLECTOR = 'tradefed-run-collect-tests-only'
Simran Basi9523eaa2012-06-28 17:18:45 -0700125
Dan Shib2751fc2017-05-16 11:05:15 -0700126def _get_metrics_fields(dir_entry):
127 """Get metrics fields for the given test result directory, including board
128 and milestone.
129
130 @param dir_entry: Directory entry to offload.
131 @return A dictionary for the metrics data to be uploaded.
132 """
133 fields = {'board': 'unknown',
134 'milestone': 'unknown'}
135 if dir_entry:
136 # There could be multiple hosts in the job directory, use the first one
137 # available.
138 for host in glob.glob(os.path.join(dir_entry, '*')):
Dan Shi23109012017-05-28 20:23:48 -0700139 try:
140 keyval = models.test.parse_job_keyval(host)
141 except ValueError:
142 continue
Dan Shib2751fc2017-05-16 11:05:15 -0700143 build = keyval.get('build')
144 if build:
Dan Shi02dd0662017-05-23 11:24:32 -0700145 try:
146 cros_version = labellib.parse_cros_version(build)
147 fields['board'] = cros_version.board
148 fields['milestone'] = cros_version.milestone
149 break
150 except ValueError:
151 # Ignore version parsing error so it won't crash
152 # gs_offloader.
153 pass
Dan Shib2751fc2017-05-16 11:05:15 -0700154
155 return fields;
156
157
def _get_es_metadata(dir_entry):
    """Build the ES metadata for a test result directory.

    Starts from the metrics fields of dir_entry and adds the local host
    name. When dir_entry is given, the directory itself and its job (or
    task) id are recorded as well.

    @param dir_entry: Directory entry to offload.
    @return A dictionary for the metadata to be uploaded.
    """
    metadata = _get_metrics_fields(dir_entry)
    metadata['hostname'] = socket.gethostname()
    if not dir_entry:
        return metadata

    # Extra details about the test job itself.
    metadata['dir_entry'] = dir_entry
    metadata['job_id'] = job_directories.get_job_id_or_task_id(dir_entry)
    return metadata
172
173
def _get_cmd_list(multiprocessing, dir_entry, gs_path):
    """Build the gsutil command that offloads a directory.

    With USE_RSYNC_ENABLED the command is 'gsutil rsync' and the target is
    gs_path extended by the base name of dir_entry; otherwise it is
    'gsutil cp' straight into gs_path.

    @param multiprocessing: True to turn on -m option for gsutil.
    @param dir_entry: Directory entry/path for which a command list is
                      needed.
    @param gs_path: Location in google storage where we will
                    offload the directory.

    @return A command list to be executed by Popen.
    """
    gsutil_cmd = ['gsutil']
    if multiprocessing:
        gsutil_cmd.append('-m')

    if USE_RSYNC_ENABLED:
        subcommand = 'rsync'
        destination = os.path.join(gs_path, os.path.basename(dir_entry))
    else:
        subcommand = 'cp'
        destination = gs_path

    gsutil_cmd.extend([subcommand, '-eR', dir_entry, destination])
    return gsutil_cmd
Simran Basi9523eaa2012-06-28 17:18:45 -0700196
Jakob Juelich24f22c22014-09-26 11:46:11 -0700197
def sanitize_dir(dirpath):
    """Prepare a directory for upload to Google Storage.

    File names are escaped, and FIFOs and symlinks are replaced by regular
    marker files. Nothing happens when dirpath does not exist.

    @param dirpath: Directory entry to be sanitized.
    """
    if os.path.exists(dirpath):
        # NOTE(review): if escaping changes the name of dirpath itself, the
        # steps below still operate on the old path - confirm that top-level
        # result directory names never need escaping.
        _escape_rename(dirpath)
        _escape_rename_dir_contents(dirpath)
        _sanitize_fifos(dirpath)
        _sanitize_symlinks(dirpath)
211
212
213def _escape_rename_dir_contents(dirpath):
214 """Recursively rename directory to escape filenames for gs upload.
215
216 @param dirpath: Directory path string.
217 """
218 for filename in os.listdir(dirpath):
219 path = os.path.join(dirpath, filename)
220 _escape_rename(path)
221 for filename in os.listdir(dirpath):
222 path = os.path.join(dirpath, filename)
223 if os.path.isdir(path):
224 _escape_rename_dir_contents(path)
225
226
def _escape_rename(path):
    """Rename a file so that its name is escaped for gs upload.

    Only the last path component is passed through gslib.escape(); the
    parent directory part is kept as is.

    @param path: File path string.
    """
    parent, name = os.path.split(path)
    os.rename(path, os.path.join(parent, gslib.escape(name)))
236
237
238def _sanitize_fifos(dirpath):
239 """Convert fifos to regular files (fixes crbug.com/684122).
240
241 @param dirpath: Directory path string.
242 """
243 for root, _, files in os.walk(dirpath):
244 for filename in files:
245 path = os.path.join(root, filename)
246 file_stat = os.lstat(path)
Laurence Goodbyca7726d2017-02-14 17:09:07 -0800247 if stat.S_ISFIFO(file_stat.st_mode):
Allen Lib41527d2017-06-22 17:28:00 -0700248 _replace_fifo_with_file(path)
249
250
251def _replace_fifo_with_file(path):
252 """Replace a fifo with a normal file.
253
254 @param path: Fifo path string.
255 """
256 logging.debug('Removing fifo %s', path)
257 os.remove(path)
258 logging.debug('Creating marker %s', path)
259 with open(path, 'w') as f:
260 f.write('<FIFO>')
261
262
263def _sanitize_symlinks(dirpath):
264 """Convert Symlinks to regular files (fixes crbug.com/692788).
265
266 @param dirpath: Directory path string.
267 """
268 for root, _, files in os.walk(dirpath):
269 for filename in files:
270 path = os.path.join(root, filename)
271 file_stat = os.lstat(path)
272 if stat.S_ISLNK(file_stat.st_mode):
273 _replace_symlink_with_file(path)
274
275
276def _replace_symlink_with_file(path):
277 """Replace a symlink with a normal file.
278
279 @param path: Symlink path string.
280 """
281 target = os.readlink(path)
282 logging.debug('Removing symlink %s', path)
283 os.remove(path)
284 logging.debug('Creating marker %s', path)
285 with open(path, 'w') as f:
286 f.write('<symlink to %s>' % target)
287
288
# File-count threshold used by limit_file_count(): a directory holding
# fewer files than this is left untouched.
_MAX_FILE_COUNT = 500
# Folder names that _get_zippable_folders() never returns, so they are
# never turned into tarballs.
_FOLDERS_NEVER_ZIP = ['debug', 'ssp_logs', 'autoupdate_logs']
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800292
293
294def _get_zippable_folders(dir_entry):
295 folders_list = []
296 for folder in os.listdir(dir_entry):
297 folder_path = os.path.join(dir_entry, folder)
298 if (not os.path.isfile(folder_path) and
Allen Lib41527d2017-06-22 17:28:00 -0700299 not folder in _FOLDERS_NEVER_ZIP):
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800300 folders_list.append(folder_path)
301 return folders_list
Dan Shiaffb9222015-04-15 17:05:47 -0700302
303
def limit_file_count(dir_entry):
    """Limit the number of files in the given directory.

    The files below dir_entry are counted recursively. Nothing is done
    when there are fewer than _MAX_FILE_COUNT of them; otherwise folders
    are replaced by .tgz tarballs, except those named in
    _FOLDERS_NEVER_ZIP.

    @param dir_entry: Directory entry to be checked.
    """
    try:
        file_count = _count_files(dir_entry)
    except ValueError:
        logging.warning('Fail to get the file count in folder %s.', dir_entry)
        return
    if file_count < _MAX_FILE_COUNT:
        return

    # For a test job the folders one level further down are compressed
    # (e.g. 123-debug/host1), which keeps the autoserv debug folder
    # accessible. A special task does not need that extra level.
    is_special_task = re.match(job_directories.SPECIAL_TASK_PATTERN,
                               dir_entry)

    to_compress = _get_zippable_folders(dir_entry)
    if not is_special_task:
        second_level = []
        for top_folder in to_compress:
            second_level.extend(_get_zippable_folders(top_folder))
        to_compress = second_level

    for folder in to_compress:
        _make_into_tarball(folder)
337
338
339def _count_files(dirpath):
340 """Count the number of files in a directory recursively.
341
342 @param dirpath: Directory path string.
343 """
344 return sum(len(files) for _path, _dirs, files in os.walk(dirpath))
345
346
347def _make_into_tarball(dirpath):
348 """Make directory into tarball.
349
350 @param dirpath: Directory path string.
351 """
352 tarpath = '%s.tgz' % dirpath
353 with tarfile.open(tarpath, 'w:gz') as tar:
354 tar.add(dirpath, arcname=os.path.basename(dirpath))
355 shutil.rmtree(dirpath)
Dan Shi1b4c7c32015-10-05 10:38:57 -0700356
357
def correct_results_folder_permission(dir_entry):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder has
    the owner of root. This must be changed to the user running the
    autoserv process, so parsing job can access the results folder.

    The ownership is changed recursively with 'sudo -n chown -R' to the
    uid and gid of the current process. A failing chown command is logged,
    not raised.

    @param dir_entry: Path to the results folder.
    """
    if not dir_entry:
        return

    logging.info('Trying to correct file permission of %s.', dir_entry)
    chown_cmd = ['sudo', '-n', 'chown', '-R',
                 '%s:%s' % (os.getuid(), os.getgid()), dir_entry]
    try:
        subprocess.check_call(chown_cmd)
    except subprocess.CalledProcessError as e:
        logging.error('Failed to modify permission for %s: %s',
                      dir_entry, e)
Dan Shie4a4f9f2015-07-20 09:00:25 -0700378
379
Allen Lib41527d2017-06-22 17:28:00 -0700380def _upload_cts_testresult(dir_entry, multiprocessing):
Ningning Xia2d88eec2016-07-25 23:18:46 -0700381 """Upload test results to separate gs buckets.
Ningning Xia42111242016-06-15 14:35:58 -0700382
Ilja H. Friedelbfa63142017-01-26 00:56:29 -0800383 Upload testResult.xml.gz/test_result.xml.gz file to cts_results_bucket.
Ningning Xia205a1d42016-06-21 16:46:28 -0700384 Upload timestamp.zip to cts_apfe_bucket.
Ningning Xia8db632f2016-08-19 11:01:35 -0700385
Ningning Xia42111242016-06-15 14:35:58 -0700386 @param dir_entry: Path to the results folder.
387 @param multiprocessing: True to turn on -m option for gsutil.
388 """
Ningning Xia2d981ee2016-07-06 17:59:54 -0700389 for host in glob.glob(os.path.join(dir_entry, '*')):
Ningning Xia2d88eec2016-07-25 23:18:46 -0700390 cts_path = os.path.join(host, 'cheets_CTS.*', 'results', '*',
391 TIMESTAMP_PATTERN)
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800392 cts_v2_path = os.path.join(host, 'cheets_CTS_*', 'results', '*',
393 TIMESTAMP_PATTERN)
394 gts_v2_path = os.path.join(host, 'cheets_GTS.*', 'results', '*',
395 TIMESTAMP_PATTERN)
Ningning Xia2d88eec2016-07-25 23:18:46 -0700396 for result_path, result_pattern in [(cts_path, CTS_RESULT_PATTERN),
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800397 (cts_v2_path, CTS_V2_RESULT_PATTERN),
398 (gts_v2_path, CTS_V2_RESULT_PATTERN)]:
Ningning Xia2d88eec2016-07-25 23:18:46 -0700399 for path in glob.glob(result_path):
Ningning Xia0c27d9b2016-08-04 14:02:39 -0700400 try:
401 _upload_files(host, path, result_pattern, multiprocessing)
402 except Exception as e:
403 logging.error('ERROR uploading test results %s to GS: %s',
404 path, e)
Ningning Xia205a1d42016-06-21 16:46:28 -0700405
Ningning Xia205a1d42016-06-21 16:46:28 -0700406
Ningning Xia8db632f2016-08-19 11:01:35 -0700407def _is_valid_result(build, result_pattern, suite):
408 """Check if the result should be uploaded to CTS/GTS buckets.
409
410 @param build: Builder name.
411 @param result_pattern: XML result file pattern.
412 @param suite: Test suite name.
413
414 @returns: Bool flag indicating whether a valid result.
415 """
416 if build is None or suite is None:
417 return False
418
419 # Not valid if it's not a release build.
420 if not re.match(r'(?!trybot-).*-release/.*', build):
421 return False
422
Ilja H. Friedelad6d8792016-11-28 21:53:44 -0800423 # Not valid if it's cts result but not 'arc-cts*' or 'test_that_wrapper'
424 # suite.
Ilja H. Friedel73cf6cd2017-03-01 12:23:00 -0800425 result_patterns = [CTS_RESULT_PATTERN, CTS_V2_RESULT_PATTERN]
Ilja H. Friedel61a70d32017-05-20 01:43:02 -0700426 if result_pattern in result_patterns and not (
427 suite.startswith('arc-cts') or suite.startswith('arc-gts') or
428 suite.startswith('test_that_wrapper')):
Ningning Xia8db632f2016-08-19 11:01:35 -0700429 return False
430
431 return True
Ningning Xia21922c82016-07-29 11:03:15 -0700432
433
def _is_test_collector(package):
    """Tell whether a test run only collects the list of CTS tests.

    @param package: Autotest package name. e.g. cheets_CTS_N.CtsGraphicsTestCase

    @return True if the package name contains TEST_LIST_COLLECTOR, False
            otherwise.
    """
    is_collector = TEST_LIST_COLLECTOR in package
    return is_collector
442
443
def _upload_files(host, path, result_pattern, multiprocessing):
    """Upload the CTS/GTS results of one timestamped result folder.

    Nothing is uploaded unless the job keyval of host describes a valid
    result (see _is_valid_result). Otherwise:
      - '<path>.zip', if present, is copied to the APFE bucket, unless the
        run belongs to the CTS test list collector;
      - every file in path matching result_pattern is gzipped and the
        compressed copy is uploaded to the CTS results bucket. The
        compressed copy is temporary and is removed again even when the
        upload fails.

    @param host: Path of the host folder holding the job keyval.
    @param path: Path of the timestamped result folder. Its components,
                 counted from the end, supply the timestamp (-1), the
                 package name (-4) and the job id (-6).
    @param result_pattern: XML result file pattern.
    @param multiprocessing: True to turn on -m option for gsutil.

    @raises KeyError: If a valid result has no 'parent_job_id' keyval.
    """
    keyval = models.test.parse_job_keyval(host)
    build = keyval.get('build')
    suite = keyval.get('suite')

    if not _is_valid_result(build, result_pattern, suite):
        # No need to upload current folder, return.
        return

    parent_job_id = str(keyval['parent_job_id'])

    folders = path.split(os.sep)
    job_id = folders[-6]
    package = folders[-4]
    timestamp = folders[-1]

    # Results produced by CTS test list collector are dummy results.
    # They don't need to be copied to APFE bucket which is mainly being used
    # for CTS APFE submission.
    if not _is_test_collector(package):
        # Path: bucket/build/parent_job_id/cheets_CTS.*/job_id_timestamp/
        # or bucket/build/parent_job_id/cheets_GTS.*/job_id_timestamp/
        cts_apfe_gs_path = os.path.join(
                DEFAULT_CTS_APFE_GSURI, build, parent_job_id,
                package, job_id + '_' + timestamp) + '/'

        for zip_file in glob.glob('%s.zip' % path):
            utils.run(' '.join(_get_cmd_list(
                    multiprocessing, zip_file, cts_apfe_gs_path)))
            logging.debug('Upload %s to %s ', zip_file, cts_apfe_gs_path)
    else:
        logging.debug('%s is a CTS Test collector Autotest test run.', package)
        logging.debug('Skipping CTS results upload to APFE gs:// bucket.')

    # Path: bucket/cheets_CTS.*/job_id_timestamp/
    # or bucket/cheets_GTS.*/job_id_timestamp/
    test_result_gs_path = os.path.join(
            DEFAULT_CTS_RESULTS_GSURI, package,
            job_id + '_' + timestamp) + '/'

    for test_result_file in glob.glob(os.path.join(path, result_pattern)):
        # gzip test_result_file(testResult.xml/test_result.xml)
        test_result_file_gz = '%s.gz' % test_result_file
        try:
            # Binary mode on both ends: the bytes are copied unchanged into
            # the gzip stream.
            with open(test_result_file, 'rb') as f_in, \
                    gzip.open(test_result_file_gz, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            utils.run(' '.join(_get_cmd_list(
                    multiprocessing, test_result_file_gz,
                    test_result_gs_path)))
            logging.debug('Zip and upload %s to %s',
                          test_result_file_gz, test_result_gs_path)
        finally:
            # Remove test_result_file_gz(testResult.xml.gz/
            # test_result.xml.gz) whether or not the upload succeeded, so a
            # failed upload leaves no stray copy in the results folder.
            if os.path.exists(test_result_file_gz):
                os.remove(test_result_file_gz)
497
Ningning Xia42111242016-06-15 14:35:58 -0700498
def _emit_gs_returncode_metric(returncode):
    """Increment the gs_returncode counter based on |returncode|.

    @param returncode: Return code to report; converted with int(). Values
                       outside 0..255 are reported as -1.
    """
    rcode = int(returncode)
    if not 0 <= rcode <= 255:
        rcode = -1
    counter = metrics.Counter('chromeos/autotest/gs_offloader/gs_returncode')
    counter.increment(fields={'return_code': rcode})
506
507
def _handle_dir_os_error(dir_entry, fix_permission=False):
    """Try to fix the result directory's permission issue if needed.

    The wrong_permissions_count metric is incremented on every call.

    @param dir_entry: Directory entry to offload.
    @param fix_permission: True to change the directory's owner to the same one
            running gs_offloader.
    """
    if fix_permission:
        correct_results_folder_permission(dir_entry)

    counter = metrics.Counter('chromeos/autotest/errors/gs_offloader/'
                              'wrong_permissions_count')
    counter.increment(fields=_get_metrics_fields(dir_entry))
521
522
class BaseGSOffloader(object):
    """Interface that every Google Storage offloader implements."""

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def offload(self, dir_entry, dest_path, job_complete_time):
        """Offload one directory entry to Google Storage.

        Subclasses provide the implementation.

        @param dir_entry: Directory entry to offload.
        @param dest_path: Location in google storage where we will
                          offload the directory.
        @param job_complete_time: The complete time of the job from the AFE
                                  database.
        """
539
540
541class GSOffloader(BaseGSOffloader):
542 """Google Storage Offloader."""
543
Michael Tang0f553bd2017-06-16 17:38:45 -0700544 def __init__(self, gs_uri, multiprocessing, delete_age,
545 console_client=None):
Allen Lib41527d2017-06-22 17:28:00 -0700546 """Returns the offload directory function for the given gs_uri
547
548 @param gs_uri: Google storage bucket uri to offload to.
549 @param multiprocessing: True to turn on -m option for gsutil.
Michael Tang0f553bd2017-06-16 17:38:45 -0700550 @param console_client: The cloud console client. If None,
551 cloud console APIs are not called.
Allen Lib41527d2017-06-22 17:28:00 -0700552 """
553 self._gs_uri = gs_uri
554 self._multiprocessing = multiprocessing
555 self._delete_age = delete_age
Michael Tang0f553bd2017-06-16 17:38:45 -0700556 self._console_client = console_client
Dan Shib2751fc2017-05-16 11:05:15 -0700557
Prathmesh Prabhueeaa7ef2017-01-30 17:17:06 -0800558 @metrics.SecondsTimerDecorator(
559 'chromeos/autotest/gs_offloader/job_offload_duration')
Allen Lib41527d2017-06-22 17:28:00 -0700560 def offload(self, dir_entry, dest_path, job_complete_time):
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800561 """Offload the specified directory entry to Google storage.
Jakob Juelichc17f3112014-09-11 14:32:30 -0700562
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800563 @param dir_entry: Directory entry to offload.
564 @param dest_path: Location in google storage where we will
565 offload the directory.
Keith Haddow5ba5fb82016-11-09 11:39:36 -0800566 @param job_complete_time: The complete time of the job from the AFE
567 database.
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800568 """
Allen Lib41527d2017-06-22 17:28:00 -0700569 with tempfile.TemporaryFile('w+') as stdout_file, \
570 tempfile.TemporaryFile('w+') as stderr_file:
571 try:
Dan Shiebcd8732017-10-09 14:54:52 -0700572 try:
573 self._offload(dir_entry, dest_path, stdout_file,
574 stderr_file)
575 except OSError as e:
576 # Correct file permission error of the directory, then raise
577 # the exception so gs_offloader can retry later.
578 _handle_dir_os_error(dir_entry, e.errno==errno.EACCES)
579 # Try again after the permission issue is fixed.
580 self._offload(dir_entry, dest_path, stdout_file,
581 stderr_file)
Allen Lib41527d2017-06-22 17:28:00 -0700582 except _OffloadError as e:
583 metrics_fields = _get_metrics_fields(dir_entry)
Aviv Keshet1d8df7d2017-04-20 12:35:31 -0700584 m_any_error = 'chromeos/autotest/errors/gs_offloader/any_error'
Dan Shib2751fc2017-05-16 11:05:15 -0700585 metrics.Counter(m_any_error).increment(fields=metrics_fields)
586
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800587 # Rewind the log files for stdout and stderr and log
588 # their contents.
589 stdout_file.seek(0)
590 stderr_file.seek(0)
591 stderr_content = stderr_file.read()
Prathmesh Prabhu867cec52017-01-30 15:58:12 -0800592 logging.warning('Error occurred when offloading %s:', dir_entry)
593 logging.warning('Stdout:\n%s \nStderr:\n%s', stdout_file.read(),
594 stderr_content)
Dan Shib2751fc2017-05-16 11:05:15 -0700595
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800596 # Some result files may have wrong file permission. Try
597 # to correct such error so later try can success.
598 # TODO(dshi): The code is added to correct result files
599 # with wrong file permission caused by bug 511778. After
600 # this code is pushed to lab and run for a while to
601 # clean up these files, following code and function
602 # correct_results_folder_permission can be deleted.
603 if 'CommandException: Error opening file' in stderr_content:
J. Richard Barnette2c41e1e2015-12-08 16:21:10 -0800604 correct_results_folder_permission(dir_entry)
Allen Lib41527d2017-06-22 17:28:00 -0700605 else:
606 self._prune(dir_entry, job_complete_time)
Dan Shib2751fc2017-05-16 11:05:15 -0700607
Allen Lib41527d2017-06-22 17:28:00 -0700608 def _offload(self, dir_entry, dest_path,
609 stdout_file, stderr_file):
610 """Offload the specified directory entry to Google storage.
611
612 @param dir_entry: Directory entry to offload.
613 @param dest_path: Location in google storage where we will
614 offload the directory.
615 @param job_complete_time: The complete time of the job from the AFE
616 database.
617 @param stdout_file: Log file.
618 @param stderr_file: Log file.
619 """
620 if _is_uploaded(dir_entry):
621 return
622 start_time = time.time()
623 metrics_fields = _get_metrics_fields(dir_entry)
624 es_metadata = _get_es_metadata(dir_entry)
625 error_obj = _OffloadError(start_time, es_metadata)
626 try:
627 sanitize_dir(dir_entry)
628 if DEFAULT_CTS_RESULTS_GSURI:
629 _upload_cts_testresult(dir_entry, self._multiprocessing)
630
631 if LIMIT_FILE_COUNT:
632 limit_file_count(dir_entry)
633 es_metadata['size_kb'] = file_utils.get_directory_size_kibibytes(dir_entry)
634
635 process = None
636 with timeout_util.Timeout(OFFLOAD_TIMEOUT_SECS):
637 gs_path = '%s%s' % (self._gs_uri, dest_path)
638 process = subprocess.Popen(
639 _get_cmd_list(self._multiprocessing, dir_entry, gs_path),
640 stdout=stdout_file, stderr=stderr_file)
641 process.wait()
642
643 _emit_gs_returncode_metric(process.returncode)
644 if process.returncode != 0:
645 raise error_obj
646 _emit_offload_metrics(dir_entry)
Allen Lib41527d2017-06-22 17:28:00 -0700647
Michael Tang0f553bd2017-06-16 17:38:45 -0700648 if self._console_client:
649 gcs_uri = os.path.join(gs_path,
650 os.path.basename(dir_entry))
651 if not self._console_client.send_test_job_offloaded_message(
652 gcs_uri):
Allen Lib41527d2017-06-22 17:28:00 -0700653 raise error_obj
Michael Tang0f553bd2017-06-16 17:38:45 -0700654
Allen Lib41527d2017-06-22 17:28:00 -0700655 _mark_uploaded(dir_entry)
656 except timeout_util.TimeoutError:
657 m_timeout = 'chromeos/autotest/errors/gs_offloader/timed_out_count'
658 metrics.Counter(m_timeout).increment(fields=metrics_fields)
659 # If we finished the call to Popen(), we may need to
660 # terminate the child process. We don't bother calling
661 # process.poll(); that inherently races because the child
662 # can die any time it wants.
663 if process:
664 try:
665 process.terminate()
666 except OSError:
667 # We don't expect any error other than "No such
668 # process".
669 pass
670 logging.error('Offloading %s timed out after waiting %d '
671 'seconds.', dir_entry, OFFLOAD_TIMEOUT_SECS)
672 raise error_obj
673
def _prune(self, dir_entry, job_complete_time):
    """Delete a local results directory once it is uploaded and expired.

    Nothing happens unless the directory carries the upload marker and
    the job is older than `self._delete_age`.

    @param dir_entry: Directory entry to offload.
    @param job_complete_time: The complete time of the job from the AFE
                              database.
    """
    if not _is_uploaded(dir_entry):
        return
    if not job_directories.is_job_expired(self._delete_age,
                                          job_complete_time):
        return
    try:
        shutil.rmtree(dir_entry)
    except OSError as err:
        # Wrong file permissions can make `shutil.rmtree(dir_entry)`
        # raise OSError with 'Permission denied'; see crbug.com/536151.
        permission_denied = (err.errno == errno.EACCES)
        _handle_dir_os_error(dir_entry, permission_denied)
        # Retry once now that the handler has had a chance to repair
        # the permissions.
        shutil.rmtree(dir_entry)
Simran Basi9523eaa2012-06-28 17:18:45 -0700694
Scott Zawalski20a9b582011-11-21 11:49:40 -0800695
class _OffloadError(Exception):
    """Google Storage offload failed.

    Carries the offload start time and the metadata collected for the
    attempt, both as attributes and as the exception `args`.
    """

    def __init__(self, start_time, es_metadata):
        Exception.__init__(self, start_time, es_metadata)
        self.es_metadata = es_metadata
        self.start_time = start_time
Simran Basibd9ded02013-11-04 15:49:11 -0800703
Allen Lib41527d2017-06-22 17:28:00 -0700704
705
class FakeGSOffloader(BaseGSOffloader):
    """Offloader stand-in that uploads nothing and only deletes."""

    def offload(self, dir_entry, dest_path, job_complete_time):
        """Remove the directory locally instead of offloading it.

        @param dir_entry: Directory entry to delete.
        @param dest_path: Unused; location in google storage where a
                          real offloader would put the directory.
        @param job_complete_time: Unused; the complete time of the job
                                  from the AFE database.
        """
        shutil.rmtree(dir_entry)
720
721
def _is_expired(job, age_limit):
    """Return whether a job directory is old enough to be uploaded.

    A job with no finish timestamp is never considered expired.

    @param job: _JobDirectory instance.
    @param age_limit: Minimum age in days at which a job may be offloaded.
    """
    finished_at = job.get_timestamp_if_finished()
    if finished_at:
        return job_directories.is_job_expired(age_limit, finished_at)
    return False
732
733
def _emit_offload_metrics(dirpath):
    """Emit gs offload metrics for one offloaded directory.

    Bumps the offloaded-jobs counter by one and the transferred-size
    counter by the directory size in kibibytes.

    @param dirpath: Offloaded directory path.
    """
    size_kib = file_utils.get_directory_size_kibibytes(dirpath)
    fields = _get_metrics_fields(dirpath)

    jobs_counter = metrics.Counter(
            'chromeos/autotest/gs_offloader/jobs_offloaded')
    jobs_counter.increment(fields=fields)

    size_counter = metrics.Counter(
            'chromeos/autotest/gs_offloader/kilobytes_transferred')
    size_counter.increment_by(size_kib, fields=fields)
Simran Basibd9ded02013-11-04 15:49:11 -0800750
751
def _is_uploaded(dirpath):
    """Return whether the directory carries the upload marker file.

    @param dirpath: Directory path string.
    """
    marker_path = _get_uploaded_marker_file(dirpath)
    return os.path.isfile(marker_path)
758
759
def _mark_uploaded(dirpath):
    """Mark directory as uploaded by creating its marker file.

    An existing marker file is left untouched (opened in append mode).

    @param dirpath: Directory path string.
    """
    marker_path = _get_uploaded_marker_file(dirpath)
    open(marker_path, 'a').close()
767
768
def _get_uploaded_marker_file(dirpath):
    """Return the path of the upload marker file inside a directory.

    @param dirpath: Directory path string.
    """
    marker_name = '.GS_UPLOADED'
    return '%s/%s' % (dirpath, marker_name)
775
776
def _format_job_for_failure_reporting(job):
    """Format a _JobDirectory as one line for reporting / logging.

    The line is built from the time of the first offload attempt, the
    number of attempts so far, and the job directory name.

    @param job: The _JobDirectory to format.
    """
    first_attempt = datetime.datetime.fromtimestamp(job.first_offload_start)
    return FAILED_OFFLOADS_LINE_FORMAT % (
            first_attempt.strftime(FAILED_OFFLOADS_TIME_FORMAT),
            job.offload_count,
            job.dirname)
Prathmesh Prabhufda271a2017-01-30 17:53:12 -0800787
788
def wait_for_gs_write_access(gs_uri):
    """Verify and wait until we have write access to Google Storage.

    Uploads an empty temporary file to `gs_uri` and removes it again,
    retrying every 120 seconds until both steps succeed. This call
    blocks for as long as the upload or removal keeps failing.

    @param gs_uri: The Google Storage URI we are trying to offload to.
    """
    # TODO (sbasi) Try to use the gsutil command to check write access.
    # Ensure we have write access to gs_uri.
    #
    # The context manager closes (and thereby deletes) the probe file as
    # soon as we are done, rather than relying on garbage collection.
    with tempfile.NamedTemporaryFile() as dummy_file:
        test_cmd = _get_cmd_list(False, dummy_file.name, gs_uri)
        remote_probe = os.path.join(gs_uri,
                                    os.path.basename(dummy_file.name))
        while True:
            try:
                subprocess.check_call(test_cmd)
                subprocess.check_call(['gsutil', 'rm', remote_probe])
                break
            except subprocess.CalledProcessError:
                logging.debug('Unable to offload to %s, sleeping.', gs_uri)
                time.sleep(120)
Simran Basiac0edb22015-04-23 16:15:51 -0700809
810
class Offloader(object):
    """State of the offload process.

    Contains the following member fields:
      * _gs_offloader: BaseGSOffloader to use to offload a job directory.
      * _jobdir_classes: List of classes of job directory to be
        offloaded.
      * _processes: Maximum number of outstanding offload processes
        to allow during an offload cycle.
      * _upload_age_limit: Minimum age in days at which a job may be
        offloaded.
      * _delete_age_limit: Minimum age in days at which an offloaded
        job may be removed from local storage.
      * _open_jobs: a dictionary mapping directory paths to Job
        objects.
      * _offload_count_limit: Number of offload attempts after which we
        give up on a job.
      * gs_uri: Google Storage URI offloaded to; only set when not
        running in delete-only mode.
    """

    def __init__(self, options):
        """Configure the offloader from parsed command line options.

        @param options: Parsed options, as returned by `parse_options()`.
        """
        self._upload_age_limit = options.age_to_upload
        self._delete_age_limit = options.age_to_delete
        if options.delete_only:
            self._gs_offloader = FakeGSOffloader()
        else:
            self.gs_uri = utils.get_offload_gsuri()
            logging.debug('Offloading to: %s', self.gs_uri)
            # An explicit -m wins; when the option was not given at all
            # (None), fall back to the global config setting.
            multiprocessing = False
            if options.multiprocessing:
                multiprocessing = True
            elif options.multiprocessing is None:
                multiprocessing = GS_OFFLOADER_MULTIPROCESSING
            logging.info(
                    'Offloader multiprocessing is set to:%r', multiprocessing)
            console_client = None
            if (cloud_console_client and
                    cloud_console_client.is_cloud_notification_enabled()):
                console_client = cloud_console_client.PubSubBasedClient()
            self._gs_offloader = GSOffloader(
                    self.gs_uri, multiprocessing, self._delete_age_limit,
                    console_client)
        classlist = []
        if options.process_hosts_only or options.process_all:
            classlist.append(job_directories.SpecialJobDirectory)
        if not options.process_hosts_only:
            classlist.append(job_directories.RegularJobDirectory)
        self._jobdir_classes = classlist
        assert self._jobdir_classes
        self._processes = options.parallelism
        self._open_jobs = {}
        self._pusub_topic = None
        self._offload_count_limit = 3


    def _add_new_jobs(self):
        """Find new job directories that need offloading.

        Go through the file system looking for valid job directories
        that are currently not in `self._open_jobs`, and add them in.
        Directories already marked as uploaded are ignored.

        """
        new_job_count = 0
        for cls in self._jobdir_classes:
            for resultsdir in cls.get_job_directories():
                if (
                        resultsdir in self._open_jobs
                        or _is_uploaded(resultsdir)):
                    continue
                self._open_jobs[resultsdir] = cls(resultsdir)
                new_job_count += 1
        logging.debug('Start of offload cycle - found %d new jobs',
                      new_job_count)


    def _remove_offloaded_jobs(self):
        """Remove offloaded jobs from `self._open_jobs`.

        A job is dropped when its directory no longer exists or when it
        carries the upload marker.
        """
        removed_job_count = 0
        # Iterate over a snapshot: entries are deleted inside the loop,
        # and deleting while iterating a live dict view raises
        # RuntimeError on Python 3.
        for jobkey, job in list(self._open_jobs.items()):
            if (
                    not os.path.exists(job.dirname)
                    or _is_uploaded(job.dirname)):
                del self._open_jobs[jobkey]
                removed_job_count += 1
        logging.debug('End of offload cycle - cleared %d new jobs, '
                      'carrying %d open jobs',
                      removed_job_count, len(self._open_jobs))


    def _report_failed_jobs(self):
        """Report status after attempting offload.

        This function processes all jobs in `self._open_jobs`, assuming
        an attempt has just been made to offload all of them.

        Any job still open that has a recorded first offload attempt is
        counted as failed; the count is reported as a metric and the
        failed jobs are listed in a local file.

        """
        failed_jobs = [j for j in self._open_jobs.values() if
                       j.first_offload_start]
        self._report_failed_jobs_count(failed_jobs)
        self._log_failed_jobs_locally(failed_jobs)


    def offload_once(self):
        """Perform one offload cycle.

        Find all job directories for new jobs that we haven't seen
        before.  Then, attempt to offload the directories for any
        jobs that have finished running.  Offload of multiple jobs
        is done in parallel, up to `self._processes` at a time.

        After we've tried uploading all directories, give up on jobs
        that exceeded the attempt limit, drop the jobs that are done
        from `self._open_jobs`, and report the remaining failures.

        """
        self._add_new_jobs()
        self._report_current_jobs_count()
        with parallel.BackgroundTaskRunner(
                self._gs_offloader.offload, processes=self._processes) as queue:
            for job in self._open_jobs.values():
                _enqueue_offload(job, queue, self._upload_age_limit)
        self._give_up_on_jobs_over_limit()
        self._remove_offloaded_jobs()
        self._report_failed_jobs()


    def _give_up_on_jobs_over_limit(self):
        """Give up on jobs that have gone over the offload limit.

        We mark them as uploaded as we won't try to offload them any more.
        """
        for job in self._open_jobs.values():
            if job.offload_count >= self._offload_count_limit:
                _mark_uploaded(job.dirname)


    def _log_failed_jobs_locally(self, failed_jobs,
                                 log_file=FAILED_OFFLOADS_FILE):
        """Updates a local file listing all the failed jobs.

        The dropped file can be used by the developers to list jobs that we have
        failed to upload. The file is rewritten from scratch on every call.

        @param failed_jobs: A list of failed _JobDirectory objects.
        @param log_file: The file to log the failed jobs to.
        """
        now = datetime.datetime.now()
        now_str = now.strftime(FAILED_OFFLOADS_TIME_FORMAT)
        formatted_jobs = [_format_job_for_failure_reporting(job)
                          for job in failed_jobs]
        formatted_jobs.sort()

        with open(log_file, 'w') as logfile:
            logfile.write(FAILED_OFFLOADS_FILE_HEADER %
                          (now_str, len(failed_jobs)))
            logfile.writelines(formatted_jobs)


    def _report_current_jobs_count(self):
        """Report the number of outstanding jobs to monarch."""
        metrics.Gauge('chromeos/autotest/gs_offloader/current_jobs_count').set(
                len(self._open_jobs))


    def _report_failed_jobs_count(self, failed_jobs):
        """Report the number of outstanding failed offload jobs to monarch.

        @param failed_jobs: List of failed jobs.
        """
        metrics.Gauge('chromeos/autotest/gs_offloader/failed_jobs_count').set(
                len(failed_jobs))
981
982
def _enqueue_offload(job, queue, age_limit):
    """Enqueue the job for offload, if it's eligible.

    A job that has never been attempted is eligible only once the
    database has marked it finished and it is older than `age_limit`;
    its first attempt time is recorded at that point.  A job that has
    already been attempted is always retried.

    Every attempt increments the job's `offload_count`.  If the job's
    `process_gs_instructions()` then allows it, offload processing is
    requested by passing the `queue` parameter's `put()` method a list
    of the job's `dirname`, the parent directory of `dirname`, and the
    job's finish timestamp.

    @param job       _JobDirectory instance to offload.
    @param queue     If the job should be offloaded, put the offload
                     parameters into this queue for processing.
    @param age_limit Minimum age for a job to be offloaded.  A value
                     of 0 means that the job will be offloaded as
                     soon as it is finished.

    """
    first_attempt = not job.offload_count
    if first_attempt:
        if not _is_expired(job, age_limit):
            return
        job.first_offload_start = time.time()
    job.offload_count += 1
    if not job.process_gs_instructions():
        return
    finished_at = job.get_timestamp_if_finished()
    parent_dir = os.path.dirname(job.dirname)
    queue.put([job.dirname, parent_dir, finished_at])
1010
1011
def parse_options():
    """Parse the args passed into gs_offloader.

    Prints the help text and exits with status 1 when mutually
    exclusive options are combined: --all together with --hosts, or
    --days_old together with --age_to_upload / --age_to_delete.

    @returns The parsed options object.  `age_to_upload` and
             `age_to_delete` default to the value of `days_old` when
             they were not given on the command line.
    """
    defaults = 'Defaults:\n Destination: %s\n Results Path: %s' % (
            utils.DEFAULT_OFFLOAD_GSURI, RESULTS_DIR)
    usage = 'usage: %prog [options]\n' + defaults
    parser = OptionParser(usage)
    parser.add_option('-a', '--all', dest='process_all',
                      action='store_true',
                      help='Offload all files in the results directory.')
    parser.add_option('-s', '--hosts', dest='process_hosts_only',
                      action='store_true',
                      help='Offload only the special tasks result files '
                      'located in the results/hosts subdirectory')
    parser.add_option('-p', '--parallelism', dest='parallelism',
                      type='int', default=1,
                      help='Number of parallel workers to use.')
    parser.add_option('-o', '--delete_only', dest='delete_only',
                      action='store_true',
                      help='GS Offloader will only the delete the '
                      'directories and will not offload them to google '
                      'storage. NOTE: If global_config variable '
                      'CROS.gs_offloading_enabled is False, --delete_only '
                      'is automatically True.',
                      default=not GS_OFFLOADING_ENABLED)
    parser.add_option('-d', '--days_old', dest='days_old',
                      help='Minimum job age in days before a result can be '
                      'offloaded.', type='int', default=0)
    parser.add_option('-l', '--log_size', dest='log_size',
                      help='Limit the offloader logs to a specified '
                      'number of Mega Bytes.', type='int', default=0)
    # No default here on purpose: None means "not given", which lets the
    # consumer fall back to the global config setting.
    parser.add_option('-m', dest='multiprocessing', action='store_true',
                      help='Turn on -m option for gsutil. If not set, the '
                      'global config setting gs_offloader_multiprocessing '
                      'under CROS section is applied.')
    parser.add_option('-i', '--offload_once', dest='offload_once',
                      action='store_true',
                      help='Upload all available results and then exit.')
    parser.add_option('-y', '--normal_priority', dest='normal_priority',
                      action='store_true',
                      help='Upload using normal process priority.')
    parser.add_option('-u', '--age_to_upload', dest='age_to_upload',
                      help='Minimum job age in days before a result can be '
                      'offloaded, but not removed from local storage',
                      type='int', default=None)
    parser.add_option('-n', '--age_to_delete', dest='age_to_delete',
                      help='Minimum job age in days before a result can be '
                      'removed from local storage',
                      type='int', default=None)
    parser.add_option(
            '--metrics-file',
            help='If provided, drop metrics to this local file instead of '
                 'reporting to ts_mon',
            type=str,
            default=None,
    )

    options = parser.parse_args()[0]
    if options.process_all and options.process_hosts_only:
        parser.print_help()
        print ('Cannot process all files and only the hosts '
               'subdirectory. Please remove an argument.')
        sys.exit(1)

    if options.days_old and (options.age_to_upload or options.age_to_delete):
        parser.print_help()
        print('Use the days_old option or the age_to_* options but not both')
        sys.exit(1)

    # Identity comparison: only an option that was truly not given
    # (None) inherits days_old; an explicit 0 is kept as-is.
    if options.age_to_upload is None:
        options.age_to_upload = options.days_old
    if options.age_to_delete is None:
        options.age_to_delete = options.days_old

    return options
Scott Zawalskicb2e2b72012-04-17 12:10:05 -04001086
Simran Basi9523eaa2012-06-28 17:18:45 -07001087
def main():
    """Main method of gs_offloader.

    Parses options, sets up logging and process priority, changes into
    the results directory, and then runs offload cycles until told to
    stop (a single cycle when --offload_once is given).
    """
    options = parse_options()

    offloader_type = ('all' if options.process_all
                      else 'hosts' if options.process_hosts_only
                      else 'jobs')

    _setup_logging(options, offloader_type)

    # Nice our process (carried to subprocesses) so we don't overload
    # the system.
    if not options.normal_priority:
        logging.debug('Set process to nice value: %d', NICENESS)
        os.nice(NICENESS)
    if psutil:
        this_process = psutil.Process()
        logging.debug('Set process to ionice IDLE')
        this_process.ionice(psutil.IOPRIO_CLASS_IDLE)

    # os.listdir returns relative paths, so change to where we need to
    # be to avoid an os.path.join on each loop.
    logging.debug('Offloading Autotest results in %s', RESULTS_DIR)
    os.chdir(RESULTS_DIR)

    service_name = 'gs_offloader(%s)' % offloader_type
    with ts_mon_config.SetupTsMonGlobalState(service_name, indirect=True,
                                             short_lived=False,
                                             debug_file=options.metrics_file):
        with metrics.SuccessCounter('chromeos/autotest/gs_offloader/exit'):
            offloader = Offloader(options)
            if not options.delete_only:
                wait_for_gs_write_access(offloader.gs_uri)
            # Always run one cycle, then keep cycling with a pause in
            # between unless a single pass was requested.
            offloader.offload_once()
            while not options.offload_once:
                time.sleep(SLEEP_TIME_SECS)
                offloader.offload_once()
Scott Zawalskicb2e2b72012-04-17 12:10:05 -04001129
1130
# Directory the offloader log files are written to.
_LOG_LOCATION = '/usr/local/autotest/logs/'
# Log file name template; filled with (offloader type, timestamp).
_LOG_FILENAME_FORMAT = 'gs_offloader_%s_log_%s.txt'
# strftime format of the timestamp embedded in the log file name.
_LOG_TIMESTAMP_FORMAT = '%Y%m%d_%H%M%S'
# Format of each emitted log record.
_LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
1135
1136
def _setup_logging(options, offloader_type):
    """Attach a rotating file handler to the root logger.

    The root logger level is set to DEBUG.

    @param options: Parsed options.
    @param offloader_type: Type of offloader action as string.
    """
    # With options.log_size == 0 maxBytes is 0, so the handler never
    # rolls over and the file size is not limited.  One backup is kept
    # just in case.
    # NOTE(review): the --log_size help text speaks of megabytes, but
    # the limit computed here is log_size * 1024 bytes -- confirm which
    # unit is intended.
    handler = logging.handlers.RotatingFileHandler(
            _get_log_filename(options, offloader_type),
            maxBytes=1024 * options.log_size,
            backupCount=1)
    handler.setFormatter(logging.Formatter(_LOGGING_FORMAT))
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    root_logger.addHandler(handler)
1154
1155
def _get_log_filename(options, offloader_type):
    """Build the full path of the log file.

    When a log size limit is set the name carries no timestamp, so the
    same file is reused; otherwise the current time is embedded in the
    name.

    @param options: Parsed options.
    @param offloader_type: Type of offloader action as string.
    """
    stamp = ('' if options.log_size > 0
             else time.strftime(_LOG_TIMESTAMP_FORMAT))
    return os.path.join(_LOG_LOCATION,
                        _LOG_FILENAME_FORMAT % (offloader_type, stamp))
1168
1169
# Run the offloader only when executed as a script, not on import.
if __name__ == '__main__':
    main()