blob: 1247cfd036714ddface5e445167885eb1220cb74 [file] [log] [blame]
Mike Frysingerd03e6b52019-08-03 12:49:01 -04001#!/usr/bin/env python2
Dan Shi5a1af082016-05-23 13:06:28 -07002
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""
This module is used to upload csv files generated by performance related tests
to cns. More details about the implementation can be found in crbug.com/598504.

The overall work flow is as follows.
1. Query tko_test_attributes table for perf_csv_folder attribute. The attribute
contains a path to the csv files that need to be uploaded to cns.
2. Filter the perf_csv_folder attributes to keep only those whose test jobs
finished at least an hour ago. This is to make sure the results have already
been uploaded to GS.
3. Locate the csv files in GS, and upload them to the desired cns location.

After every run, the script saves the test attribute id to resume from to a
local file, and repeats the workflow.
20
21"""
22
23import argparse
24import datetime
25import logging
26import os
Dan Shifcd78ba2016-05-24 15:25:03 -070027import shutil
28import tempfile
Dan Shi5a1af082016-05-23 13:06:28 -070029import time
30
31import common
32from autotest_lib.client.bin import utils
33from autotest_lib.client.common_lib import logging_config
34from autotest_lib.client.common_lib.cros import retry
35from autotest_lib.frontend import setup_django_environment
36from autotest_lib.frontend.tko import models as tko_models
37
38
# Minimum age, in hours, of a finished test job before this script picks it up.
# The delay gives gs_offloader enough time to upload the results to GS.
CUTOFF_TIME_HOURS = 1

# Seconds to wait before polling again when no new folders are found.
DEFAULT_INTERVAL_SEC = 60

# Upper bound, in minutes, on the upload attempts made for a single folder.
UPLOAD_TIMEOUT_MINS = 5
48
class CsvNonexistenceException(Exception):
    """Raised when no csv file can be found in GS for a test job."""
51
52
class CsvFolder(object):
    """Information about one folder of perf csv files, and the upload logic.

    Each instance corresponds to one perf_csv_folder test attribute. The csv
    files are located in GoogleStorage under the results of the test job,
    copied to a local temporary directory, and then copied into cns.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. It must be set before any instance is uploaded.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # It must be set before any instance is uploaded.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the results folder of the test job in GS.

        The url is the class-level gs_path joined with the job tag of the
        test view. For example:
            gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url as a string.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download all csv files of the test job to the given dest_dir.

        Every csv file found recursively under the GS url is copied directly
        into dest_dir.
        NOTE(review): files with the same base name in different GS
        sub-folders would overwrite each other in dest_dir.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively. ignore_status is set
        # because grep exits non-zero when nothing matches.
        listing = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
                            gs_url, ignore_status=True).stdout
        files = [line.strip() for line in listing.splitlines()
                 if line.strip()]
        if not files:
            # Format the message here: exceptions do not interpolate
            # logging-style arguments.
            raise CsvNonexistenceException('No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    @retry.retry(Exception, blacklist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the folder to cns.

        The csv files are downloaded from GS to a temporary directory, which
        is always removed afterwards, and copied to cns from there. The retry
        decorator is configured with Exception, a blacklist containing
        CsvNonexistenceException, and a timeout of UPLOAD_TIMEOUT_MINS minutes.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            finished_time = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/hh/mm
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished_time.year),
                    '%02d' % finished_time.month,
                    '%02d' % finished_time.day,
                    '%02d' % finished_time.hour,
                    '%02d' % finished_time.minute)
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in os.listdir(temp_dir):
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)
Dan Shi5a1af082016-05-23 13:06:28 -0700142
143
class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Attributes are visited in increasing id order, starting from
        min_test_attribute_id. The scan stops at the first attribute whose job
        finished less than CUTOFF_TIME_HOURS ago, so that a caller advancing
        min_test_attribute_id past the returned folders never skips an
        attribute that is merely not ready yet. Attributes without a test view
        or without a job finished time cannot be uploaded and are skipped.

        @return: A list of CsvFolder objects, ordered by test attribute id,
                 for the new perf_csv_folder attributes that are ready to be
                 uploaded.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder',
                id__gte=cls.min_test_attribute_id).order_by('id')
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            # Fetch the first matching view once; indexing the queryset
            # directly fails when there is no row at all.
            test_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)[:1])
            if not test_views:
                logging.warning('No test view found for test attribute %s, '
                                'skipping.', attribute.id)
                continue
            test_view = test_views[0]
            finished_time = test_view.job_finished_time
            if finished_time is None:
                # The cns path is derived from the finished time, so there is
                # nothing sensible to upload without it.
                logging.warning('Test attribute %s has no job finished time, '
                                'skipping.', attribute.id)
                continue
            if finished_time > cutoff_time:
                # Not ready yet. Stop here so this attribute and the ones
                # after it are picked up by a later scan.
                break
            folders.append(CsvFolder(attribute.id, attribute.value,
                                     test_view))
        return folders
173 return folders
174
175
def setup_logging(log_dir):
    """Send the logs of this script to a file inside log_dir.

    A DEBUG level file handler writing to perf_csv_uploader.log is added
    through logging_config.LoggingConfig.

    @param log_dir: Path to the directory storing logs of this script.
    """
    log_config = logging_config.LoggingConfig()
    absolute_log_dir = os.path.abspath(log_dir)
    log_config.add_file_handler(
            file_path=os.path.join(absolute_log_dir, 'perf_csv_uploader.log'),
            level=logging.DEBUG)
183 config.add_file_handler(file_path=logfile, level=logging.DEBUG)
184
185
def save_min_test_attribute_id(test_attribute_id_file):
    """Write the current DBScanner.min_test_attribute_id to a cache file.

    The file is overwritten with the string form of the id.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: Whatever the write call on the file object returns.
    """
    with open(test_attribute_id_file, 'w') as cache_file:
        serialized_id = str(DBScanner.min_test_attribute_id)
        return cache_file.write(serialized_id)
193 return f.write(str(DBScanner.min_test_attribute_id))
194
195
def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The cached id as an integer, or -1 if the file cannot be read or
             does not contain a valid integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            content = f.read()
    except IOError:
        # min_test_attribute_id has not been set, default to -1.
        return -1

    try:
        return int(content)
    except ValueError:
        # An empty or corrupted cache file (e.g. from an interrupted write)
        # should not stop the script from starting; start over instead.
        logging.warning('Ignoring invalid min test attribute id %r in %s.',
                        content, test_attribute_id_file)
        return -1
207 return -1
208
209
def get_options():
    """Get the command line options.

    All three options are required: each of them is joined into a path
    unconditionally later on, so a missing one is reported as a usage error
    here instead of failing later with an obscure error.

    As a side effect, CsvFolder.gs_path and CsvFolder.cns_path are set from
    the parsed options.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path',
                        required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options
227 return options
228
229
def main():
    """Run the search/upload workflow for csv files in an endless loop.

    The test attribute id to resume from is cached in a file under the log
    directory, and is rewritten whenever it changes.
    """
    options = get_options()
    setup_logging(options.log_dir)
    id_cache_path = os.path.join(options.log_dir,
                                 'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(id_cache_path)

    while True:
        pending_folders = DBScanner.get_perf_csv_folders()
        if not pending_folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_ids = []
        for csv_folder in pending_folders:
            try:
                logging.info('Uploading folder: %s', csv_folder)
                csv_folder.upload()
            except CsvNonexistenceException:
                # A folder without csv files in GS is not treated as a
                # failure.
                continue
            except Exception as error:
                failed_ids.append(csv_folder.test_attribute_id)
                logging.error('Failed to upload folder %s, error: %s',
                              csv_folder, error)

        if failed_ids:
            # Resume from the smallest attribute id that failed to upload.
            next_min_id = min(failed_ids)
        else:
            # Everything went through, resume right after the largest id.
            next_min_id = max(csv_folder.test_attribute_id
                              for csv_folder in pending_folders) + 1

        if next_min_id != DBScanner.min_test_attribute_id:
            DBScanner.min_test_attribute_id = next_min_id
            save_min_test_attribute_id(id_cache_path)


if __name__ == '__main__':
    main()