Dan Shi | 5a1af08 | 2016-05-23 13:06:28 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """ |
| 8 | This module is used to upload csv files generated by performance related tests |
| 9 | to cns. More details about the implementation can be found in crbug.com/598504. |
| 10 | |
| 11 | The overall work flow is as follows. |
| 12 | 1. Query tko_test_attributes table for perf_csv_folder attribute. The attribute |
| 13 | contains a path to csv files that need to be uploaded to cns. |
| 14 | 2. Filter the perf_csv_folder attributes only for test jobs that finished at |
| 15 | least an hour ago. This is to make sure the results have already been uploaded to GS. |
| 16 | 3. Locate the csv files in GS, and upload them to desired cns location. |
| 17 | |
| 18 | After every run, the script saves the minimum test attribute id to query |
| 19 | from next time to a local file, and repeats the workflow. |
| 20 | |
| 21 | """ |
| 22 | |
| 23 | import argparse |
| 24 | import datetime |
| 25 | import logging |
| 26 | import os |
Dan Shi | fcd78ba | 2016-05-24 15:25:03 -0700 | [diff] [blame] | 27 | import shutil |
| 28 | import tempfile |
Dan Shi | 5a1af08 | 2016-05-23 13:06:28 -0700 | [diff] [blame] | 29 | import time |
| 30 | |
| 31 | import common |
| 32 | from autotest_lib.client.bin import utils |
| 33 | from autotest_lib.client.common_lib import logging_config |
| 34 | from autotest_lib.client.common_lib.cros import retry |
| 35 | from autotest_lib.frontend import setup_django_environment |
| 36 | from autotest_lib.frontend.tko import models as tko_models |
| 37 | |
| 38 | |
# Number of hours that a test has to be finished for the script to process.
# This allows gs_offloader to have enough time to upload the results to GS.
# Tests whose job finished more recently than this are not picked up.
CUTOFF_TIME_HOURS = 1

# Wait time in seconds that main() sleeps before querying the database again.
DEFAULT_INTERVAL_SEC = 60

# Timeout in minutes for upload attempts for a given folder. Passed as
# timeout_min to the retry decorator on CsvFolder.upload.
UPLOAD_TIMEOUT_MINS = 5
| 48 | |
class CsvNonexistenceException(Exception):
    """Signals that no csv file could be found in GoogleStorage."""
| 51 | |
| 52 | |
class CsvFolder(object):
    """Information about one folder of csv files and the logic to upload it.

    Each instance is built from one perf_csv_folder record of the
    tko_test_attributes table together with the matching test view. The csv
    files are located in GoogleStorage with gsutil and copied to cns with
    fileutil.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. It is shared by all instances and must be set (get_options()
    # does so) before upload() is called.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # It is shared by all instances and must be set (get_options() does so)
    # before upload() is called.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        """Return '<job_name>:<job_tag>:<perf_csv_folder>' for log messages."""
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the folder storing the test results in GS.

        The url is the class level gs_path joined with the job tag of the
        test view. For example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url of the results folder of the job.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download all csv files of the job to the given dest_dir.

        Every csv file found recursively under the GS url of the job is
        copied directly into dest_dir. Nothing is returned; callers list
        dest_dir to find the downloaded files.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively. The exit status is
        # ignored (grep exits non-zero when nothing matches); an empty listing
        # is detected below instead.
        listing = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
                            gs_url, ignore_status=True).stdout
        # Drop blank lines so that empty output yields an empty list rather
        # than [''].
        files = [line.strip() for line in listing.splitlines()
                 if line.strip()]
        if not files:
            # Format the message here; passing gs_url as a second argument
            # would leave the '%s' placeholder unfilled in the exception.
            raise CsvNonexistenceException('No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    @retry.retry(Exception, blacklist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the csv files of the folder to cns.

        The files are first downloaded from GS to a temporary directory,
        which is always removed afterwards, even when the upload fails.
        NOTE(review): the retry decorator presumably re-runs this method on
        any exception other than CsvNonexistenceException until
        UPLOAD_TIMEOUT_MINS elapses -- confirm against retry.retry.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            finished = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/hh/mm
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished.year),
                    str(finished.month).zfill(2),
                    str(finished.day).zfill(2),
                    str(finished.hour).zfill(2),
                    str(finished.minute).zfill(2))
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in os.listdir(temp_dir):
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)
Dan Shi | 5a1af08 | 2016-05-23 13:06:28 -0700 | [diff] [blame] | 142 | |
| 143 | |
class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table. Only
    # records with an id greater than or equal to this value are returned.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Only attributes whose id is at least cls.min_test_attribute_id and
        whose job finished at least CUTOFF_TIME_HOURS ago are returned.
        Attributes without a matching test view, or whose job has no finished
        time recorded, are skipped.

        @return: A list of CsvFolder objects for each new entry of
                perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder', id__gte=cls.min_test_attribute_id)
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            # Fetch the first matching test view once. Indexing the queryset
            # directly would raise IndexError when there is no match, and
            # indexing it twice would run the query twice.
            test_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)[:1])
            if not test_views:
                logging.warning('No test view found for test attribute %s, '
                                'skipping.', attribute.id)
                continue
            test_view = test_views[0]
            finished_time = test_view.job_finished_time
            # A missing finished time cannot be compared with the cutoff;
            # treat such a job like one that finished too recently.
            if finished_time is None or finished_time > cutoff_time:
                continue
            folders.append(CsvFolder(attribute.id, attribute.value, test_view))
        return folders
| 174 | |
| 175 | |
def setup_logging(log_dir):
    """Configure logging so that the script logs to a file under log_dir.

    The log file is named perf_csv_uploader.log and receives messages of
    DEBUG level and above.

    @param log_dir: Path to the directory storing logs of this script.
    """
    log_config = logging_config.LoggingConfig()
    log_path = os.path.join(os.path.abspath(log_dir), 'perf_csv_uploader.log')
    log_config.add_file_handler(file_path=log_path, level=logging.DEBUG)
| 184 | |
| 185 | |
def save_min_test_attribute_id(test_attribute_id_file):
    """Write the current DBScanner.min_test_attribute_id to a cache file.

    Any existing content of the file is overwritten.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: Whatever the write call on the file object returns.
    """
    with open(test_attribute_id_file, 'w') as cache_file:
        return cache_file.write(str(DBScanner.min_test_attribute_id))
| 194 | |
| 195 | |
def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The integer stored in the file, or -1 if the file cannot be
            read or does not contain a valid integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            return int(f.read())
    except IOError:
        # min_test_attribute_id has not been set, default to -1.
        return -1
    except ValueError:
        # The file is empty or corrupt, e.g. the script was interrupted while
        # writing it. Start over instead of failing at startup.
        logging.warning('Invalid content in %s, default to -1.',
                        test_attribute_id_file)
        return -1
| 208 | |
| 209 | |
def get_options():
    """Get the command line options.

    All three options are required: the rest of the script joins paths onto
    each of them, so a missing one is reported here instead of failing later.
    As a side effect, the gs_path and cns_path options are stored on the
    CsvFolder class for use by every CsvFolder instance.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path', required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options
| 228 | |
| 229 | |
def main():
    """Main process to repeat the workflow of searching/uploading csv files.

    The loop never returns. Each run queries for new csv folders, uploads
    them, and then persists the test attribute id to query from in the next
    run, so that the script can resume after a restart.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_folders = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException:
                # A folder without csv files in GS is not counted as a
                # failure, so it is not retried in later runs.
                logging.info('No csv file found for folder %s, skipping.',
                             folder)
            except Exception as e:
                failed_folders.append(folder)
                logging.error('Failed to upload folder %s, error: %s',
                              folder, e)
        if failed_folders:
            # Set the min_test_attribute_id to be the smallest one that failed
            # to upload, so the next run retries from there.
            min_test_attribute_id = min(folder.test_attribute_id
                                        for folder in failed_folders)
        else:
            min_test_attribute_id = max(folder.test_attribute_id
                                        for folder in folders) + 1
        if DBScanner.min_test_attribute_id != min_test_attribute_id:
            DBScanner.min_test_attribute_id = min_test_attribute_id
            save_min_test_attribute_id(test_attribute_id_file)

        if failed_folders:
            # The failed folders are queried again in the next run; wait
            # first so a persistent failure does not turn into a busy loop.
            time.sleep(DEFAULT_INTERVAL_SEC)
| 270 | |
| 271 | |
# Start the uploader loop only when the file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()