Mike Frysinger | d03e6b5 | 2019-08-03 12:49:01 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2 |
Chris Sosa | 621509d | 2012-04-04 16:02:52 -0700 | [diff] [blame] | 2 | |
| 3 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Module used to back up the mysql db and upload to Google Storage. |
| 8 | |
| 9 | Usage: |
| 10 | backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10 |
| 11 | |
| 12 | gs_bucket may refer to a local location by omitting gs:// and giving a local |
| 13 | path if desired for testing. The example usage above creates a dump |
| 14 | of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and |
| 15 | cleans up older dumps if there are more than 10 in that directory. |
| 16 | """ |
| 17 | |
| 18 | import datetime |
| 19 | from distutils import version |
| 20 | import logging |
| 21 | import optparse |
| 22 | import os |
| 23 | import tempfile |
| 24 | |
| 25 | import common |
| 26 | |
| 27 | from autotest_lib.client.common_lib import error |
Shuqian Zhao | 3fc87b7 | 2017-09-28 15:40:28 -0700 | [diff] [blame] | 28 | from autotest_lib.client.common_lib import global_config |
| 29 | from autotest_lib.client.common_lib import logging_manager |
| 30 | from autotest_lib.client.common_lib import utils |
Chris Sosa | 621509d | 2012-04-04 16:02:52 -0700 | [diff] [blame] | 31 | from autotest_lib.utils import test_importer |
| 32 | |
Shuqian Zhao | 3fc87b7 | 2017-09-28 15:40:28 -0700 | [diff] [blame] | 33 | from chromite.lib import metrics |
| 34 | from chromite.lib import ts_mon_config |
Chris Sosa | 621509d | 2012-04-04 16:02:52 -0700 | [diff] [blame] | 35 | |
| 36 | _ATTEMPTS = 3 |
| 37 | _GSUTIL_BIN = 'gsutil' |
Scott Zawalski | 6a6c9d1 | 2012-04-16 13:03:07 -0400 | [diff] [blame] | 38 | _GS_BUCKET = 'gs://chromeos-lab/backup/database' |
Scott Zawalski | 6ccf284 | 2012-08-31 20:45:43 -0400 | [diff] [blame] | 39 | # TODO(scottz): Should we need to ignore more than one database a general |
| 40 | # function should be designed that lists tables in the database and properly |
| 41 | # creates the --ignore-table= args to be passed to mysqldump. |
| 42 | # Tables to ignore when dumping all databases. |
| 43 | # performance_schema is an internal database that cannot be dumped |
| 44 | IGNORE_TABLES = ['performance_schema.cond_instances', |
| 45 | 'performance_schema.events_waits_current', |
| 46 | 'performance_schema.cond_instances', |
| 47 | 'performance_schema.events_waits_history', |
| 48 | 'performance_schema.events_waits_history_long', |
| 49 | 'performance_schema.events_waits_summary_by_instance', |
| 50 | ('performance_schema.' |
| 51 | 'events_waits_summary_by_thread_by_event_name'), |
| 52 | 'performance_schema.events_waits_summary_global_by_event_name', |
| 53 | 'performance_schema.file_instances', |
| 54 | 'performance_schema.file_summary_by_event_name', |
| 55 | 'performance_schema.file_summary_by_instance', |
| 56 | 'performance_schema.mutex_instances', |
| 57 | 'performance_schema.performance_timers', |
| 58 | 'performance_schema.rwlock_instances', |
| 59 | 'performance_schema.setup_consumers', |
| 60 | 'performance_schema.setup_instruments', |
| 61 | 'performance_schema.setup_timers', |
| 62 | 'performance_schema.threads'] |
Prashanth B | 064c949 | 2014-09-06 15:29:32 -0700 | [diff] [blame] | 63 | |
| 64 | # Conventional mysqldump schedules. |
Chris Sosa | 621509d | 2012-04-04 16:02:52 -0700 | [diff] [blame] | 65 | _DAILY = 'daily' |
| 66 | _WEEKLY = 'weekly' |
| 67 | _MONTHLY = 'monthly' |
Prashanth B | 064c949 | 2014-09-06 15:29:32 -0700 | [diff] [blame] | 68 | |
Jacob Kopczynski | 2871887 | 2018-08-20 14:54:44 -0700 | [diff] [blame] | 69 | # Dump of server db only |
Fang Deng | 4e6ba49 | 2015-12-11 15:44:04 -0800 | [diff] [blame] | 70 | _SERVER_DB = 'server_db' |
| 71 | |
Prashanth B | 064c949 | 2014-09-06 15:29:32 -0700 | [diff] [blame] | 72 | # Contrary to a conventional mysql dump which takes O(hours) on large databases, |
| 73 | # a host dump is the cheapest form of backup possible. We dump the output of a |
| 74 | # of a mysql command showing all hosts and their pool labels to a text file that |
| 75 | # is backed up to google storage. |
| 76 | _ONLY_HOSTS = 'only_hosts' |
Jakob Juelich | e0e9c93 | 2014-09-24 14:40:48 -0700 | [diff] [blame] | 77 | _ONLY_SHARDS = 'only_shards' |
Jacob Kopczynski | 8a329a9 | 2019-06-03 12:26:35 -0700 | [diff] [blame] | 78 | _SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, |
Jacob Kopczynski | 1a05dc0 | 2018-09-02 09:30:42 -0700 | [diff] [blame] | 79 | _DAILY, _WEEKLY, _MONTHLY] |
Fang Deng | 4e6ba49 | 2015-12-11 15:44:04 -0800 | [diff] [blame] | 80 | |
class BackupError(Exception):
    """Raised when an error occurs during a database backup."""
Chris Sosa | 621509d | 2012-04-04 16:02:52 -0700 | [diff] [blame] | 83 | |
| 84 | |
class MySqlArchiver(object):
    """Class that archives the Autotest MySQL DB to Google Storage.

    Vars:
        gs_dir: The path to the directory in Google Storage that this dump file
                will be uploaded to.
        number_to_keep: The number of dumps we should store.
    """
    # Database holding the main autotest tables; dumped for the
    # _ONLY_HOSTS / _ONLY_SHARDS query-based backups.
    _AUTOTEST_DB = "chromeos_autotest_db"
    # Database holding lab server records; dumped for the _SERVER_DB type.
    _SERVER_DB = "chromeos_lab_servers"


    def __init__(self, scheduled_type, number_to_keep, gs_bucket):
        """Initializes the archiver.

        @param scheduled_type: One of _SCHEDULER_TYPES; selects which db(s)
                are dumped and which gs subdirectory receives the dump.
        @param number_to_keep: Number of old dumps retained by cleanup().
        @param gs_bucket: Google Storage bucket (or a local path without the
                gs:// prefix, for testing) under which dumps are stored.
        """
        # For conventional scheduled type, we back up all databases.
        # self._db is only used when scheduled_type is not
        # conventional scheduled type.
        self._db = self._get_db_name(scheduled_type)
        self._gs_dir = '/'.join([gs_bucket, scheduled_type])
        self._number_to_keep = number_to_keep
        self._type = scheduled_type


    @classmethod
    def _get_db_name(cls, scheduled_type):
        """Get the db name to backup.

        @param scheduled_type: one of _SCHEDULER_TYPES.

        @returns: The name of the db to backup.
                  Or None for backup all dbs.
        """
        if scheduled_type == _SERVER_DB:
            return cls._SERVER_DB
        elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
            return cls._AUTOTEST_DB
        else:
            return None

    @staticmethod
    def _get_user_pass():
        """Returns a tuple containing the user/pass to use to access the DB."""
        user = global_config.global_config.get_config_value(
                'CROS', 'db_backup_user')
        password = global_config.global_config.get_config_value(
                'CROS', 'db_backup_password')
        return user, password


    def create_mysql_dump(self):
        """Returns the path to a mysql dump of the current autotest DB.

        The dump is gzip-compressed. For _WEEKLY/_MONTHLY dumps, --dump-slave
        is added so the dump records the replication master's binlog position.

        @returns: Path to the gzipped dump file (a tempfile; caller owns it).
        """
        user, password = self._get_user_pass()
        _, filename = tempfile.mkstemp('autotest_db_dump')
        logging.debug('Dumping mysql database to file %s', filename)
        extra_dump_args = ''
        for entry in IGNORE_TABLES:
            extra_dump_args += '--ignore-table=%s ' % entry
        if self._type in [_WEEKLY, _MONTHLY]:
            extra_dump_args += '--dump-slave '

        # No specific db selected (conventional schedule): dump everything.
        if not self._db:
            extra_dump_args += "--all-databases"
        db_name = self._db or ''
        # set -o pipefail makes a mysqldump failure fail the whole pipeline,
        # not just the trailing gzip.
        # NOTE(review): the password is passed on the command line and may be
        # visible in process listings — consider a defaults file. TODO confirm.
        utils.system('set -o pipefail; mysqldump --user=%s '
                     '--password=%s %s %s| gzip - > %s' % (
                     user, password, extra_dump_args, db_name, filename))
        return filename


    def _create_dump_from_query(self, query):
        """Dumps result of a query into a text file.

        @param query: Query to execute.

        @return: The path to a tempfile containing the response of the query.

        @raises BackupError: If this archiver has no specific db selected
                (self._db is None, i.e. a conventional schedule type).
        """
        if not self._db:
            raise BackupError("_create_dump_from_query requires a specific db.")
        parameters = {'db': self._db, 'query': query}
        parameters['user'], parameters['password'] = self._get_user_pass()
        _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
        utils.system(
                'set -o pipefail; mysql -u %(user)s -p%(password)s '
                '%(db)s -e "%(query)s" > %(filename)s' %
                parameters)
        return parameters['filename']


    def create_host_dump(self):
        """Dumps hosts and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 hosts and their pool labels.
        """
        # When static labels are in use, pool membership lives in the
        # afe_static_* tables instead of the regular label tables.
        respect_static_labels = global_config.global_config.get_config_value(
                'SKYLAB', 'respect_static_labels', type=bool, default=False)
        template = ('SELECT hosts.hostname, labels.name FROM afe_hosts AS '
                    'hosts JOIN %(hosts_labels_table)s AS hlt ON '
                    'hosts.id = hlt.host_id '
                    'JOIN %(labels_table)s AS labels '
                    'ON labels.id = hlt.%(column)s '
                    'WHERE labels.name LIKE \'%%pool%%\';')
        if respect_static_labels:
            # HACK: We're not checking the replaced_by_static_label on the
            # pool label and just hard coding the fact that pool labels are
            # indeed static labels. Expedience.
            query = template % {
                    'hosts_labels_table': 'afe_static_hosts_labels',
                    'labels_table': 'afe_static_labels',
                    'column': 'staticlabel_id',
            }
        else:
            query = template % {
                    'hosts_labels_table': 'afe_hosts_labels',
                    'labels_table': 'afe_labels',
                    'column': 'label_id',
            }
        return self._create_dump_from_query(query)


    def create_shards_dump(self):
        """Dumps shards and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 shards and their labels.
        """
        query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
                 'JOIN afe_shards_labels '
                 'ON shards.id = afe_shards_labels.shard_id '
                 'JOIN afe_labels AS labels '
                 'ON labels.id = afe_shards_labels.label_id;')
        return self._create_dump_from_query(query)


    def dump(self):
        """Creates a data dump based on the type of schedule.

        @return: The path to a file containing the dump.
        """
        if self._type == _ONLY_HOSTS:
            return self.create_host_dump()
        if self._type == _ONLY_SHARDS:
            return self.create_shards_dump()
        return self.create_mysql_dump()


    def _get_name(self):
        """Returns the name of the dump as presented to google storage."""
        # Host/shard dumps are raw query output (text); everything else is
        # gzipped mysqldump output.
        if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
            file_type = 'txt'
        else:
            file_type = 'gz'
        return 'autotest-dump.%s.%s' % (
                datetime.datetime.now().strftime('%y.%m.%d'), file_type)


    @staticmethod
    def _retry_run(cmd):
        """Run the specified |cmd| string, retrying if necessary.

        Args:
            cmd: The command to run.

        Returns:
            The command's stdout, from the first successful attempt.

        Raises:
            error.CmdError: If the command fails _ATTEMPTS times in a row.
        """
        for attempt in range(_ATTEMPTS):
            try:
                return utils.system_output(cmd)
            except error.CmdError:
                if attempt == _ATTEMPTS - 1:
                    # Out of retries; let the final failure propagate.
                    raise
                else:
                    logging.error('Failed to run %r', cmd)


    def upload_to_google_storage(self, dump_file):
        """Uploads the given |dump_file| to Google Storage.

        The local dump file is deleted after a successful upload.

        @param dump_file: The path to the file containing the dump.
        """
        cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
        input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
                          name=self._get_name(), gs_dir=self._gs_dir)
        cmd = cmd % input_dict
        logging.debug('Uploading mysql dump to google storage')
        self._retry_run(cmd)
        # The local tempfile is no longer needed once it has been uploaded.
        os.remove(dump_file)


    def _get_gs_command(self, cmd):
        """Returns an array representing the command for rm or ls."""
        # Helpful code to allow us to test without gs.
        assert cmd in ['rm', 'ls']
        gs_bin = _GSUTIL_BIN
        # A gs:// destination goes through gsutil; a plain local path uses
        # the regular shell rm/ls so tests can run without Google Storage.
        if self._gs_dir.startswith('gs://'):
            cmd_array = [gs_bin, cmd]
        else:
            cmd_array = [cmd]

        return cmd_array


    def _do_ls(self):
        """Returns the output of running ls on the gs bucket."""
        cmd = self._get_gs_command('ls') + [self._gs_dir]
        return self._retry_run(' '.join(cmd))


    def cleanup(self):
        """Cleans up the gs bucket to ensure we don't over archive."""
        logging.debug('Cleaning up previously archived dump files.')
        listing = self._do_ls()
        # Dump names embed a yy.mm.dd date, so LooseVersion ordering sorts
        # them oldest-first.
        ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
        if len(ordered_listing) < self._number_to_keep:
            logging.debug('Cleanup found nothing to do.')
            return

        # Keep the newest _number_to_keep entries; remove everything older.
        to_remove = ordered_listing[:-self._number_to_keep]
        rm_cmd = self._get_gs_command('rm')
        for artifact in to_remove:
            cmd = ' '.join(rm_cmd + [artifact])
            self._retry_run(cmd)
| 304 | |
| 305 | |
def parse_options():
    """Parses command line options for the backup script.

    @returns: An optparse.Values instance with attributes gs_bucket, keep,
              type and verbose.

    @raises SystemExit: Via parser.error() when --type is not one of
            _SCHEDULER_TYPES.
    """
    parser = optparse.OptionParser()
    parser.add_option('--gs_bucket', default=_GS_BUCKET,
                      help='Google storage bucket to store mysql db dumps.')
    parser.add_option('--keep', default=10, type=int,
                      help='Number of dumps to keep of specified type.')
    parser.add_option('--type', default=_DAILY,
                      help='The type of mysql dump to store.')
    # Fixed copy-pasted help text: this flag toggles verbose logging, it has
    # nothing to do with the storage bucket.
    parser.add_option('--verbose', default=False, action='store_true',
                      help='Enable verbose logging output.')
    options = parser.parse_args()[0]
    if options.type not in _SCHEDULER_TYPES:
        parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))

    return options
| 322 | |
| 323 | |
def main():
    """Runs the program.

    Parses options, performs the selected dump, uploads it to Google
    Storage and prunes old dumps. Timing and success/failure metrics are
    reported via ts_mon regardless of outcome.
    """
    options = parse_options()
    logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
                                      verbose=options.verbose)
    backup_succeeded = False

    # ts_mon global state must be set up before any metric is emitted;
    # indirect=True runs the metrics flushing in a separate process.
    # NOTE(review): indirect flushing semantics assumed from chromite usage —
    # confirm against chromite.lib.ts_mon_config.
    with ts_mon_config.SetupTsMonGlobalState(service_name='mysql_db_backup',
                                             indirect=True):
        with metrics.SecondsTimer(
                'chromeos/autotest/afe_db/backup/durations',
                fields={'type': options.type}):
            try:
                logging.debug('Start db backup: %s', options.type)
                archiver = MySqlArchiver(
                        options.type, options.keep, options.gs_bucket)
                dump_file = archiver.dump()
                logging.debug('Uploading backup: %s', options.type)
                archiver.upload_to_google_storage(dump_file)
                archiver.cleanup()
                logging.debug('Db backup completed: %s', options.type)
                backup_succeeded = True
            finally:
                # Always record completion, flagging whether the backup
                # actually succeeded.
                metrics.Counter(
                        'chromeos/autotest/db/db_backup/completed').increment(
                                fields={'success': backup_succeeded,
                                        'type': options.type})
Chris Sosa | 621509d | 2012-04-04 16:02:52 -0700 | [diff] [blame] | 351 | |
| 352 | |
# Entry point when invoked directly (e.g. by cron).
if __name__ == '__main__':
    main()