blob: d7b05b00b7d0bac9bcb84fbc5e847ac6d516dfc9 [file] [log] [blame]
Chris Sosa621509d2012-04-04 16:02:52 -07001#!/usr/bin/python
2
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module used to back up the mysql db and upload to Google Storage.
8
9Usage:
10 backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
11
12 gs_bucket may refer to a local location by omitting gs:// and giving a local
13 path if desired for testing. The example usage above creates a dump
14 of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
15 cleans up older dumps if there are more than 10 in that directory.
16"""
17
18import datetime
19from distutils import version
20import logging
21import optparse
22import os
23import tempfile
24
25import common
26
27from autotest_lib.client.common_lib import error
28from autotest_lib.client.common_lib import global_config, logging_manager, utils
29from autotest_lib.utils import test_importer
30
31
32_ATTEMPTS = 3
33_GSUTIL_BIN = 'gsutil'
Scott Zawalski6a6c9d12012-04-16 13:03:07 -040034_GS_BUCKET = 'gs://chromeos-lab/backup/database'
Scott Zawalski6ccf2842012-08-31 20:45:43 -040035# TODO(scottz): Should we need to ignore more than one database a general
36# function should be designed that lists tables in the database and properly
37# creates the --ignore-table= args to be passed to mysqldump.
38# Tables to ignore when dumping all databases.
39# performance_schema is an internal database that cannot be dumped
40IGNORE_TABLES = ['performance_schema.cond_instances',
41 'performance_schema.events_waits_current',
42 'performance_schema.cond_instances',
43 'performance_schema.events_waits_history',
44 'performance_schema.events_waits_history_long',
45 'performance_schema.events_waits_summary_by_instance',
46 ('performance_schema.'
47 'events_waits_summary_by_thread_by_event_name'),
48 'performance_schema.events_waits_summary_global_by_event_name',
49 'performance_schema.file_instances',
50 'performance_schema.file_summary_by_event_name',
51 'performance_schema.file_summary_by_instance',
52 'performance_schema.mutex_instances',
53 'performance_schema.performance_timers',
54 'performance_schema.rwlock_instances',
55 'performance_schema.setup_consumers',
56 'performance_schema.setup_instruments',
57 'performance_schema.setup_timers',
58 'performance_schema.threads']
Chris Sosa621509d2012-04-04 16:02:52 -070059_DAILY = 'daily'
60_WEEKLY = 'weekly'
61_MONTHLY = 'monthly'
62_SCHEDULER_TYPES = [_DAILY, _WEEKLY, _MONTHLY]
63
64
class MySqlArchiver(object):
    """Class that archives the Autotest MySQL DB to Google Storage.

    Vars:
        gs_dir: The path to the directory in Google Storage that this dump file
                will be uploaded to.
        number_to_keep: The number of dumps we should store.
    """


    def __init__(self, scheduled_type, number_to_keep, gs_bucket):
        """Initializes the archiver.

        Args:
            scheduled_type: One of _SCHEDULER_TYPES ('daily', 'weekly',
                'monthly'); used as the subdirectory name under |gs_bucket|.
            number_to_keep: Number of dumps to retain when cleanup() runs.
            gs_bucket: Google Storage bucket (or a local path, for testing).
        """
        self._gs_dir = '/'.join([gs_bucket, scheduled_type])
        self._number_to_keep = number_to_keep


    @staticmethod
    def _get_user_pass():
        """Returns a tuple containing the user/pass to use to access the DB."""
        user = global_config.global_config.get_config_value(
                'CROS', 'db_backup_user')
        password = global_config.global_config.get_config_value(
                'CROS', 'db_backup_password')
        return user, password


    def create_mysql_dump(self):
        """Returns the path to a gzipped mysql dump of the current autotest DB."""
        user, password = self._get_user_pass()
        fd, filename = tempfile.mkstemp('autotest_db_dump')
        # mkstemp() returns an open OS-level file descriptor that we never
        # use (mysqldump writes to the file through shell redirection below),
        # so close it immediately to avoid leaking it.
        os.close(fd)
        logging.debug('Dumping mysql database to file %s', filename)
        extra_dump_args = ''
        for entry in IGNORE_TABLES:
            extra_dump_args += '--ignore-table=%s ' % entry

        # 'set -o pipefail' ensures the command fails if mysqldump fails,
        # not only if the final gzip stage fails.
        utils.system('set -o pipefail; mysqldump --all-databases --user=%s '
                     '--password=%s %s | gzip - > %s' % (user, password,
                                                         extra_dump_args,
                                                         filename))
        return filename


    @staticmethod
    def _get_name():
        """Returns the name to use for this mysql dump."""
        return 'autotest-dump.%s.gz' % (
                datetime.datetime.now().strftime('%y.%m.%d'))


    @staticmethod
    def _retry_run(cmd):
        """Run the specified |cmd| string, retrying up to _ATTEMPTS times.

        Args:
            cmd: The command to run.

        Returns:
            The output of the command on success.

        Raises:
            error.CmdError: If the command still fails on the final attempt.
        """
        for attempt in range(_ATTEMPTS):
            try:
                return utils.system_output(cmd)
            except error.CmdError:
                if attempt == _ATTEMPTS - 1:
                    raise
                else:
                    logging.error('Failed to run %r', cmd)


    def upload_to_google_storage(self, dump_file):
        """Uploads the given |dump_file| to Google Storage and removes it."""
        cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
        input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
                          name=self._get_name(), gs_dir=self._gs_dir)
        cmd = cmd % input_dict
        logging.debug('Uploading mysql dump to google storage')
        self._retry_run(cmd)
        # The local dump file is no longer needed once it has been uploaded.
        os.remove(dump_file)


    def _get_gs_command(self, cmd):
        """Returns an array representing the command for rm or ls."""
        # Helpful code to allow us to test without gs: if the target is not a
        # gs:// URI, fall back to the plain local 'rm'/'ls' command.
        assert cmd in ['rm', 'ls']
        gs_bin = _GSUTIL_BIN
        if self._gs_dir.startswith('gs://'):
            cmd_array = [gs_bin, cmd]
        else:
            cmd_array = [cmd]

        return cmd_array


    def _do_ls(self):
        """Returns the output of running ls on the gs bucket."""
        cmd = self._get_gs_command('ls') + [self._gs_dir]
        return self._retry_run(' '.join(cmd))


    def cleanup(self):
        """Cleans up the gs bucket to ensure we don't over archive."""
        logging.debug('Cleaning up previously archived dump files.')
        listing = self._do_ls()
        # Dump names embed a yy.mm.dd date, so LooseVersion ordering sorts
        # them oldest-first.
        ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
        if len(ordered_listing) < self._number_to_keep:
            logging.debug('Cleanup found nothing to do.')
            return

        to_remove = ordered_listing[:-self._number_to_keep]
        rm_cmd = self._get_gs_command('rm')
        for artifact in to_remove:
            # NOTE(review): 'gsutil ls' emits full gs:// URIs, so prefixing
            # |artifact| with self._gs_dir again looks like it would double
            # the path in the gs:// case (it is correct for the local-path
            # test mode). Verify against actual gsutil output.
            cmd = ' '.join(rm_cmd + [self._gs_dir + '/' + artifact])
            self._retry_run(cmd)
174
175
def parse_options():
    """Parses command-line options.

    Returns:
        An optparse options object with gs_bucket, keep, type and verbose set.
    """
    parser = optparse.OptionParser()
    parser.add_option('--gs_bucket', default=_GS_BUCKET,
                      help='Google storage bucket to store mysql db dumps.')
    parser.add_option('--keep', default=10, type=int,
                      help='Number of dumps to keep of specified type.')
    parser.add_option('--type', default=_DAILY,
                      help='The type of mysql dump to store.')
    # Fixed: help text was a copy-paste of the --gs_bucket description.
    parser.add_option('--verbose', default=False, action='store_true',
                      help='Enable verbose logging output.')
    options = parser.parse_args()[0]
    if options.type not in _SCHEDULER_TYPES:
        parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))

    return options
192
193
def main():
    """Entry point: dump the autotest DB, upload it and prune old archives."""
    options = parse_options()
    logging_manager.configure_logging(
            test_importer.TestImporterLoggingConfig(), verbose=options.verbose)
    archiver = MySqlArchiver(options.type, options.keep, options.gs_bucket)
    backup_file = archiver.create_mysql_dump()
    archiver.upload_to_google_storage(backup_file)
    archiver.cleanup()


if __name__ == '__main__':
    main()