#!/usr/bin/python

# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Module used to back up the mysql db and upload to Google Storage.

Usage:
  backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10

  gs_bucket may refer to a local location by omitting gs:// and giving a local
  path if desired for testing. The example usage above creates a dump
  of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
  cleans up older dumps if there are more than 10 in that directory.
"""
17
import datetime
from distutils import version
import logging
import optparse
import os
import tempfile

import common

from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config, logging_manager, utils
from autotest_lib.utils import test_importer


# Number of times to retry a gsutil command before giving up.
_ATTEMPTS = 3
_GSUTIL_BIN = 'gsutil'
_GS_BUCKET = 'gs://chromeos-lab/backup/database'
# TODO(scottz): Should we need to ignore more than one database a general
# function should be designed that lists tables in the database and properly
# creates the --ignore-table= args to be passed to mysqldump.
# Tables to ignore when dumping all databases.
# performance_schema is an internal database that cannot be dumped
# (duplicate 'cond_instances' entry removed; mysqldump only needs each
# table listed once).
IGNORE_TABLES = ['performance_schema.cond_instances',
                 'performance_schema.events_waits_current',
                 'performance_schema.events_waits_history',
                 'performance_schema.events_waits_history_long',
                 'performance_schema.events_waits_summary_by_instance',
                 ('performance_schema.'
                  'events_waits_summary_by_thread_by_event_name'),
                 'performance_schema.events_waits_summary_global_by_event_name',
                 'performance_schema.file_instances',
                 'performance_schema.file_summary_by_event_name',
                 'performance_schema.file_summary_by_instance',
                 'performance_schema.mutex_instances',
                 'performance_schema.performance_timers',
                 'performance_schema.rwlock_instances',
                 'performance_schema.setup_consumers',
                 'performance_schema.setup_instruments',
                 'performance_schema.setup_timers',
                 'performance_schema.threads']

# Conventional mysqldump schedules.
_DAILY = 'daily'
_WEEKLY = 'weekly'
_MONTHLY = 'monthly'

# Contrary to a conventional mysql dump which takes O(hours) on large databases,
# a host dump is the cheapest form of backup possible. We dump the output of
# a mysql command showing all hosts and their pool labels to a text file that
# is backed up to google storage.
_ONLY_HOSTS = 'only_hosts'
_SCHEDULER_TYPES = [_ONLY_HOSTS, _DAILY, _WEEKLY, _MONTHLY]
Chris Sosa621509d2012-04-04 16:02:52 -070071
72
class MySqlArchiver(object):
    """Class that archives the Autotest MySQL DB to Google Storage.

    Vars:
        gs_dir: The path to the directory in Google Storage that this dump file
                will be uploaded to.
        number_to_keep: The number of dumps we should store.
    """


    def __init__(self, scheduled_type, number_to_keep, gs_bucket):
        """Initializes the archiver.

        @param scheduled_type: One of _SCHEDULER_TYPES; also used as the
                subdirectory name inside the bucket.
        @param number_to_keep: Number of old dumps to retain in the bucket.
        @param gs_bucket: Google Storage bucket (or a local path, for testing).
        """
        self._gs_dir = '/'.join([gs_bucket, scheduled_type])
        self._number_to_keep = number_to_keep
        self._type = scheduled_type


    @staticmethod
    def _get_user_pass():
        """Returns a tuple containing the user/pass to use to access the DB."""
        user = global_config.global_config.get_config_value(
                'CROS', 'db_backup_user')
        password = global_config.global_config.get_config_value(
                'CROS', 'db_backup_password')
        return user, password


    def create_mysql_dump(self):
        """Returns the path to a mysql dump of the current autotest DB."""
        user, password = self._get_user_pass()
        fd, filename = tempfile.mkstemp('autotest_db_dump')
        # mkstemp returns an already-open file descriptor; close it so it
        # isn't leaked -- the dump itself is written by the shell pipeline.
        os.close(fd)
        logging.debug('Dumping mysql database to file %s', filename)
        extra_dump_args = ''
        for entry in IGNORE_TABLES:
            extra_dump_args += '--ignore-table=%s ' % entry

        utils.system('set -o pipefail; mysqldump --all-databases --user=%s '
                     '--password=%s %s | gzip - > %s' % (user, password,
                                                         extra_dump_args,
                                                         filename))
        return filename


    def create_host_dump(self):
        """Dumps hosts and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 hosts and their pool labels.
        """
        user, password = self._get_user_pass()
        fd, filename = tempfile.mkstemp('autotest_db_dump')
        # Close the fd returned by mkstemp so it isn't leaked; the shell
        # redirection below writes the dump to |filename|.
        os.close(fd)
        logging.debug('Dumping hosts to file %s', filename)
        utils.system(
                'set -o pipefail; mysql -u %s -p%s chromeos_autotest_db -e '
                '"select hostname, labels.name from afe_hosts as hosts join '
                'afe_hosts_labels on hosts.id = afe_hosts_labels.host_id join '
                'afe_labels as labels on labels.id = afe_hosts_labels.label_id '
                'where labels.name like \'%%pool%%\';" > %s' %
                (user, password, filename))
        return filename


    def dump(self):
        """Creates a data dump based on the type of schedule.

        @return: The path to a file containing the dump.
        """
        if self._type == _ONLY_HOSTS:
            return self.create_host_dump()
        return self.create_mysql_dump()


    def _get_name(self):
        """Returns the name of the dump as presented to google storage."""
        # Host dumps are plain text; full mysqldumps are gzipped.
        file_type = 'gz' if self._type != _ONLY_HOSTS else 'txt'
        return 'autotest-dump.%s.%s' % (
                datetime.datetime.now().strftime('%y.%m.%d'), file_type)


    @staticmethod
    def _retry_run(cmd):
        """Run the specified |cmd| string, retrying if necessary.

        Args:
            cmd: The command to run.
        """
        for attempt in range(_ATTEMPTS):
            try:
                return utils.system_output(cmd)
            except error.CmdError:
                if attempt == _ATTEMPTS - 1:
                    # Out of retries; let the error propagate.
                    raise
                else:
                    logging.error('Failed to run %r', cmd)


    def upload_to_google_storage(self, dump_file):
        """Uploads the given |dump_file| to Google Storage.

        Deletes the local |dump_file| after a successful upload.

        @param dump_file: The path to the file containing the dump.
        """
        cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
        input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
                          name=self._get_name(), gs_dir=self._gs_dir)
        cmd = cmd % input_dict
        logging.debug('Uploading mysql dump to google storage')
        self._retry_run(cmd)
        os.remove(dump_file)


    def _get_gs_command(self, cmd):
        """Returns an array representing the command for rm or ls."""
        # Helpful code to allow us to test without gs.
        assert cmd in ['rm', 'ls']
        gs_bin = _GSUTIL_BIN
        if self._gs_dir.startswith('gs://'):
            cmd_array = [gs_bin, cmd]
        else:
            # Local path: fall back to the plain shell command.
            cmd_array = [cmd]

        return cmd_array


    def _do_ls(self):
        """Returns the output of running ls on the gs bucket."""
        cmd = self._get_gs_command('ls') + [self._gs_dir]
        return self._retry_run(' '.join(cmd))


    def cleanup(self):
        """Cleans up the gs bucket to ensure we don't over archive."""
        logging.debug('Cleaning up previously archived dump files.')
        listing = self._do_ls()
        # Dump names embed a yy.mm.dd date, so LooseVersion orders them
        # oldest-first.
        ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
        if len(ordered_listing) < self._number_to_keep:
            logging.debug('Cleanup found nothing to do.')
            return

        to_remove = ordered_listing[:-self._number_to_keep]
        rm_cmd = self._get_gs_command('rm')
        for artifact in to_remove:
            cmd = ' '.join(rm_cmd + [self._gs_dir + '/' + artifact])
            self._retry_run(cmd)
215
216
def parse_options():
    """Parses and validates command line options.

    @return: An optparse options object.
    """
    parser = optparse.OptionParser()
    parser.add_option('--gs_bucket', default=_GS_BUCKET,
                      help='Google storage bucket to store mysql db dumps.')
    parser.add_option('--keep', default=10, type=int,
                      help='Number of dumps to keep of specified type.')
    parser.add_option('--type', default=_DAILY,
                      help='The type of mysql dump to store.')
    # Help text previously duplicated --gs_bucket's by copy-paste; this flag
    # actually controls logging verbosity.
    parser.add_option('--verbose', default=False, action='store_true',
                      help='Enable verbose (debug) logging.')
    options = parser.parse_args()[0]
    if options.type not in _SCHEDULER_TYPES:
        parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))

    return options
233
234
def main():
    """Entry point: dump the database, upload it, then prune old backups."""
    options = parse_options()
    logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
                                      verbose=options.verbose)
    archiver = MySqlArchiver(options.type, options.keep, options.gs_bucket)
    backup_path = archiver.dump()
    archiver.upload_to_google_storage(backup_path)
    archiver.cleanup()


if __name__ == '__main__':
    main()