blob: 57e37c4b5640590751f4deb28a5c5826c19b1980 [file] [log] [blame]
Alex Miller7d68a2e2014-05-12 11:36:18 -07001#!/usr/bin/python
Simran Basie129a962012-08-31 13:03:53 -07002# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Alex Miller7d68a2e2014-05-12 11:36:18 -07006import argparse
7import datetime
Jakob Juelich5050d952014-10-09 11:55:06 -07008import os
Alex Miller7d68a2e2014-05-12 11:36:18 -07009import re
10import sys
11import logging
Simran Basie129a962012-08-31 13:03:53 -070012
Jakob Juelich5050d952014-10-09 11:55:06 -070013os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
14
Simran Basie129a962012-08-31 13:03:53 -070015import common
Fang Deng001c4dd2016-02-24 13:55:44 -080016from autotest_lib.server import utils
Jakob Juelich5050d952014-10-09 11:55:06 -070017from django.db import connections, transaction
Simran Basie129a962012-08-31 13:03:53 -070018
19
20# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
21LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
22# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
23DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
24 '|3[01])$')
Alex Miller7d68a2e2014-05-12 11:36:18 -070025SELECT_CMD_FORMAT = """
26SELECT %(table)s.%(primary_key)s FROM %(table)s
27WHERE %(table)s.%(time_column)s <= "%(date)s"
28"""
29SELECT_JOIN_CMD_FORMAT = """
30SELECT %(table)s.%(primary_key)s FROM %(table)s
31INNER JOIN %(related_table)s
32 ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
33WHERE %(related_table)s.%(time_column)s <= "%(date)s"
34"""
35SELECT_WITH_INDIRECTION_FORMAT = """
36SELECT %(table)s.%(primary_key)s FROM %(table)s
37INNER JOIN %(indirection_table)s
38 ON %(table)s.%(foreign_key)s =
39 %(indirection_table)s.%(indirection_primary_key)s
40INNER JOIN %(related_table)s
41 ON %(indirection_table)s.%(indirection_foreign_key)s =
42 %(related_table)s.%(related_primary_key)s
43WHERE %(related_table)s.%(time_column)s <= "%(date)s"
44"""
45DELETE_ROWS_FORMAT = """
46DELETE FROM %(table)s
47WHERE %(table)s.%(primary_key)s IN (%(rows)s)
48"""
49
Simran Basie129a962012-08-31 13:03:53 -070050
51AFE_JOB_ID = 'afe_job_id'
52JOB_ID = 'job_id'
53JOB_IDX = 'job_idx'
54TEST_IDX = 'test_idx'
Jakob Juelich5050d952014-10-09 11:55:06 -070055
56# CAUTION: Make sure only the 'default' connection is used. Otherwise
57# db_cleanup may delete stuff from the global database, which is generally not
58# intended.
59cursor = connections['default'].cursor()
Alex Miller7d68a2e2014-05-12 11:36:18 -070060
61STEP_SIZE = None # Threading this through properly is disgusting.
62
63class ProgressBar(object):
64 TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
65
66 def __init__(self, name, amount):
67 self._name = name
68 self._amount = amount
69 self._cur = 0
70
71 def __enter__(self):
72 return self
73
74 def __exit__(self, a, b, c):
75 sys.stdout.write('\n')
76 sys.stdout.flush()
77
78 def update(self, x):
79 """
80 Advance the counter by `x`.
81
82 @param x: An integer of how many more elements were processed.
83 """
84 self._cur += x
85
86 def show(self):
87 """
88 Display the progress bar on the current line. Repeated invocations
89 "update" the display.
90 """
91 if self._amount == 0:
92 barlen = 20
93 else:
94 barlen = int(20 * self._cur / float(self._amount))
95 if barlen:
96 bartext = '=' * (barlen-1) + '>'
97 else:
98 bartext = ''
99 text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
100 sys.stdout.write('\r')
101 sys.stdout.write(text)
102 sys.stdout.flush()
Simran Basie129a962012-08-31 13:03:53 -0700103
104
Alex Miller7d68a2e2014-05-12 11:36:18 -0700105def grouper(iterable, n):
106 """
107 Group the elements of `iterable` into groups of maximum size `n`.
108
109 @param iterable: An iterable.
110 @param n: Max size of returned groups.
111 @returns: Yields iterables of size <= n.
112
113 >>> grouper('ABCDEFG', 3)
114 [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
115 """
116 args = [iter(iterable)] * n
117 while True:
118 lst = []
119 try:
120 for itr in args:
121 lst.append(next(itr))
122 yield lst
123 except StopIteration:
124 if lst:
125 yield lst
126 break
127
128
129def _delete_table_data_before_date(table_to_delete_from, primary_key,
130 related_table, related_primary_key,
131 date, foreign_key=None,
Simran Basie129a962012-08-31 13:03:53 -0700132 time_column="started_time",
133 indirection_table=None,
134 indirection_primary_key=None,
135 indirection_foreign_key=None):
136 """
137 We want a delete statement that will only delete from one table while
138 using a related table to find the rows to delete.
139
140 An example mysql command:
141 DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
142 tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
143 tko_tests.started_time <= '2012-02-01';
144
145 There are also tables that require 2 joins to determine which rows we want
146 to delete and we determine these rows by joining the table we want to
147 delete from with an indirection table to the actual jobs table.
148
149 @param table_to_delete_from: Table whose rows we want to delete.
150 @param related_table: Table with the date information we are selecting by.
151 @param foreign_key: Foreign key used in table_to_delete_from to reference
152 the related table. If None, the primary_key is used.
153 @param primary_key: Primary key in the related table.
154 @param date: End date of the information we are trying to delete.
155 @param time_column: Column that we want to use to compare the date to.
156 @param indirection_table: Table we use to link the data we are trying to
157 delete with the table with the date information.
158 @param indirection_primary_key: Key we use to connect the indirection table
159 to the table we are trying to delete rows
160 from.
161 @param indirection_foreign_key: Key we use to connect the indirection table
162 to the table with the date information.
163 """
164 if not foreign_key:
165 foreign_key = primary_key
Alex Miller7d68a2e2014-05-12 11:36:18 -0700166
Simran Basie129a962012-08-31 13:03:53 -0700167 if not related_table:
168 # Deleting from a table directly.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700169 variables = dict(table=table_to_delete_from, primary_key=primary_key,
170 time_column=time_column, date=date)
171 sql = SELECT_CMD_FORMAT % variables
172 elif not indirection_table:
Simran Basie129a962012-08-31 13:03:53 -0700173 # Deleting using a single JOIN to get the date information.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700174 variables = dict(primary_key=primary_key, table=table_to_delete_from,
175 foreign_key=foreign_key, related_table=related_table,
176 related_primary_key=related_primary_key,
177 time_column=time_column, date=date)
178 sql = SELECT_JOIN_CMD_FORMAT % variables
Simran Basie129a962012-08-31 13:03:53 -0700179 else:
180 # There are cases where we need to JOIN 3 TABLES to determine the rows
181 # we want to delete.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700182 variables = dict(primary_key=primary_key, table=table_to_delete_from,
183 indirection_table=indirection_table,
184 foreign_key=foreign_key,
185 indirection_primary_key=indirection_primary_key,
186 related_table=related_table,
187 related_primary_key=related_primary_key,
188 indirection_foreign_key=indirection_foreign_key,
189 time_column=time_column, date=date)
190 sql = SELECT_WITH_INDIRECTION_FORMAT % variables
191
Simran Basie129a962012-08-31 13:03:53 -0700192 logging.debug('SQL: %s', sql)
Jakob Juelich5050d952014-10-09 11:55:06 -0700193 cursor.execute(sql, [])
194 rows = [x[0] for x in cursor.fetchall()]
Alex Miller7d68a2e2014-05-12 11:36:18 -0700195 logging.debug(rows)
196
197 if not rows or rows == [None]:
198 with ProgressBar(table_to_delete_from, 0) as pb:
199 pb.show()
200 logging.debug('Noting to delete for %s', table_to_delete_from)
201 return
202
203 with ProgressBar(table_to_delete_from, len(rows)) as pb:
204 for row_keys in grouper(rows, STEP_SIZE):
205 variables['rows'] = ','.join([str(x) for x in row_keys])
206 sql = DELETE_ROWS_FORMAT % variables
207 logging.debug('SQL: %s', sql)
Jakob Juelich5050d952014-10-09 11:55:06 -0700208 cursor.execute(sql, [])
209 transaction.commit_unless_managed(using='default')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700210 pb.update(len(row_keys))
211 pb.show()
Simran Basie129a962012-08-31 13:03:53 -0700212
213
214def _subtract_days(date, days_to_subtract):
215 """
216 Return a date (string) that is 'days' before 'date'
217
218 @param date: date (string) we are subtracting from.
219 @param days_to_subtract: days (int) we are subtracting.
220 """
221 date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
222 difference = date_obj - datetime.timedelta(days=days_to_subtract)
223 return difference.strftime('%Y-%m-%d')
224
225
226def _delete_all_data_before_date(date):
227 """
228 Delete all the database data before a given date.
229
230 This function focuses predominately on the data for jobs in tko_jobs.
231 However not all jobs in afe_jobs are also in tko_jobs.
232
233 Therefore we delete all the afe_job and foreign key relations prior to two
234 days before date. Then we do the queries using tko_jobs and these
235 tables to ensure all the related information is gone. Even though we are
236 repeating deletes on these tables, the second delete will be quick and
237 completely thorough in ensuring we clean up all the foreign key
238 dependencies correctly.
239
240 @param date: End date of the information we are trying to delete.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700241 @param step: Rows to delete per SQL query.
Simran Basie129a962012-08-31 13:03:53 -0700242 """
243 # First cleanup all afe_job related data (prior to 2 days before date).
244 # The reason for this is not all afe_jobs may be in tko_jobs.
245 afe_date = _subtract_days(date, 2)
Fang Denga9815ac2015-03-11 14:26:10 -0700246 logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
Simran Basie129a962012-08-31 13:03:53 -0700247 _delete_table_data_before_date('afe_aborted_host_queue_entries',
Alex Miller7d68a2e2014-05-12 11:36:18 -0700248 'queue_entry_id',
Simran Basie129a962012-08-31 13:03:53 -0700249 'afe_jobs', 'id', afe_date,
250 time_column= 'created_on',
251 foreign_key='queue_entry_id',
252 indirection_table='afe_host_queue_entries',
253 indirection_primary_key='id',
254 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700255 _delete_table_data_before_date('afe_special_tasks', 'id',
256 'afe_jobs', 'id',
Simran Basie129a962012-08-31 13:03:53 -0700257 afe_date, time_column='created_on',
258 foreign_key='queue_entry_id',
259 indirection_table='afe_host_queue_entries',
260 indirection_primary_key='id',
261 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700262 _delete_table_data_before_date('afe_host_queue_entries', 'id',
263 'afe_jobs', 'id',
Simran Basie129a962012-08-31 13:03:53 -0700264 afe_date, time_column='created_on',
265 foreign_key=JOB_ID)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700266 _delete_table_data_before_date('afe_job_keyvals', 'id',
267 'afe_jobs', 'id',
268 afe_date, time_column='created_on',
Simran Basie129a962012-08-31 13:03:53 -0700269 foreign_key=JOB_ID)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700270 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
271 'afe_jobs', 'id',
272 afe_date, time_column='created_on',
273 foreign_key=JOB_ID)
274 _delete_table_data_before_date('afe_jobs', 'id',
275 None, None,
276 afe_date, time_column='created_on')
277 # Special tasks that aren't associated with an HQE
278 # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
279 # more than we should, but I doubt anyone will notice or care.
280 _delete_table_data_before_date('afe_special_tasks', 'id',
281 None, None,
282 afe_date, time_column='time_requested')
Simran Basie129a962012-08-31 13:03:53 -0700283
284 # Now go through and clean up all the rows related to tko_jobs prior to
285 # date.
Fang Denga9815ac2015-03-11 14:26:10 -0700286 logging.info('Cleaning up all data related to tko_jobs prior to %s.',
Simran Basie129a962012-08-31 13:03:53 -0700287 date)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700288 _delete_table_data_before_date('tko_test_attributes', 'id',
289 'tko_tests', TEST_IDX,
290 date, foreign_key=TEST_IDX)
291 _delete_table_data_before_date('tko_test_labels_tests', 'id',
292 'tko_tests', TEST_IDX,
293 date, foreign_key= 'test_id')
294 _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
295 'tko_tests', TEST_IDX,
296 date)
297 _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
298 'tko_tests', TEST_IDX,
299 date)
300 _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
301 'tko_tests', TEST_IDX,
302 date)
303 _delete_table_data_before_date('tko_job_keyvals', 'id',
304 'tko_jobs', JOB_IDX,
Simran Basie129a962012-08-31 13:03:53 -0700305 date, foreign_key='job_id')
306 _delete_table_data_before_date('afe_aborted_host_queue_entries',
Alex Miller7d68a2e2014-05-12 11:36:18 -0700307 'queue_entry_id',
Simran Basie129a962012-08-31 13:03:53 -0700308 'tko_jobs', AFE_JOB_ID, date,
309 foreign_key='queue_entry_id',
310 indirection_table='afe_host_queue_entries',
311 indirection_primary_key='id',
312 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700313 _delete_table_data_before_date('afe_special_tasks', 'id',
314 'tko_jobs', AFE_JOB_ID,
Simran Basie129a962012-08-31 13:03:53 -0700315 date, foreign_key='queue_entry_id',
316 indirection_table='afe_host_queue_entries',
317 indirection_primary_key='id',
318 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700319 _delete_table_data_before_date('afe_host_queue_entries', 'id',
320 'tko_jobs', AFE_JOB_ID,
Simran Basie129a962012-08-31 13:03:53 -0700321 date, foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700322 _delete_table_data_before_date('afe_job_keyvals', 'id',
323 'tko_jobs', AFE_JOB_ID,
324 date, foreign_key='job_id')
325 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
326 'tko_jobs', AFE_JOB_ID,
327 date, foreign_key='job_id')
328 _delete_table_data_before_date('afe_jobs', 'id',
329 'tko_jobs', AFE_JOB_ID,
Simran Basie129a962012-08-31 13:03:53 -0700330 date, foreign_key='id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700331 _delete_table_data_before_date('tko_tests', TEST_IDX,
332 'tko_jobs', JOB_IDX,
333 date, foreign_key=JOB_IDX)
334 _delete_table_data_before_date('tko_jobs', JOB_IDX,
335 None, None, date)
336
337
338def parse_args():
339 """Parse command line arguments"""
340 parser = argparse.ArgumentParser()
341 parser.add_argument('-v', '--verbose', action='store_true',
342 help='Print SQL commands and results')
343 parser.add_argument('--step', type=int, action='store',
344 default=1000,
345 help='Number of rows to delete at once')
Fang Deng001c4dd2016-02-24 13:55:44 -0800346 parser.add_argument('-c', '--check_server', action='store_true',
347 help='Check if the server should run db clean up.')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700348 parser.add_argument('date', help='Keep results newer than')
349 return parser.parse_args()
Simran Basie129a962012-08-31 13:03:53 -0700350
351
Fang Deng001c4dd2016-02-24 13:55:44 -0800352def should_cleanup():
353 """Check if the server should run db_cleanup.
354
355 Only shard should clean up db.
356
357 @returns: True if it should run db cleanup otherwise False.
358 """
359 return utils.is_shard()
360
361
Simran Basie129a962012-08-31 13:03:53 -0700362def main():
Alex Miller7d68a2e2014-05-12 11:36:18 -0700363 """main"""
364 args = parse_args()
365
Fang Denga9815ac2015-03-11 14:26:10 -0700366 level = logging.DEBUG if args.verbose else logging.INFO
Alex Miller7d68a2e2014-05-12 11:36:18 -0700367 logging.basicConfig(level=level, format=LOGGING_FORMAT)
Fang Denga9815ac2015-03-11 14:26:10 -0700368 logging.info('Calling: %s', sys.argv)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700369
370 if not re.match(DATE_FORMAT_REGEX, args.date):
Simran Basie129a962012-08-31 13:03:53 -0700371 print 'DATE must be in yyyy-mm-dd format!'
372 return
Fang Deng001c4dd2016-02-24 13:55:44 -0800373 if args.check_server and not should_cleanup():
374 print 'Only shard can run db cleanup.'
375 return
Alex Miller7d68a2e2014-05-12 11:36:18 -0700376
377 global STEP_SIZE
378 STEP_SIZE = args.step
379 _delete_all_data_before_date(args.date)
Simran Basie129a962012-08-31 13:03:53 -0700380
381
382if __name__ == '__main__':
Dennis Jeffrey368c54b2013-07-24 11:19:03 -0700383 main()