blob: c2f951025d38d966e693449dfeddc6bb8a223501 [file] [log] [blame]
Mike Frysingerd03e6b52019-08-03 12:49:01 -04001#!/usr/bin/python2
Simran Basie129a962012-08-31 13:03:53 -07002# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Alex Miller7d68a2e2014-05-12 11:36:18 -07006import argparse
7import datetime
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -07008import logging
Jakob Juelich5050d952014-10-09 11:55:06 -07009import os
Alex Miller7d68a2e2014-05-12 11:36:18 -070010import re
11import sys
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -070012import time
Simran Basie129a962012-08-31 13:03:53 -070013
Jakob Juelich5050d952014-10-09 11:55:06 -070014os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
15
Simran Basie129a962012-08-31 13:03:53 -070016import common
Fang Deng001c4dd2016-02-24 13:55:44 -080017from autotest_lib.server import utils
Jakob Juelich5050d952014-10-09 11:55:06 -070018from django.db import connections, transaction
Simran Basie129a962012-08-31 13:03:53 -070019
20
21# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
22LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
23# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
24DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
25 '|3[01])$')
Alex Miller7d68a2e2014-05-12 11:36:18 -070026SELECT_CMD_FORMAT = """
27SELECT %(table)s.%(primary_key)s FROM %(table)s
28WHERE %(table)s.%(time_column)s <= "%(date)s"
29"""
30SELECT_JOIN_CMD_FORMAT = """
31SELECT %(table)s.%(primary_key)s FROM %(table)s
32INNER JOIN %(related_table)s
33 ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
34WHERE %(related_table)s.%(time_column)s <= "%(date)s"
35"""
36SELECT_WITH_INDIRECTION_FORMAT = """
37SELECT %(table)s.%(primary_key)s FROM %(table)s
38INNER JOIN %(indirection_table)s
39 ON %(table)s.%(foreign_key)s =
40 %(indirection_table)s.%(indirection_primary_key)s
41INNER JOIN %(related_table)s
42 ON %(indirection_table)s.%(indirection_foreign_key)s =
43 %(related_table)s.%(related_primary_key)s
44WHERE %(related_table)s.%(time_column)s <= "%(date)s"
45"""
46DELETE_ROWS_FORMAT = """
47DELETE FROM %(table)s
48WHERE %(table)s.%(primary_key)s IN (%(rows)s)
49"""
50
Simran Basie129a962012-08-31 13:03:53 -070051
52AFE_JOB_ID = 'afe_job_id'
53JOB_ID = 'job_id'
54JOB_IDX = 'job_idx'
55TEST_IDX = 'test_idx'
Jakob Juelich5050d952014-10-09 11:55:06 -070056
57# CAUTION: Make sure only the 'default' connection is used. Otherwise
58# db_cleanup may delete stuff from the global database, which is generally not
59# intended.
60cursor = connections['default'].cursor()
Alex Miller7d68a2e2014-05-12 11:36:18 -070061
Paul Hobbs4d11bc32017-09-08 01:02:25 -070062# Globals for command line flag constants, for convenience.
63DRY_RUN = False
64STEP_SIZE = None
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -070065LOAD_RATIO = 1.0
Alex Miller7d68a2e2014-05-12 11:36:18 -070066
67class ProgressBar(object):
68 TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
69
70 def __init__(self, name, amount):
71 self._name = name
72 self._amount = amount
73 self._cur = 0
74
75 def __enter__(self):
76 return self
77
78 def __exit__(self, a, b, c):
79 sys.stdout.write('\n')
80 sys.stdout.flush()
81
82 def update(self, x):
83 """
84 Advance the counter by `x`.
85
86 @param x: An integer of how many more elements were processed.
87 """
88 self._cur += x
89
90 def show(self):
91 """
92 Display the progress bar on the current line. Repeated invocations
93 "update" the display.
94 """
95 if self._amount == 0:
96 barlen = 20
97 else:
98 barlen = int(20 * self._cur / float(self._amount))
99 if barlen:
100 bartext = '=' * (barlen-1) + '>'
101 else:
102 bartext = ''
103 text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
104 sys.stdout.write('\r')
105 sys.stdout.write(text)
106 sys.stdout.flush()
Simran Basie129a962012-08-31 13:03:53 -0700107
108
Alex Miller7d68a2e2014-05-12 11:36:18 -0700109def grouper(iterable, n):
110 """
111 Group the elements of `iterable` into groups of maximum size `n`.
112
113 @param iterable: An iterable.
114 @param n: Max size of returned groups.
115 @returns: Yields iterables of size <= n.
116
117 >>> grouper('ABCDEFG', 3)
118 [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
119 """
120 args = [iter(iterable)] * n
121 while True:
122 lst = []
123 try:
124 for itr in args:
125 lst.append(next(itr))
126 yield lst
127 except StopIteration:
128 if lst:
129 yield lst
130 break
131
132
133def _delete_table_data_before_date(table_to_delete_from, primary_key,
134 related_table, related_primary_key,
135 date, foreign_key=None,
Simran Basie129a962012-08-31 13:03:53 -0700136 time_column="started_time",
137 indirection_table=None,
138 indirection_primary_key=None,
139 indirection_foreign_key=None):
140 """
141 We want a delete statement that will only delete from one table while
142 using a related table to find the rows to delete.
143
144 An example mysql command:
145 DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
146 tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
147 tko_tests.started_time <= '2012-02-01';
148
149 There are also tables that require 2 joins to determine which rows we want
150 to delete and we determine these rows by joining the table we want to
151 delete from with an indirection table to the actual jobs table.
152
153 @param table_to_delete_from: Table whose rows we want to delete.
154 @param related_table: Table with the date information we are selecting by.
155 @param foreign_key: Foreign key used in table_to_delete_from to reference
156 the related table. If None, the primary_key is used.
157 @param primary_key: Primary key in the related table.
158 @param date: End date of the information we are trying to delete.
159 @param time_column: Column that we want to use to compare the date to.
160 @param indirection_table: Table we use to link the data we are trying to
161 delete with the table with the date information.
162 @param indirection_primary_key: Key we use to connect the indirection table
163 to the table we are trying to delete rows
164 from.
165 @param indirection_foreign_key: Key we use to connect the indirection table
166 to the table with the date information.
167 """
168 if not foreign_key:
169 foreign_key = primary_key
Alex Miller7d68a2e2014-05-12 11:36:18 -0700170
Simran Basie129a962012-08-31 13:03:53 -0700171 if not related_table:
172 # Deleting from a table directly.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700173 variables = dict(table=table_to_delete_from, primary_key=primary_key,
174 time_column=time_column, date=date)
175 sql = SELECT_CMD_FORMAT % variables
176 elif not indirection_table:
Simran Basie129a962012-08-31 13:03:53 -0700177 # Deleting using a single JOIN to get the date information.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700178 variables = dict(primary_key=primary_key, table=table_to_delete_from,
179 foreign_key=foreign_key, related_table=related_table,
180 related_primary_key=related_primary_key,
181 time_column=time_column, date=date)
182 sql = SELECT_JOIN_CMD_FORMAT % variables
Simran Basie129a962012-08-31 13:03:53 -0700183 else:
184 # There are cases where we need to JOIN 3 TABLES to determine the rows
185 # we want to delete.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700186 variables = dict(primary_key=primary_key, table=table_to_delete_from,
187 indirection_table=indirection_table,
188 foreign_key=foreign_key,
189 indirection_primary_key=indirection_primary_key,
190 related_table=related_table,
191 related_primary_key=related_primary_key,
192 indirection_foreign_key=indirection_foreign_key,
193 time_column=time_column, date=date)
194 sql = SELECT_WITH_INDIRECTION_FORMAT % variables
195
Simran Basie129a962012-08-31 13:03:53 -0700196 logging.debug('SQL: %s', sql)
Jakob Juelich5050d952014-10-09 11:55:06 -0700197 cursor.execute(sql, [])
198 rows = [x[0] for x in cursor.fetchall()]
Alex Miller7d68a2e2014-05-12 11:36:18 -0700199 logging.debug(rows)
200
201 if not rows or rows == [None]:
202 with ProgressBar(table_to_delete_from, 0) as pb:
203 pb.show()
Paul Hobbs4d11bc32017-09-08 01:02:25 -0700204 logging.debug('Nothing to delete for %s', table_to_delete_from)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700205 return
206
207 with ProgressBar(table_to_delete_from, len(rows)) as pb:
208 for row_keys in grouper(rows, STEP_SIZE):
209 variables['rows'] = ','.join([str(x) for x in row_keys])
210 sql = DELETE_ROWS_FORMAT % variables
Paul Hobbs4d11bc32017-09-08 01:02:25 -0700211
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -0700212 start = time.time()
Alex Miller7d68a2e2014-05-12 11:36:18 -0700213 logging.debug('SQL: %s', sql)
Paul Hobbs4d11bc32017-09-08 01:02:25 -0700214 if not DRY_RUN:
215 cursor.execute(sql, [])
216 transaction.commit_unless_managed(using='default')
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -0700217 end = time.time()
218
Alex Miller7d68a2e2014-05-12 11:36:18 -0700219 pb.update(len(row_keys))
220 pb.show()
Simran Basie129a962012-08-31 13:03:53 -0700221
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -0700222 if LOAD_RATIO != 1.0:
223 assert 0 < LOAD_RATIO <= 1, (
224 'Load ratio must be a fraction between 0 and 1.')
225 time.sleep((end - start) / LOAD_RATIO)
226
Simran Basie129a962012-08-31 13:03:53 -0700227
228def _subtract_days(date, days_to_subtract):
229 """
230 Return a date (string) that is 'days' before 'date'
231
232 @param date: date (string) we are subtracting from.
233 @param days_to_subtract: days (int) we are subtracting.
234 """
235 date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
236 difference = date_obj - datetime.timedelta(days=days_to_subtract)
237 return difference.strftime('%Y-%m-%d')
238
239
240def _delete_all_data_before_date(date):
241 """
242 Delete all the database data before a given date.
243
244 This function focuses predominately on the data for jobs in tko_jobs.
245 However not all jobs in afe_jobs are also in tko_jobs.
246
247 Therefore we delete all the afe_job and foreign key relations prior to two
248 days before date. Then we do the queries using tko_jobs and these
249 tables to ensure all the related information is gone. Even though we are
250 repeating deletes on these tables, the second delete will be quick and
251 completely thorough in ensuring we clean up all the foreign key
252 dependencies correctly.
253
254 @param date: End date of the information we are trying to delete.
Alex Miller7d68a2e2014-05-12 11:36:18 -0700255 @param step: Rows to delete per SQL query.
Simran Basie129a962012-08-31 13:03:53 -0700256 """
257 # First cleanup all afe_job related data (prior to 2 days before date).
258 # The reason for this is not all afe_jobs may be in tko_jobs.
259 afe_date = _subtract_days(date, 2)
Fang Denga9815ac2015-03-11 14:26:10 -0700260 logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
Simran Basie129a962012-08-31 13:03:53 -0700261 _delete_table_data_before_date('afe_aborted_host_queue_entries',
Alex Miller7d68a2e2014-05-12 11:36:18 -0700262 'queue_entry_id',
Simran Basie129a962012-08-31 13:03:53 -0700263 'afe_jobs', 'id', afe_date,
264 time_column= 'created_on',
265 foreign_key='queue_entry_id',
266 indirection_table='afe_host_queue_entries',
267 indirection_primary_key='id',
268 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700269 _delete_table_data_before_date('afe_special_tasks', 'id',
270 'afe_jobs', 'id',
Simran Basie129a962012-08-31 13:03:53 -0700271 afe_date, time_column='created_on',
272 foreign_key='queue_entry_id',
273 indirection_table='afe_host_queue_entries',
274 indirection_primary_key='id',
275 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700276 _delete_table_data_before_date('afe_host_queue_entries', 'id',
277 'afe_jobs', 'id',
Simran Basie129a962012-08-31 13:03:53 -0700278 afe_date, time_column='created_on',
279 foreign_key=JOB_ID)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700280 _delete_table_data_before_date('afe_job_keyvals', 'id',
281 'afe_jobs', 'id',
282 afe_date, time_column='created_on',
Simran Basie129a962012-08-31 13:03:53 -0700283 foreign_key=JOB_ID)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700284 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
285 'afe_jobs', 'id',
286 afe_date, time_column='created_on',
287 foreign_key=JOB_ID)
288 _delete_table_data_before_date('afe_jobs', 'id',
289 None, None,
290 afe_date, time_column='created_on')
291 # Special tasks that aren't associated with an HQE
292 # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
293 # more than we should, but I doubt anyone will notice or care.
294 _delete_table_data_before_date('afe_special_tasks', 'id',
295 None, None,
296 afe_date, time_column='time_requested')
Simran Basie129a962012-08-31 13:03:53 -0700297
298 # Now go through and clean up all the rows related to tko_jobs prior to
299 # date.
Fang Denga9815ac2015-03-11 14:26:10 -0700300 logging.info('Cleaning up all data related to tko_jobs prior to %s.',
Simran Basie129a962012-08-31 13:03:53 -0700301 date)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700302 _delete_table_data_before_date('tko_test_attributes', 'id',
303 'tko_tests', TEST_IDX,
304 date, foreign_key=TEST_IDX)
305 _delete_table_data_before_date('tko_test_labels_tests', 'id',
306 'tko_tests', TEST_IDX,
307 date, foreign_key= 'test_id')
308 _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
309 'tko_tests', TEST_IDX,
310 date)
311 _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
312 'tko_tests', TEST_IDX,
313 date)
314 _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
315 'tko_tests', TEST_IDX,
316 date)
317 _delete_table_data_before_date('tko_job_keyvals', 'id',
318 'tko_jobs', JOB_IDX,
Simran Basie129a962012-08-31 13:03:53 -0700319 date, foreign_key='job_id')
320 _delete_table_data_before_date('afe_aborted_host_queue_entries',
Alex Miller7d68a2e2014-05-12 11:36:18 -0700321 'queue_entry_id',
Simran Basie129a962012-08-31 13:03:53 -0700322 'tko_jobs', AFE_JOB_ID, date,
323 foreign_key='queue_entry_id',
324 indirection_table='afe_host_queue_entries',
325 indirection_primary_key='id',
326 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700327 _delete_table_data_before_date('afe_special_tasks', 'id',
328 'tko_jobs', AFE_JOB_ID,
Simran Basie129a962012-08-31 13:03:53 -0700329 date, foreign_key='queue_entry_id',
330 indirection_table='afe_host_queue_entries',
331 indirection_primary_key='id',
332 indirection_foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700333 _delete_table_data_before_date('afe_host_queue_entries', 'id',
334 'tko_jobs', AFE_JOB_ID,
Simran Basie129a962012-08-31 13:03:53 -0700335 date, foreign_key='job_id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700336 _delete_table_data_before_date('afe_job_keyvals', 'id',
337 'tko_jobs', AFE_JOB_ID,
338 date, foreign_key='job_id')
339 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
340 'tko_jobs', AFE_JOB_ID,
341 date, foreign_key='job_id')
342 _delete_table_data_before_date('afe_jobs', 'id',
343 'tko_jobs', AFE_JOB_ID,
Simran Basie129a962012-08-31 13:03:53 -0700344 date, foreign_key='id')
Alex Miller7d68a2e2014-05-12 11:36:18 -0700345 _delete_table_data_before_date('tko_tests', TEST_IDX,
346 'tko_jobs', JOB_IDX,
347 date, foreign_key=JOB_IDX)
348 _delete_table_data_before_date('tko_jobs', JOB_IDX,
349 None, None, date)
350
351
352def parse_args():
353 """Parse command line arguments"""
354 parser = argparse.ArgumentParser()
355 parser.add_argument('-v', '--verbose', action='store_true',
356 help='Print SQL commands and results')
357 parser.add_argument('--step', type=int, action='store',
358 default=1000,
359 help='Number of rows to delete at once')
Paul Hobbs4d11bc32017-09-08 01:02:25 -0700360 parser.add_argument('--dry_run', action='store_true',
361 help='Print SQL queries instead of executing them.')
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -0700362 parser.add_argument('--load_ratio', type=float, action='store', default=0.2,
363 help=('The fraction of time the script should be '
364 'performing deletes. For example --load_ratio=.2 '
365 'will cause the script to sleep 80% of the time, '
366 'and perform work for the other 20%.'))
Alex Miller7d68a2e2014-05-12 11:36:18 -0700367 parser.add_argument('date', help='Keep results newer than')
368 return parser.parse_args()
Simran Basie129a962012-08-31 13:03:53 -0700369
370
371def main():
Alex Miller7d68a2e2014-05-12 11:36:18 -0700372 args = parse_args()
373
Paul Hobbs4d11bc32017-09-08 01:02:25 -0700374 verbose = args.verbose or args.dry_run
375 level = logging.DEBUG if verbose else logging.INFO
Alex Miller7d68a2e2014-05-12 11:36:18 -0700376 logging.basicConfig(level=level, format=LOGGING_FORMAT)
Fang Denga9815ac2015-03-11 14:26:10 -0700377 logging.info('Calling: %s', sys.argv)
Alex Miller7d68a2e2014-05-12 11:36:18 -0700378
379 if not re.match(DATE_FORMAT_REGEX, args.date):
Simran Basie129a962012-08-31 13:03:53 -0700380 print 'DATE must be in yyyy-mm-dd format!'
381 return
Alex Miller7d68a2e2014-05-12 11:36:18 -0700382
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -0700383 global STEP_SIZE, DRY_RUN, LOAD_RATIO
Alex Miller7d68a2e2014-05-12 11:36:18 -0700384 STEP_SIZE = args.step
Paul Hobbs4d11bc32017-09-08 01:02:25 -0700385 DRY_RUN = args.dry_run
Paul Hobbs9d6c6dc2017-09-08 01:23:28 -0700386 LOAD_RATIO = args.load_ratio
387
Alex Miller7d68a2e2014-05-12 11:36:18 -0700388 _delete_all_data_before_date(args.date)
Simran Basie129a962012-08-31 13:03:53 -0700389
390
391if __name__ == '__main__':
Dennis Jeffrey368c54b2013-07-24 11:19:03 -0700392 main()