Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 2 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 6 | import argparse |
| 7 | import datetime |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 8 | import logging |
Jakob Juelich | 5050d95 | 2014-10-09 11:55:06 -0700 | [diff] [blame] | 9 | import os |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 10 | import re |
| 11 | import sys |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 12 | import time |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 13 | |
Jakob Juelich | 5050d95 | 2014-10-09 11:55:06 -0700 | [diff] [blame] | 14 | os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings' |
| 15 | |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 16 | import common |
Fang Deng | 001c4dd | 2016-02-24 13:55:44 -0800 | [diff] [blame] | 17 | from autotest_lib.server import utils |
Jakob Juelich | 5050d95 | 2014-10-09 11:55:06 -0700 | [diff] [blame] | 18 | from django.db import connections, transaction |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 19 | |
| 20 | |
| 21 | # Format Appears as: [Date] [Time] - [Msg Level] - [Message] |
| 22 | LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s' |
| 23 | # This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01) |
| 24 | DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]' |
| 25 | '|3[01])$') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 26 | SELECT_CMD_FORMAT = """ |
| 27 | SELECT %(table)s.%(primary_key)s FROM %(table)s |
| 28 | WHERE %(table)s.%(time_column)s <= "%(date)s" |
| 29 | """ |
| 30 | SELECT_JOIN_CMD_FORMAT = """ |
| 31 | SELECT %(table)s.%(primary_key)s FROM %(table)s |
| 32 | INNER JOIN %(related_table)s |
| 33 | ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s |
| 34 | WHERE %(related_table)s.%(time_column)s <= "%(date)s" |
| 35 | """ |
| 36 | SELECT_WITH_INDIRECTION_FORMAT = """ |
| 37 | SELECT %(table)s.%(primary_key)s FROM %(table)s |
| 38 | INNER JOIN %(indirection_table)s |
| 39 | ON %(table)s.%(foreign_key)s = |
| 40 | %(indirection_table)s.%(indirection_primary_key)s |
| 41 | INNER JOIN %(related_table)s |
| 42 | ON %(indirection_table)s.%(indirection_foreign_key)s = |
| 43 | %(related_table)s.%(related_primary_key)s |
| 44 | WHERE %(related_table)s.%(time_column)s <= "%(date)s" |
| 45 | """ |
| 46 | DELETE_ROWS_FORMAT = """ |
| 47 | DELETE FROM %(table)s |
| 48 | WHERE %(table)s.%(primary_key)s IN (%(rows)s) |
| 49 | """ |
| 50 | |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 51 | |
| 52 | AFE_JOB_ID = 'afe_job_id' |
| 53 | JOB_ID = 'job_id' |
| 54 | JOB_IDX = 'job_idx' |
| 55 | TEST_IDX = 'test_idx' |
Jakob Juelich | 5050d95 | 2014-10-09 11:55:06 -0700 | [diff] [blame] | 56 | |
| 57 | # CAUTION: Make sure only the 'default' connection is used. Otherwise |
| 58 | # db_cleanup may delete stuff from the global database, which is generally not |
| 59 | # intended. |
| 60 | cursor = connections['default'].cursor() |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 61 | |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 62 | # Globals for command line flag constants, for convenience. |
| 63 | DRY_RUN = False |
| 64 | STEP_SIZE = None |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 65 | LOAD_RATIO = 1.0 |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 66 | |
| 67 | class ProgressBar(object): |
| 68 | TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})" |
| 69 | |
| 70 | def __init__(self, name, amount): |
| 71 | self._name = name |
| 72 | self._amount = amount |
| 73 | self._cur = 0 |
| 74 | |
| 75 | def __enter__(self): |
| 76 | return self |
| 77 | |
| 78 | def __exit__(self, a, b, c): |
| 79 | sys.stdout.write('\n') |
| 80 | sys.stdout.flush() |
| 81 | |
| 82 | def update(self, x): |
| 83 | """ |
| 84 | Advance the counter by `x`. |
| 85 | |
| 86 | @param x: An integer of how many more elements were processed. |
| 87 | """ |
| 88 | self._cur += x |
| 89 | |
| 90 | def show(self): |
| 91 | """ |
| 92 | Display the progress bar on the current line. Repeated invocations |
| 93 | "update" the display. |
| 94 | """ |
| 95 | if self._amount == 0: |
| 96 | barlen = 20 |
| 97 | else: |
| 98 | barlen = int(20 * self._cur / float(self._amount)) |
| 99 | if barlen: |
| 100 | bartext = '=' * (barlen-1) + '>' |
| 101 | else: |
| 102 | bartext = '' |
| 103 | text = self.TEXT.format(self._name, bartext, self._cur, self._amount) |
| 104 | sys.stdout.write('\r') |
| 105 | sys.stdout.write(text) |
| 106 | sys.stdout.flush() |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 107 | |
| 108 | |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 109 | def grouper(iterable, n): |
| 110 | """ |
| 111 | Group the elements of `iterable` into groups of maximum size `n`. |
| 112 | |
| 113 | @param iterable: An iterable. |
| 114 | @param n: Max size of returned groups. |
| 115 | @returns: Yields iterables of size <= n. |
| 116 | |
| 117 | >>> grouper('ABCDEFG', 3) |
| 118 | [['A', 'B', C'], ['D', 'E', 'F'], ['G']] |
| 119 | """ |
| 120 | args = [iter(iterable)] * n |
| 121 | while True: |
| 122 | lst = [] |
| 123 | try: |
| 124 | for itr in args: |
| 125 | lst.append(next(itr)) |
| 126 | yield lst |
| 127 | except StopIteration: |
| 128 | if lst: |
| 129 | yield lst |
| 130 | break |
| 131 | |
| 132 | |
| 133 | def _delete_table_data_before_date(table_to_delete_from, primary_key, |
| 134 | related_table, related_primary_key, |
| 135 | date, foreign_key=None, |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 136 | time_column="started_time", |
| 137 | indirection_table=None, |
| 138 | indirection_primary_key=None, |
| 139 | indirection_foreign_key=None): |
| 140 | """ |
| 141 | We want a delete statement that will only delete from one table while |
| 142 | using a related table to find the rows to delete. |
| 143 | |
| 144 | An example mysql command: |
| 145 | DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN |
| 146 | tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND |
| 147 | tko_tests.started_time <= '2012-02-01'; |
| 148 | |
| 149 | There are also tables that require 2 joins to determine which rows we want |
| 150 | to delete and we determine these rows by joining the table we want to |
| 151 | delete from with an indirection table to the actual jobs table. |
| 152 | |
| 153 | @param table_to_delete_from: Table whose rows we want to delete. |
| 154 | @param related_table: Table with the date information we are selecting by. |
| 155 | @param foreign_key: Foreign key used in table_to_delete_from to reference |
| 156 | the related table. If None, the primary_key is used. |
| 157 | @param primary_key: Primary key in the related table. |
| 158 | @param date: End date of the information we are trying to delete. |
| 159 | @param time_column: Column that we want to use to compare the date to. |
| 160 | @param indirection_table: Table we use to link the data we are trying to |
| 161 | delete with the table with the date information. |
| 162 | @param indirection_primary_key: Key we use to connect the indirection table |
| 163 | to the table we are trying to delete rows |
| 164 | from. |
| 165 | @param indirection_foreign_key: Key we use to connect the indirection table |
| 166 | to the table with the date information. |
| 167 | """ |
| 168 | if not foreign_key: |
| 169 | foreign_key = primary_key |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 170 | |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 171 | if not related_table: |
| 172 | # Deleting from a table directly. |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 173 | variables = dict(table=table_to_delete_from, primary_key=primary_key, |
| 174 | time_column=time_column, date=date) |
| 175 | sql = SELECT_CMD_FORMAT % variables |
| 176 | elif not indirection_table: |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 177 | # Deleting using a single JOIN to get the date information. |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 178 | variables = dict(primary_key=primary_key, table=table_to_delete_from, |
| 179 | foreign_key=foreign_key, related_table=related_table, |
| 180 | related_primary_key=related_primary_key, |
| 181 | time_column=time_column, date=date) |
| 182 | sql = SELECT_JOIN_CMD_FORMAT % variables |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 183 | else: |
| 184 | # There are cases where we need to JOIN 3 TABLES to determine the rows |
| 185 | # we want to delete. |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 186 | variables = dict(primary_key=primary_key, table=table_to_delete_from, |
| 187 | indirection_table=indirection_table, |
| 188 | foreign_key=foreign_key, |
| 189 | indirection_primary_key=indirection_primary_key, |
| 190 | related_table=related_table, |
| 191 | related_primary_key=related_primary_key, |
| 192 | indirection_foreign_key=indirection_foreign_key, |
| 193 | time_column=time_column, date=date) |
| 194 | sql = SELECT_WITH_INDIRECTION_FORMAT % variables |
| 195 | |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 196 | logging.debug('SQL: %s', sql) |
Jakob Juelich | 5050d95 | 2014-10-09 11:55:06 -0700 | [diff] [blame] | 197 | cursor.execute(sql, []) |
| 198 | rows = [x[0] for x in cursor.fetchall()] |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 199 | logging.debug(rows) |
| 200 | |
| 201 | if not rows or rows == [None]: |
| 202 | with ProgressBar(table_to_delete_from, 0) as pb: |
| 203 | pb.show() |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 204 | logging.debug('Nothing to delete for %s', table_to_delete_from) |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 205 | return |
| 206 | |
| 207 | with ProgressBar(table_to_delete_from, len(rows)) as pb: |
| 208 | for row_keys in grouper(rows, STEP_SIZE): |
| 209 | variables['rows'] = ','.join([str(x) for x in row_keys]) |
| 210 | sql = DELETE_ROWS_FORMAT % variables |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 211 | |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 212 | start = time.time() |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 213 | logging.debug('SQL: %s', sql) |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 214 | if not DRY_RUN: |
| 215 | cursor.execute(sql, []) |
| 216 | transaction.commit_unless_managed(using='default') |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 217 | end = time.time() |
| 218 | |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 219 | pb.update(len(row_keys)) |
| 220 | pb.show() |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 221 | |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 222 | if LOAD_RATIO != 1.0: |
| 223 | assert 0 < LOAD_RATIO <= 1, ( |
| 224 | 'Load ratio must be a fraction between 0 and 1.') |
| 225 | time.sleep((end - start) / LOAD_RATIO) |
| 226 | |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 227 | |
| 228 | def _subtract_days(date, days_to_subtract): |
| 229 | """ |
| 230 | Return a date (string) that is 'days' before 'date' |
| 231 | |
| 232 | @param date: date (string) we are subtracting from. |
| 233 | @param days_to_subtract: days (int) we are subtracting. |
| 234 | """ |
| 235 | date_obj = datetime.datetime.strptime(date, '%Y-%m-%d') |
| 236 | difference = date_obj - datetime.timedelta(days=days_to_subtract) |
| 237 | return difference.strftime('%Y-%m-%d') |
| 238 | |
| 239 | |
| 240 | def _delete_all_data_before_date(date): |
| 241 | """ |
| 242 | Delete all the database data before a given date. |
| 243 | |
| 244 | This function focuses predominately on the data for jobs in tko_jobs. |
| 245 | However not all jobs in afe_jobs are also in tko_jobs. |
| 246 | |
| 247 | Therefore we delete all the afe_job and foreign key relations prior to two |
| 248 | days before date. Then we do the queries using tko_jobs and these |
| 249 | tables to ensure all the related information is gone. Even though we are |
| 250 | repeating deletes on these tables, the second delete will be quick and |
| 251 | completely thorough in ensuring we clean up all the foreign key |
| 252 | dependencies correctly. |
| 253 | |
| 254 | @param date: End date of the information we are trying to delete. |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 255 | @param step: Rows to delete per SQL query. |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 256 | """ |
| 257 | # First cleanup all afe_job related data (prior to 2 days before date). |
| 258 | # The reason for this is not all afe_jobs may be in tko_jobs. |
| 259 | afe_date = _subtract_days(date, 2) |
Fang Deng | a9815ac | 2015-03-11 14:26:10 -0700 | [diff] [blame] | 260 | logging.info('Cleaning up all afe_job data prior to %s.', afe_date) |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 261 | _delete_table_data_before_date('afe_aborted_host_queue_entries', |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 262 | 'queue_entry_id', |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 263 | 'afe_jobs', 'id', afe_date, |
| 264 | time_column= 'created_on', |
| 265 | foreign_key='queue_entry_id', |
| 266 | indirection_table='afe_host_queue_entries', |
| 267 | indirection_primary_key='id', |
| 268 | indirection_foreign_key='job_id') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 269 | _delete_table_data_before_date('afe_special_tasks', 'id', |
| 270 | 'afe_jobs', 'id', |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 271 | afe_date, time_column='created_on', |
| 272 | foreign_key='queue_entry_id', |
| 273 | indirection_table='afe_host_queue_entries', |
| 274 | indirection_primary_key='id', |
| 275 | indirection_foreign_key='job_id') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 276 | _delete_table_data_before_date('afe_host_queue_entries', 'id', |
| 277 | 'afe_jobs', 'id', |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 278 | afe_date, time_column='created_on', |
| 279 | foreign_key=JOB_ID) |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 280 | _delete_table_data_before_date('afe_job_keyvals', 'id', |
| 281 | 'afe_jobs', 'id', |
| 282 | afe_date, time_column='created_on', |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 283 | foreign_key=JOB_ID) |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 284 | _delete_table_data_before_date('afe_jobs_dependency_labels', 'id', |
| 285 | 'afe_jobs', 'id', |
| 286 | afe_date, time_column='created_on', |
| 287 | foreign_key=JOB_ID) |
| 288 | _delete_table_data_before_date('afe_jobs', 'id', |
| 289 | None, None, |
| 290 | afe_date, time_column='created_on') |
| 291 | # Special tasks that aren't associated with an HQE |
| 292 | # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit |
| 293 | # more than we should, but I doubt anyone will notice or care. |
| 294 | _delete_table_data_before_date('afe_special_tasks', 'id', |
| 295 | None, None, |
| 296 | afe_date, time_column='time_requested') |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 297 | |
| 298 | # Now go through and clean up all the rows related to tko_jobs prior to |
| 299 | # date. |
Fang Deng | a9815ac | 2015-03-11 14:26:10 -0700 | [diff] [blame] | 300 | logging.info('Cleaning up all data related to tko_jobs prior to %s.', |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 301 | date) |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 302 | _delete_table_data_before_date('tko_test_attributes', 'id', |
| 303 | 'tko_tests', TEST_IDX, |
| 304 | date, foreign_key=TEST_IDX) |
| 305 | _delete_table_data_before_date('tko_test_labels_tests', 'id', |
| 306 | 'tko_tests', TEST_IDX, |
| 307 | date, foreign_key= 'test_id') |
| 308 | _delete_table_data_before_date('tko_iteration_result', TEST_IDX, |
| 309 | 'tko_tests', TEST_IDX, |
| 310 | date) |
| 311 | _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX, |
| 312 | 'tko_tests', TEST_IDX, |
| 313 | date) |
| 314 | _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX, |
| 315 | 'tko_tests', TEST_IDX, |
| 316 | date) |
| 317 | _delete_table_data_before_date('tko_job_keyvals', 'id', |
| 318 | 'tko_jobs', JOB_IDX, |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 319 | date, foreign_key='job_id') |
| 320 | _delete_table_data_before_date('afe_aborted_host_queue_entries', |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 321 | 'queue_entry_id', |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 322 | 'tko_jobs', AFE_JOB_ID, date, |
| 323 | foreign_key='queue_entry_id', |
| 324 | indirection_table='afe_host_queue_entries', |
| 325 | indirection_primary_key='id', |
| 326 | indirection_foreign_key='job_id') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 327 | _delete_table_data_before_date('afe_special_tasks', 'id', |
| 328 | 'tko_jobs', AFE_JOB_ID, |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 329 | date, foreign_key='queue_entry_id', |
| 330 | indirection_table='afe_host_queue_entries', |
| 331 | indirection_primary_key='id', |
| 332 | indirection_foreign_key='job_id') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 333 | _delete_table_data_before_date('afe_host_queue_entries', 'id', |
| 334 | 'tko_jobs', AFE_JOB_ID, |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 335 | date, foreign_key='job_id') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 336 | _delete_table_data_before_date('afe_job_keyvals', 'id', |
| 337 | 'tko_jobs', AFE_JOB_ID, |
| 338 | date, foreign_key='job_id') |
| 339 | _delete_table_data_before_date('afe_jobs_dependency_labels', 'id', |
| 340 | 'tko_jobs', AFE_JOB_ID, |
| 341 | date, foreign_key='job_id') |
| 342 | _delete_table_data_before_date('afe_jobs', 'id', |
| 343 | 'tko_jobs', AFE_JOB_ID, |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 344 | date, foreign_key='id') |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 345 | _delete_table_data_before_date('tko_tests', TEST_IDX, |
| 346 | 'tko_jobs', JOB_IDX, |
| 347 | date, foreign_key=JOB_IDX) |
| 348 | _delete_table_data_before_date('tko_jobs', JOB_IDX, |
| 349 | None, None, date) |
| 350 | |
| 351 | |
| 352 | def parse_args(): |
| 353 | """Parse command line arguments""" |
| 354 | parser = argparse.ArgumentParser() |
| 355 | parser.add_argument('-v', '--verbose', action='store_true', |
| 356 | help='Print SQL commands and results') |
| 357 | parser.add_argument('--step', type=int, action='store', |
| 358 | default=1000, |
| 359 | help='Number of rows to delete at once') |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 360 | parser.add_argument('--dry_run', action='store_true', |
| 361 | help='Print SQL queries instead of executing them.') |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 362 | parser.add_argument('--load_ratio', type=float, action='store', default=0.2, |
| 363 | help=('The fraction of time the script should be ' |
| 364 | 'performing deletes. For example --load_ratio=.2 ' |
| 365 | 'will cause the script to sleep 80% of the time, ' |
| 366 | 'and perform work for the other 20%.')) |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 367 | parser.add_argument('date', help='Keep results newer than') |
| 368 | return parser.parse_args() |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 369 | |
| 370 | |
| 371 | def main(): |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 372 | args = parse_args() |
| 373 | |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 374 | verbose = args.verbose or args.dry_run |
| 375 | level = logging.DEBUG if verbose else logging.INFO |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 376 | logging.basicConfig(level=level, format=LOGGING_FORMAT) |
Fang Deng | a9815ac | 2015-03-11 14:26:10 -0700 | [diff] [blame] | 377 | logging.info('Calling: %s', sys.argv) |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 378 | |
| 379 | if not re.match(DATE_FORMAT_REGEX, args.date): |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 380 | print 'DATE must be in yyyy-mm-dd format!' |
| 381 | return |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 382 | |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 383 | global STEP_SIZE, DRY_RUN, LOAD_RATIO |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 384 | STEP_SIZE = args.step |
Paul Hobbs | 4d11bc3 | 2017-09-08 01:02:25 -0700 | [diff] [blame] | 385 | DRY_RUN = args.dry_run |
Paul Hobbs | 9d6c6dc | 2017-09-08 01:23:28 -0700 | [diff] [blame] | 386 | LOAD_RATIO = args.load_ratio |
| 387 | |
Alex Miller | 7d68a2e | 2014-05-12 11:36:18 -0700 | [diff] [blame] | 388 | _delete_all_data_before_date(args.date) |
Simran Basi | e129a96 | 2012-08-31 13:03:53 -0700 | [diff] [blame] | 389 | |
| 390 | |
| 391 | if __name__ == '__main__': |
Dennis Jeffrey | 368c54b | 2013-07-24 11:19:03 -0700 | [diff] [blame] | 392 | main() |