blob: 8d157ed5cecc67fe69af173faa885ad24b2f5270 [file] [log] [blame]
J. Richard Barnetteea785362014-03-17 16:00:53 -07001import abc
2import datetime
3import glob
Simran Basi1e10e922015-04-16 15:09:56 -07004import json
J. Richard Barnetteea785362014-03-17 16:00:53 -07005import os
Dan Shicf4d2032015-03-12 15:04:21 -07006import re
Simran Basi1e10e922015-04-16 15:09:56 -07007import shutil
J. Richard Barnetteea785362014-03-17 16:00:53 -07008
9import common
Dan Shidfea3682014-08-10 23:38:40 -070010from autotest_lib.client.common_lib import time_utils
Dan Shi81800632015-09-29 12:16:48 -070011from autotest_lib.client.common_lib import utils
Simran Basi1e10e922015-04-16 15:09:56 -070012from autotest_lib.server.cros.dynamic_suite import constants
J. Richard Barnetteacdb0132014-09-03 16:44:12 -070013from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
J. Richard Barnetteea785362014-03-17 16:00:53 -070014
J. Richard Barnetteea785362014-03-17 16:00:53 -070015
# Module-wide AFE client, created once at import time.  The job directory
# classes in this file use it to look up jobs, host queue entries and
# special tasks.
_AFE = frontend_wrappers.RetryingAFE()
J. Richard Barnetteea785362014-03-17 16:00:53 -070017
# Regular expressions used to pull ids and folder names out of result
# directory paths.  They are raw strings so that `\d` is handed to the regex
# engine verbatim instead of being treated as an (invalid) string escape;
# the pattern text itself is unchanged.

# Special task result directory, e.g. .../hosts/<hostname>/1343-cleanup.
# Group 1 captures the numeric task id.
SPECIAL_TASK_PATTERN = r'.*/hosts/[^/]+/(\d+)-[^/]+'
# Job result directory, e.g. .../2032-chromeos-test.  Group 1 captures the
# numeric job id.
JOB_PATTERN = r'.*/(\d+)-[^/]+'
# Pattern of a job folder, e.g., 123-debug_user, where 123 is the job id and
# debug_user is the name of the user who started the job.  Group 1 captures
# the whole folder name.
JOB_FOLDER_PATTERN = r'.*/(\d+-[^/]+)'
Dan Shi1b4c7c32015-10-05 10:38:57 -070023
def is_job_expired(age_limit, timestamp):
    """Tell whether a job has aged past the given limit.

    @param age_limit: Minimum age, measured in days.  A value that is
                      zero or negative means every job counts as expired.
    @param timestamp: Timestamp of the job being checked.  The format
                      must match time_utils.TIME_FMT.

    @returns True iff the job is old enough to be expired.
    """
    if age_limit > 0:
        # The job expires once `age_limit` days have elapsed since its
        # timestamp.
        started = time_utils.time_string_to_datetime(timestamp)
        allowed_age = datetime.timedelta(days=age_limit)
        return started + allowed_age <= datetime.datetime.now()
    # Non-positive limit: the timestamp is not even parsed.
    return True
39
40
def get_job_id_or_task_id(result_dir):
    """Extract job id or special task id from result_dir.

    @param result_dir: path to the result dir.
            For test job:
            /usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6
            The hostname at the end is optional.
            For special task:
            /usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup

    @returns: integer representing the job id or task id. Returns None if
              result_dir is empty or no job or task id can be parsed from it.
    """
    if not result_dir:
        return None
    result_dir = os.path.abspath(result_dir)

    # Try to get the job ID from the last pattern of number-text. This avoids
    # issue with path like 123-results/456-debug_user, in which 456 is the
    # real job ID.
    m_job = re.findall(JOB_PATTERN, result_dir)
    if m_job:
        return int(m_job[-1])

    m_special_task = re.match(SPECIAL_TASK_PATTERN, result_dir)
    if m_special_task:
        return int(m_special_task.group(1))

    # Result folder for job running inside container has only job id.
    # Raw string so `\d` reaches the regex engine as written.
    ssp_job_pattern = r'.*/(\d+)$'
    m_ssp_job = re.match(ssp_job_pattern, result_dir)
    # A bare numeric folder is only trusted as a job id inside a container.
    if m_ssp_job and utils.is_in_container():
        return int(m_ssp_job.group(1))
    return None
Dan Shicf4d2032015-03-12 15:04:21 -070071
72
def get_job_folder_name(result_dir):
    """Return the job folder component found in result_dir.

    @param result_dir: path to the result dir.
            For test job:
            /usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6
            The hostname at the end is optional.
            For special task:
            /usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup

    @returns: The name of the folder of a job, taken from the last match of
              JOB_FOLDER_PATTERN in result_dir. Returns None if result_dir
              is empty or nothing matches the pattern.
    """
    if result_dir:
        folder_names = re.findall(JOB_FOLDER_PATTERN, result_dir)
        if folder_names:
            return folder_names[-1]
    return None
91
92
class _JobDirectory(object):
    """State associated with a job to be offloaded.

    The full life-cycle of a job (including failure events that
    normally don't occur) looks like this:
     1. `get_job_directories()` discovers the job's results directory,
        and a job instance is created for it.
     2. Calls to `offload()` have no effect so long as the job
        isn't complete in the database and the job isn't expired
        according to the `age_limit` parameter.
     3. Eventually, the job is both finished and expired.  The next
        call to `offload()` makes the first attempt to offload the
        directory to GS.  Offload is attempted, but fails to complete
        (e.g. because of a GS problem).
     4. Finally, a call to `offload()` succeeds, and the directory no
        longer exists.  Now `is_offloaded()` is true, so the job
        instance is deleted, and future failures will not mention this
        directory any more.

    Only steps 1. and 4. are guaranteed to occur.  The others depend
    on the timing of calls to `offload()`, and on the reliability of
    the actual offload process.

    """

    __metaclass__ = abc.ABCMeta

    GLOB_PATTERN = None  # must be redefined in subclass

    def __init__(self, resultsdir):
        """Record the results directory and reset offload bookkeeping.

        @param resultsdir: Path to the job's results directory; the job
                           or task id is parsed from it.
        """
        self.dirname = resultsdir
        self._id = get_job_id_or_task_id(resultsdir)
        self.offload_count = 0
        self.first_offload_start = 0

    @classmethod
    def get_job_directories(cls):
        """Return a list of directories of jobs that need offloading."""
        job_dirs = []
        for candidate in glob.glob(cls.GLOB_PATTERN):
            # The glob can match plain files too; only directories count.
            if os.path.isdir(candidate):
                job_dirs.append(candidate)
        return job_dirs

    @abc.abstractmethod
    def get_timestamp_if_finished(self):
        """Return this job's timestamp from the database.

        If the database has not marked the job as finished, return
        `None`.  Otherwise, return a timestamp for the job.  The
        timestamp is to be used to determine expiration in
        `is_job_expired()`.

        @return Return `None` if the job is still running; otherwise
                return a string with a timestamp in the appropriate
                format.
        """
        raise NotImplementedError("_JobDirectory.get_timestamp_if_finished")

    def process_gs_instructions(self):
        """Process any gs_offloader instructions for this directory.

        The base implementation does no processing.

        @returns True/False if there is anything left to offload.
        """
        # Default support is to still offload the directory.
        return True
155
J. Richard Barnetteea785362014-03-17 16:00:53 -0700156
# Text of the note written into a log directory whose contents were deleted
# instead of being offloaded.
NO_OFFLOAD_README = (
    'These results have been deleted rather than offloaded.\n'
    'This is the expected behavior for passing jobs from the Commit Queue.'
)
159
160
class RegularJobDirectory(_JobDirectory):
    """Subclass of _JobDirectory for regular test jobs."""

    GLOB_PATTERN = '[0-9]*-*'

    def process_gs_instructions(self):
        """Process any gs_offloader instructions for this job.

        Each immediate subdirectory of the job directory may hold a JSON
        instructions file.  When that file sets the no-offload flag, the
        subdirectory's contents are replaced by a short readme.

        @returns True/False if there is anything left to offload.
        """
        # Go through the gs_offloader instructions file for each test in
        # this job.
        instructions_glob = os.path.join(
                self.dirname, '*', constants.GS_OFFLOADER_INSTRUCTIONS)
        for instructions_file in glob.glob(instructions_glob):
            with open(instructions_file, 'r') as f:
                instructions = json.load(f)
            if instructions.get(constants.GS_OFFLOADER_NO_OFFLOAD):
                _remove_log_directory_contents(
                        os.path.dirname(instructions_file))

        # Finally check if there's anything left to offload.
        if os.listdir(self.dirname):
            return True
        shutil.rmtree(self.dirname)
        return False

    def get_timestamp_if_finished(self):
        """Get the timestamp to use for finished jobs.

        @returns the latest hqe finished_on time. If the finished_on times
                 are null returns the job's created_on time. Returns None
                 when the AFE reports no finished job with this id.
        """
        finished_jobs = _AFE.get_jobs(id=self._id, finished=True)
        if not finished_jobs:
            return None
        finished_hqes = _AFE.get_host_queue_entries(
                finished_on__isnull=False, job_id=self._id)
        if finished_hqes:
            # While most Jobs have 1 HQE, some can have multiple, so check
            # them all.
            return max(hqe.finished_on for hqe in finished_hqes)
        return finished_jobs[0].created_on
J. Richard Barnetteea785362014-03-17 16:00:53 -0700202
203
def _remove_log_directory_contents(dirpath):
    """Empty a log directory, leaving only an explanatory note.

    The directory is deleted and recreated, then a readme containing
    NO_OFFLOAD_README is written into it to explain what has happened to
    the logs.

    @param dirpath: Path to log directory.
    """
    shutil.rmtree(dirpath)
    os.mkdir(dirpath)
    readme_path = os.path.join(dirpath, 'logs-removed-readme.txt')
    with open(readme_path, 'w') as readme:
        readme.write(NO_OFFLOAD_README)
216
217
class SpecialJobDirectory(_JobDirectory):
    """Subclass of _JobDirectory for special (per-host) jobs."""

    GLOB_PATTERN = 'hosts/*/[0-9]*-*'

    def __init__(self, resultsdir):
        """Initialize via the base class; no extra state is kept here.

        @param resultsdir: Path to the special task's results directory.
        """
        super(SpecialJobDirectory, self).__init__(resultsdir)

    def get_timestamp_if_finished(self):
        """Get the finish time of this special task, if it is complete.

        @returns the `time_finished` of the first completed special task
                 the AFE returns for this id, or None if there is none.
        """
        completed = _AFE.get_special_tasks(id=self._id, is_complete=True)
        if completed:
            return completed[0].time_finished
        return None