tko/parse.py - platform/external/autotest - Gitiles

 #!/usr/bin/python -u

 import datetime
 import json
 import os, sys, optparse, fcntl, errno, traceback, socket

 import common
 from autotest_lib.client.common_lib import mail, pidfile
 from autotest_lib.client.common_lib import utils
 from autotest_lib.frontend import setup_django_environment
 from autotest_lib.frontend.tko import models as tko_models
 from autotest_lib.server.cros.dynamic_suite import constants
 from autotest_lib.site_utils import job_overhead
 from autotest_lib.tko import db as tko_db, utils as tko_utils
 from autotest_lib.tko import models, status_lib
 from autotest_lib.tko.perf_upload import perf_uploader


 def parse_args():
     """Parse the command line of the parser script.

     Registers every supported option, parses sys.argv and insists on at
     least one positional argument (the results directory). When none is
     given, an error is reported through tko_utils.dprint, the usage text is
     printed and the process exits with status 1.

     @return: Tuple (options, args) as produced by optparse.
     """
     # (flag, keyword arguments for add_option), kept in help-output order.
     option_table = (
         ("-m", dict(help="Send mail for FAILED tests",
                     dest="mailit", action="store_true")),
         ("-r", dict(help="Reparse the results of a job",
                     dest="reparse", action="store_true")),
         ("-o", dict(help="Parse a single results directory",
                     dest="singledir", action="store_true")),
         ("-l", dict(help=("Levels of subdirectories to include "
                           "in the job name"),
                     type="int", dest="level", default=1)),
         ("-n", dict(help="No blocking on an existing parse",
                     dest="noblock", action="store_true")),
         ("-s", dict(help="Database server hostname",
                     dest="db_host", action="store")),
         ("-u", dict(help="Database username",
                     dest="db_user", action="store")),
         ("-p", dict(help="Database password",
                     dest="db_pass", action="store")),
         ("-d", dict(help="Database name",
                     dest="db_name", action="store")),
         ("--write-pidfile", dict(help="write pidfile (.parser_execute)",
                                  dest="write_pidfile", action="store_true",
                                  default=False)),
         ("--record-duration", dict(help="Record timing to metadata db",
                                    dest="record_duration",
                                    action="store_true", default=False)),
     )

     cli = optparse.OptionParser()
     for flag, settings in option_table:
         cli.add_option(flag, **settings)
     options, args = cli.parse_args()

     # A results directory is mandatory.
     if not args:
         tko_utils.dprint("ERROR: at least one results directory must "
                          "be provided")
         cli.print_help()
         sys.exit(1)

     return options, args


 def format_failure_message(jobname, kernel, testname, status, reason):
     """Lay out one row of the failure report.

     The first four fields are left-justified in columns of width 12, 20, 12
     and 10; the reason follows unpadded. Fields are separated by one space.

     @param jobname: String representing the job name.
     @param kernel: String representing the kernel.
     @param testname: String representing the test name.
     @param status: String representing the test status.
     @param reason: String representing the reason.

     @return: Failure message as a string.
     """
     columns = (jobname, kernel, testname, status, reason)
     return "%-12s %-20s %-12s %-10s %s" % columns


 def mailfailure(jobname, job, message):
     """Mail a failure report for a job to the job's user.

     The mail body is a fixed header (a link to the results on this host
     plus a column-title row and an underline row) followed by |message|.

     @param jobname: String representing the job name.
     @param job: A job object; its 'user' attribute is the recipient.
     @param message: The message to mail.
     """
     results_url = "http://%s/results/%s" % (socket.gethostname(), jobname)
     title_row = format_failure_message("Job name", "Kernel", "Test name",
                                        "FAIL/WARN", "Failure reason")
     rule_row = format_failure_message("=" * 8, "=" * 6, "=" * 8,
                                       "=" * 8, "=" * 14)
     message_header = "\n".join([
         "",
         "The following tests FAILED for this job",
         results_url,
         "",
         title_row,
         rule_row,
     ])

     subject = "AUTOTEST: FAILED tests from job %s" % jobname
     mail.send("", job.user, "", subject, message_header + message)


 def _invalidate_original_tests(orig_job_idx, retry_job_idx):
     """Retry tests invalidates original tests.

     Whenever a retry job is complete, we want to invalidate the original
     job's test results, such that the consumers of the tko database
     (e.g. tko frontend, wmatrix) could figure out which results are the latest.

     When a retry job is parsed, we retrieve the original job's afe_job_id
     from the retry job's keyvals, which is then converted to tko job_idx and
     passed into this method as |orig_job_idx|.

     In this method, we are going to invalidate the rows in tko_tests that are
     associated with the original job by flipping their 'invalid' bit to True.
     In addition, in tko_tests, we also maintain a pointer from the retry results
     to the original results, so that later we can always know which rows in
     tko_tests are retries and which are the corresponding original results.
     This is done by setting the field 'invalidates_test_idx' of the tests
     associated with the retry job.

     For example, assume Job(job_idx=105) are retried by Job(job_idx=108), after
     this method is run, their tko_tests rows will look like:
     __________________________________________________________________________
     test_idx| job_idx | test            | ... | invalid | invalidates_test_idx
     10      | 105     | dummy_Fail.Error| ... | 1       | NULL
     11      | 105     | dummy_Fail.Fail | ... | 1       | NULL
     ...
     20      | 108     | dummy_Fail.Error| ... | 0       | 10
     21      | 108     | dummy_Fail.Fail | ... | 0       | 11
     __________________________________________________________________________
     Note the invalid bits of the rows for Job(job_idx=105) are set to '1'.
     And the 'invalidates_test_idx' fields of the rows for Job(job_idx=108)
     are set to 10 and 11 (the test_idx of the rows for the original job).

     If either index is missing (falsy), an error is logged and nothing is
     changed.

     @param orig_job_idx: An integer representing the original job's
                          tko job_idx. Tests associated with this job will
                          be marked as 'invalid'.
     @param retry_job_idx: An integer representing the retry job's
                           tko job_idx. The field 'invalidates_test_idx'
                           of the tests associated with this job will be updated.

     """
     msg = 'orig_job_idx: %s, retry_job_idx: %s' % (orig_job_idx, retry_job_idx)
     if not orig_job_idx or not retry_job_idx:
         tko_utils.dprint('ERROR: Could not invalidate tests: ' + msg)
         # Without both indices there is nothing meaningful to update; bail
         # out instead of querying with a missing index and then reporting
         # success below.
         return
     # Using django models here makes things easier, but make sure that
     # before this method is called, all other relevant transactions have been
     # committed to avoid race condition. In the long run, we might consider
     # to make the rest of parser use django models.
     orig_tests = tko_models.Test.objects.filter(job__job_idx=orig_job_idx)
     retry_tests = tko_models.Test.objects.filter(job__job_idx=retry_job_idx)

     # Invalidate original tests.
     orig_tests.update(invalid=True)

     # Maintain a dictionary that maps (test, subdir) to original tests.
     # Note that within the scope of a job, (test, subdir) uniquely
     # identifies a test run, but 'test' does not.
     # In a control file, one could run the same test with different
     # 'subdir_tag', for example,
     #     job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_1')
     #     job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_2')
     # In tko, we will get
     #    (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_1')
     #    (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_2')
     invalidated_tests = {(orig_test.test, orig_test.subdir): orig_test
                          for orig_test in orig_tests}
     for retry in retry_tests:
         # It is possible that (retry.test, retry.subdir) doesn't exist
         # in invalidated_tests. This could happen when the original job
         # didn't run some of its tests. For example, a dut goes offline
         # since the beginning of the job, in which case invalidated_tests
         # will only have one entry for 'SERVER_JOB'.
         orig_test = invalidated_tests.get((retry.test, retry.subdir), None)
         if orig_test:
             retry.invalidates_test = orig_test
             retry.save()
     tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)


 def parse_one(db, jobname, path, reparse, mail_on_failure):
     """Parse a single job. Optionally send email on failure.

     The job under |path| is parsed from its status log ('status.log', or
     'status' as a fallback), inserted into the database under |jobname|,
     handed to the perf uploader, used to invalidate the original job's tests
     when its keyvals mark it as a retry, and finally serialized to
     'job.serialize' inside |path|.

     Returns early without touching the database when the job is already
     parsed and |reparse| is false, or when no status file exists.

     @param db: database object.
     @param jobname: the tag used to search for existing job in db,
                     e.g. '1234-chromeos-test/host1'
     @param path: The path to the results to be parsed.
     @param reparse: True/False, whether this is reparsing of the job.
     @param mail_on_failure: whether to send email on FAILED test.
     """
     tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
     old_job_idx = db.find_job(jobname)
     # old tests is a dict from tuple (test_name, subdir) to test_idx
     old_tests = {}
     if old_job_idx is not None:
         if not reparse:
             tko_utils.dprint("! Job is already parsed, done")
             return

         raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
                                   {"job_idx": old_job_idx})
         if raw_old_tests:
             old_tests = dict(((test, subdir), test_idx)
                              for test_idx, subdir, test in raw_old_tests)

     # look up the status version
     job_keyval = models.job.read_keyval(path)
     status_version = job_keyval.get("status_version", 0)

     # parse out the job
     parser = status_lib.parser(status_version)
     job = parser.make_job(path)
     status_log = os.path.join(path, "status.log")
     if not os.path.exists(status_log):
         status_log = os.path.join(path, "status")
     if not os.path.exists(status_log):
         tko_utils.dprint("! Unable to parse job, no status file")
         return

     # parse the status logs
     tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
     # Read through a context manager so the handle is closed right away
     # instead of lingering until garbage collection.
     with open(status_log) as status_file:
         status_lines = status_file.readlines()
     parser.start(job)
     tests = parser.end(status_lines)

     # parser.end can return the same object multiple times, so filter out dups
     job.tests = []
     already_added = set()
     for test in tests:
         if test not in already_added:
             already_added.add(test)
             job.tests.append(test)

     # try and port test_idx over from the old tests, but if old tests stop
     # matching up with new ones just give up
     if reparse and old_job_idx is not None:
         job.index = old_job_idx
         for test in job.tests:
             test_idx = old_tests.pop((test.testname, test.subdir), None)
             if test_idx is not None:
                 test.test_idx = test_idx
             else:
                 tko_utils.dprint("! Reparse returned new test "
                                  "testname=%r subdir=%r" %
                                  (test.testname, test.subdir))
         # Whatever is left in old_tests no longer has a counterpart in the
         # reparsed job; drop those rows and everything hanging off them.
         for test_idx in old_tests.values():
             where = {'test_idx' : test_idx}
             db.delete('tko_iteration_result', where)
             db.delete('tko_iteration_perf_value', where)
             db.delete('tko_iteration_attributes', where)
             db.delete('tko_test_attributes', where)
             db.delete('tko_test_labels_tests', {'test_id': test_idx})
             db.delete('tko_tests', where)

     # check for failures
     message_lines = [""]
     job_successful = True
     for test in job.tests:
         # Entries without a subdir are skipped for failure reporting.
         if not test.subdir:
             continue
         tko_utils.dprint("* testname, status, reason: %s %s %s"
                          % (test.subdir, test.status, test.reason))
         if test.status != 'GOOD':
             job_successful = False
             message_lines.append(format_failure_message(
                 jobname, test.kernel.base, test.subdir,
                 test.status, test.reason))
     if job_successful:
         # Check if we should not offload this test's results.
         if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
             # Update the gs_offloader_instructions json file.
             gs_instructions_file = os.path.join(
                     path, constants.GS_OFFLOADER_INSTRUCTIONS)
             gs_offloader_instructions = {}
             if os.path.exists(gs_instructions_file):
                 with open(gs_instructions_file, 'r') as f:
                     gs_offloader_instructions = json.load(f)

             gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
             with open(gs_instructions_file, 'w') as f:
                 json.dump(gs_offloader_instructions, f)


     message = "\n".join(message_lines)

     # send out a email report of failure
     # (message_lines starts as [""], so any appended failure row makes the
     # joined message longer than 2 characters)
     if len(message) > 2 and mail_on_failure:
         tko_utils.dprint("Sending email report of failure on %s to %s"
                          % (jobname, job.user))
         mailfailure(jobname, job, message)

     # write the job into the database.
     db.insert_job(jobname, job,
                   parent_job_id=job_keyval.get(constants.PARENT_JOB_ID, None))

     # Upload perf values to the perf dashboard, if applicable.
     for test in job.tests:
         perf_uploader.upload_test(job, test)

     # Although the cursor has autocommit, we still need to force it to commit
     # existing changes before we can use django models, otherwise it
     # will go into deadlock when django models try to start a new transaction
     # while the current one has not finished yet.
     db.commit()

     # Handle retry job.
     orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID, None)
     if orig_afe_job_id:
         orig_job_idx = tko_models.Job.objects.get(
                 afe_job_id=orig_afe_job_id).job_idx
         _invalidate_original_tests(orig_job_idx, job.index)

     # Serializing job into a binary file
     try:
         # tko_pb2 is imported only to find out whether the generated
         # protobuf module exists; an ImportError skips serialization.
         from autotest_lib.tko import tko_pb2
         from autotest_lib.tko import job_serializer

         serializer = job_serializer.JobSerializer()
         binary_file_name = os.path.join(path, "job.serialize")
         serializer.serialize_to_binary(job, jobname, binary_file_name)

         if reparse:
             site_export_file = "autotest_lib.tko.site_export"
             site_export = utils.import_site_function(__file__,
                                                      site_export_file,
                                                      "site_export",
                                                      _site_export_dummy)
             site_export(binary_file_name)

     except ImportError:
         tko_utils.dprint("DEBUG: tko_pb2.py doesn't exist. Create by "
                          "compiling tko/tko.proto.")

     db.commit()

 def _site_export_dummy(binary_file_name):
     """Do nothing with |binary_file_name|.

     parse_one passes this function to utils.import_site_function when
     looking up 'site_export' (presumably as the fallback used when no
     site-specific export exists -- confirm against import_site_function).
     """
     return None

 def _get_job_subdirs(path):
     """
     Returns a set of job subdirectories at path. Returns None if the path
     is itself a job directory. Does not recurse into the subdirs.

     A '.machines' file, when present, lists the candidate subdirectories one
     per line; only entries that actually exist under path are returned.
     Otherwise, if path contains nothing but subdirectories (ignoring the
     '.parse.lock' file), those subdirectories are returned.

     @param path: Directory to inspect.

     @return: A non-empty set of subdirectory names, or None.
     """
     # if there's a .machines file, use it to get the subdirs
     machine_list = os.path.join(path, ".machines")
     if os.path.exists(machine_list):
         # Use a context manager so the file handle is closed promptly.
         with open(machine_list) as machines:
             subdirs = set(line.strip() for line in machines)
         # A blank line would become the subdir "", which joins back to path
         # itself, always "exists", and would make the caller recurse on the
         # same directory forever.
         subdirs.discard("")
         existing_subdirs = set(subdir for subdir in subdirs
                                if os.path.exists(os.path.join(path, subdir)))
         if existing_subdirs:
             return existing_subdirs

     # if this dir contains ONLY subdirectories, return them
     contents = set(os.listdir(path))
     contents.discard(".parse.lock")
     subdirs = set(sub for sub in contents if
                   os.path.isdir(os.path.join(path, sub)))
     if subdirs and len(contents) == len(subdirs):
         return subdirs

     # this is a job directory, or something else we don't understand
     return None


 def parse_leaf_path(db, path, level, reparse, mail_on_failure):
     """Parse the job stored directly at |path|.

     The job name is made of the last |level| '/'-separated components of
     |path|. Parsing runs through db.run_with_retry; any Exception it raises
     is printed as a traceback and otherwise ignored, so the job name is
     returned even when parsing failed.

     @param db: database handle.
     @param path: The path to the results to be parsed.
     @param level: Integer, level of subdirectories to include in the job name.
     @param reparse: True/False, whether this is reparsing of the job.
     @param mail_on_failure: whether to send email on FAILED test.

     @returns: The job name of the parsed job, e.g. '123-chromeos-test/host1'
     """
     trailing_components = path.split("/")[-level:]
     jobname = "/".join(trailing_components)
     try:
         db.run_with_retry(
                 parse_one, db, jobname, path, reparse, mail_on_failure)
     except Exception:
         # Best effort: report the failure and carry on with other jobs.
         traceback.print_exc()
     return jobname


 def parse_path(db, path, level, reparse, mail_on_failure):
     """Parse |path|, descending into per-machine subdirectories if any.

     When _get_job_subdirs reports subdirectories, each one is parsed
     recursively with |level| increased by one; a 'status.log' directly in
     |path| is parsed as well. Otherwise |path| is parsed as a single job.

     @param db: database handle.
     @param path: The path to the results to be parsed.
     @param level: Integer, level of subdirectories to include in the job name.
     @param reparse: True/False, whether this is reparsing of the job.
     @param mail_on_failure: whether to send email on FAILED test.

     @returns: A set of job names of the parsed jobs.
               set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
     """
     job_subdirs = _get_job_subdirs(path)

     # single machine job
     if job_subdirs is None:
         return set([parse_leaf_path(db, path, level, reparse,
                                     mail_on_failure)])

     parsed = set()
     # parse status.log in current directory, if it exists. multi-machine
     # synchronous server side tests record output in this directory. without
     # this check, we do not parse these results.
     if os.path.exists(os.path.join(path, 'status.log')):
         parsed.add(parse_leaf_path(db, path, level, reparse, mail_on_failure))

     # multi-machine job
     for subdir in job_subdirs:
         parsed.update(parse_path(db, os.path.join(path, subdir), level + 1,
                                  reparse, mail_on_failure))
     return parsed


 def record_parsing(processed_jobs, duration_secs):
     """Report the parsing duration for every processed job.

     Each job name is split into an afe job id and a hostname; when both are
     available the duration is recorded under the PARSING state, otherwise an
     error is logged and the job is skipped.

     @param processed_jobs: A set of job names of the parsed jobs.
               set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
     @param duration_secs: Total time spent on parsing, in seconds.
     """
     for job_name in processed_jobs:
         job_id, hostname = tko_utils.get_afe_job_id_and_hostname(job_name)
         if job_id and hostname:
             job_overhead.record_state_duration(
                     job_id, hostname, job_overhead.STATUS.PARSING,
                     duration_secs)
         else:
             tko_utils.dprint('ERROR: can not parse job name %s, '
                              'will not send duration to metadata db.'
                              % job_name)


 def main():
     """Main entrance.

     Parses the command line, then parses either the given results directory
     itself (-o) or each entry inside it. Every directory is guarded by an
     exclusive flock on its '.parse.lock' file; with -n a directory that is
     already locked is skipped instead of waited for. The pidfile is closed
     with exit status 1 if anything raises and 0 otherwise. With
     --record-duration the total parsing time is reported for all processed
     jobs.
     """
     start_time = datetime.datetime.now()
     # Record the processed jobs so that
     # we can send the duration of parsing to metadata db.
     processed_jobs = set()

     options, args = parse_args()
     results_dir = os.path.abspath(args[0])
     assert os.path.exists(results_dir)

     pid_file_manager = pidfile.PidFileManager("parser", results_dir)

     if options.write_pidfile:
         pid_file_manager.open_file()

     try:
         # build up the list of job dirs to parse
         if options.singledir:
             jobs_list = [results_dir]
         else:
             jobs_list = [os.path.join(results_dir, subdir)
                          for subdir in os.listdir(results_dir)]

         # build up the database
         db = tko_db.db(autocommit=False, host=options.db_host,
                        user=options.db_user, password=options.db_pass,
                        database=options.db_name)

         # parse all the jobs
         for path in jobs_list:
             lockfile = open(os.path.join(path, ".parse.lock"), "w")
             flags = fcntl.LOCK_EX
             if options.noblock:
                 flags |= fcntl.LOCK_NB
             try:
                 fcntl.flock(lockfile, flags)
             except IOError as e:
                 # Either way the lock was not taken, so the handle is of no
                 # further use; close it before skipping or re-raising.
                 lockfile.close()
                 # lock is not available and nonblock has been requested
                 if e.errno == errno.EWOULDBLOCK:
                     continue
                 raise # something unexpected happened
             try:
                 new_jobs = parse_path(db, path, options.level, options.reparse,
                            options.mailit)
                 processed_jobs.update(new_jobs)

             finally:
                 fcntl.flock(lockfile, fcntl.LOCK_UN)
                 lockfile.close()

     except:
         # Deliberately bare: whatever ends the run, record the failure in
         # the pidfile, then let the exception propagate.
         pid_file_manager.close_file(1)
         raise
     else:
         pid_file_manager.close_file(0)
     duration_secs = (datetime.datetime.now() - start_time).total_seconds()
     if options.record_duration:
         record_parsing(processed_jobs, duration_secs)


 # Run the parser only when this file is executed as a script.
 if __name__ == "__main__":
     main()
	#!/usr/bin/python -u

	import datetime
	import json
	import os, sys, optparse, fcntl, errno, traceback, socket

	import common
	from autotest_lib.client.common_lib import mail, pidfile
	from autotest_lib.client.common_lib import utils
	from autotest_lib.frontend import setup_django_environment
	from autotest_lib.frontend.tko import models as tko_models
	from autotest_lib.server.cros.dynamic_suite import constants
	from autotest_lib.site_utils import job_overhead
	from autotest_lib.tko import db as tko_db, utils as tko_utils
	from autotest_lib.tko import models, status_lib
	from autotest_lib.tko.perf_upload import perf_uploader


	def parse_args():
	    """Parse args.

	    Builds the option parser, parses sys.argv and requires at least one
	    positional argument (the results directory). When none is given, an
	    error is reported through tko_utils.dprint, the usage text is printed
	    and the process exits with status 1.

	    @return: Tuple (options, args) as produced by optparse.
	    """
	    # build up our options parser and parse sys.argv
	    parser = optparse.OptionParser()
	    parser.add_option("-m", help="Send mail for FAILED tests",
	                      dest="mailit", action="store_true")
	    parser.add_option("-r", help="Reparse the results of a job",
	                      dest="reparse", action="store_true")
	    parser.add_option("-o", help="Parse a single results directory",
	                      dest="singledir", action="store_true")
	    parser.add_option("-l", help=("Levels of subdirectories to include "
	                                  "in the job name"),
	                      type="int", dest="level", default=1)
	    parser.add_option("-n", help="No blocking on an existing parse",
	                      dest="noblock", action="store_true")
	    parser.add_option("-s", help="Database server hostname",
	                      dest="db_host", action="store")
	    parser.add_option("-u", help="Database username", dest="db_user",
	                      action="store")
	    parser.add_option("-p", help="Database password", dest="db_pass",
	                      action="store")
	    parser.add_option("-d", help="Database name", dest="db_name",
	                      action="store")
	    parser.add_option("--write-pidfile",
	                      help="write pidfile (.parser_execute)",
	                      dest="write_pidfile", action="store_true",
	                      default=False)
	    parser.add_option("--record-duration",
	                      help="Record timing to metadata db",
	                      dest="record_duration", action="store_true",
	                      default=False)
	    options, args = parser.parse_args()

	    # we need a results directory
	    if not args:
	        tko_utils.dprint("ERROR: at least one results directory must "
	                         "be provided")
	        parser.print_help()
	        sys.exit(1)

	    # pass the options back
	    return options, args


	def format_failure_message(jobname, kernel, testname, status, reason):
	    """Format failure message with the given information.

	    The first four fields are left-justified in columns of width 12, 20,
	    12 and 10; the reason follows unpadded.

	    @param jobname: String representing the job name.
	    @param kernel: String representing the kernel.
	    @param testname: String representing the test name.
	    @param status: String representing the test status.
	    @param reason: String representing the reason.

	    @return: Failure message as a string.
	    """
	    format_string = "%-12s %-20s %-12s %-10s %s"
	    return format_string % (jobname, kernel, testname, status, reason)


	def mailfailure(jobname, job, message):
	    """Send an email about the failure.

	    The mail body is a fixed header (a link to the results on this host
	    plus a column-title row and an underline row) followed by |message|.

	    @param jobname: String representing the job name.
	    @param job: A job object; its 'user' attribute is the recipient.
	    @param message: The message to mail.
	    """
	    message_lines = [""]
	    message_lines.append("The following tests FAILED for this job")
	    message_lines.append("http://%s/results/%s" %
	                         (socket.gethostname(), jobname))
	    message_lines.append("")
	    message_lines.append(format_failure_message("Job name", "Kernel",
	                                                "Test name", "FAIL/WARN",
	                                                "Failure reason"))
	    message_lines.append(format_failure_message("=" * 8, "=" * 6, "=" * 8,
	                                                "=" * 8, "=" * 14))
	    message_header = "\n".join(message_lines)

	    subject = "AUTOTEST: FAILED tests from job %s" % jobname
	    mail.send("", job.user, "", subject, message_header + message)


	def _invalidate_original_tests(orig_job_idx, retry_job_idx):
	    """Retry tests invalidates original tests.

	    Whenever a retry job is complete, we want to invalidate the original
	    job's test results, such that the consumers of the tko database
	    (e.g. tko frontend, wmatrix) could figure out which results are the latest.

	    When a retry job is parsed, we retrieve the original job's afe_job_id
	    from the retry job's keyvals, which is then converted to tko job_idx and
	    passed into this method as |orig_job_idx|.

	    In this method, we are going to invalidate the rows in tko_tests that are
	    associated with the original job by flipping their 'invalid' bit to True.
	    In addition, in tko_tests, we also maintain a pointer from the retry results
	    to the original results, so that later we can always know which rows in
	    tko_tests are retries and which are the corresponding original results.
	    This is done by setting the field 'invalidates_test_idx' of the tests
	    associated with the retry job.

	    For example, assume Job(job_idx=105) are retried by Job(job_idx=108), after
	    this method is run, their tko_tests rows will look like:
	    __________________________________________________________________________
	    test_idx| job_idx | test            | ... | invalid | invalidates_test_idx
	    10      | 105     | dummy_Fail.Error| ... | 1       | NULL
	    11      | 105     | dummy_Fail.Fail | ... | 1       | NULL
	    ...
	    20      | 108     | dummy_Fail.Error| ... | 0       | 10
	    21      | 108     | dummy_Fail.Fail | ... | 0       | 11
	    __________________________________________________________________________
	    Note the invalid bits of the rows for Job(job_idx=105) are set to '1'.
	    And the 'invalidates_test_idx' fields of the rows for Job(job_idx=108)
	    are set to 10 and 11 (the test_idx of the rows for the original job).

	    If either index is missing (falsy), an error is logged and nothing is
	    changed.

	    @param orig_job_idx: An integer representing the original job's
	                         tko job_idx. Tests associated with this job will
	                         be marked as 'invalid'.
	    @param retry_job_idx: An integer representing the retry job's
	                          tko job_idx. The field 'invalidates_test_idx'
	                          of the tests associated with this job will be updated.

	    """
	    msg = 'orig_job_idx: %s, retry_job_idx: %s' % (orig_job_idx, retry_job_idx)
	    if not orig_job_idx or not retry_job_idx:
	        tko_utils.dprint('ERROR: Could not invalidate tests: ' + msg)
	        # Without both indices there is nothing meaningful to update; bail
	        # out instead of querying with a missing index and then reporting
	        # success below.
	        return
	    # Using django models here makes things easier, but make sure that
	    # before this method is called, all other relevant transactions have been
	    # committed to avoid race condition. In the long run, we might consider
	    # to make the rest of parser use django models.
	    orig_tests = tko_models.Test.objects.filter(job__job_idx=orig_job_idx)
	    retry_tests = tko_models.Test.objects.filter(job__job_idx=retry_job_idx)

	    # Invalidate original tests.
	    orig_tests.update(invalid=True)

	    # Maintain a dictionary that maps (test, subdir) to original tests.
	    # Note that within the scope of a job, (test, subdir) uniquely
	    # identifies a test run, but 'test' does not.
	    # In a control file, one could run the same test with different
	    # 'subdir_tag', for example,
	    #     job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_1')
	    #     job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_2')
	    # In tko, we will get
	    #    (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_1')
	    #    (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_2')
	    invalidated_tests = {(orig_test.test, orig_test.subdir): orig_test
	                         for orig_test in orig_tests}
	    for retry in retry_tests:
	        # It is possible that (retry.test, retry.subdir) doesn't exist
	        # in invalidated_tests. This could happen when the original job
	        # didn't run some of its tests. For example, a dut goes offline
	        # since the beginning of the job, in which case invalidated_tests
	        # will only have one entry for 'SERVER_JOB'.
	        orig_test = invalidated_tests.get((retry.test, retry.subdir), None)
	        if orig_test:
	            retry.invalidates_test = orig_test
	            retry.save()
	    tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)


	def parse_one(db, jobname, path, reparse, mail_on_failure):
	"""Parse a single job. Optionally send email on failure.

	@param db: database object.
	@param jobname: the tag used to search for existing job in db,
	e.g. '1234-chromeos-test/host1'
	@param path: The path to the results to be parsed.
	@param reparse: True/False, whether this is reparsing of the job.
	@param mail_on_failure: whether to send email on FAILED test.


	"""
	tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
	old_job_idx = db.find_job(jobname)
	# old tests is a dict from tuple (test_name, subdir) to test_idx
	old_tests = {}
	if old_job_idx is not None:
	if not reparse:
	tko_utils.dprint("! Job is already parsed, done")
	return

	raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
	{"job_idx": old_job_idx})
	if raw_old_tests:
	old_tests = dict(((test, subdir), test_idx)
	for test_idx, subdir, test in raw_old_tests)

	# look up the status version
	job_keyval = models.job.read_keyval(path)
	status_version = job_keyval.get("status_version", 0)

	# parse out the job
	parser = status_lib.parser(status_version)
	job = parser.make_job(path)
	status_log = os.path.join(path, "status.log")
	if not os.path.exists(status_log):
	status_log = os.path.join(path, "status")
	if not os.path.exists(status_log):
	tko_utils.dprint("! Unable to parse job, no status file")
	return

	# parse the status logs
	tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
	status_lines = open(status_log).readlines()
	parser.start(job)
	tests = parser.end(status_lines)

	# parser.end can return the same object multiple times, so filter out dups
	job.tests = []
	already_added = set()
	for test in tests:
	if test not in already_added:
	already_added.add(test)
	job.tests.append(test)

	# try and port test_idx over from the old tests, but if old tests stop
	# matching up with new ones just give up
	if reparse and old_job_idx is not None:
	job.index = old_job_idx
	for test in job.tests:
	test_idx = old_tests.pop((test.testname, test.subdir), None)
	if test_idx is not None:
	test.test_idx = test_idx
	else:
	tko_utils.dprint("! Reparse returned new test "
	"testname=%r subdir=%r" %
	(test.testname, test.subdir))
	for test_idx in old_tests.itervalues():
	where = {'test_idx' : test_idx}
	db.delete('tko_iteration_result', where)
	db.delete('tko_iteration_perf_value', where)
	db.delete('tko_iteration_attributes', where)
	db.delete('tko_test_attributes', where)
	db.delete('tko_test_labels_tests', {'test_id': test_idx})
	db.delete('tko_tests', where)

	# check for failures
	message_lines = [""]
	job_successful = True
	for test in job.tests:
	if not test.subdir:
	continue
	tko_utils.dprint("* testname, status, reason: %s %s %s"
	% (test.subdir, test.status, test.reason))
	if test.status != 'GOOD':
	job_successful = False
	message_lines.append(format_failure_message(
	jobname, test.kernel.base, test.subdir,
	test.status, test.reason))
	if job_successful:
	# Check if we should not offload this test's results.
	if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
	# Update the gs_offloader_instructions json file.
	gs_instructions_file = os.path.join(
	path, constants.GS_OFFLOADER_INSTRUCTIONS)
	gs_offloader_instructions = {}
	if os.path.exists(gs_instructions_file):
	with open(gs_instructions_file, 'r') as f:
	gs_offloader_instructions = json.load(f)

	gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
	with open(gs_instructions_file, 'w') as f:
	json.dump(gs_offloader_instructions, f)


	message = "\n".join(message_lines)

	# send out an email report of failure
	if len(message) > 2 and mail_on_failure:
	tko_utils.dprint("Sending email report of failure on %s to %s"
	% (jobname, job.user))
	mailfailure(jobname, job, message)

	# write the job into the database.
	db.insert_job(jobname, job,
	parent_job_id=job_keyval.get(constants.PARENT_JOB_ID, None))

	# Upload perf values to the perf dashboard, if applicable.
	for test in job.tests:
	perf_uploader.upload_test(job, test)

	# Although the cursor has autocommit, we still need to force it to commit
	# existing changes before we can use django models, otherwise it
	# will go into deadlock when django models try to start a new transaction
	# while the current one has not finished yet.
	db.commit()

	# Handle retry job.
	orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID, None)
	if orig_afe_job_id:
	orig_job_idx = tko_models.Job.objects.get(
	afe_job_id=orig_afe_job_id).job_idx
	_invalidate_original_tests(orig_job_idx, job.index)

	# Serializing job into a binary file
	try:
	from autotest_lib.tko import tko_pb2
	from autotest_lib.tko import job_serializer

	serializer = job_serializer.JobSerializer()
	binary_file_name = os.path.join(path, "job.serialize")
	serializer.serialize_to_binary(job, jobname, binary_file_name)

	if reparse:
	site_export_file = "autotest_lib.tko.site_export"
	site_export = utils.import_site_function(__file__,
	site_export_file,
	"site_export",
	_site_export_dummy)
	site_export(binary_file_name)

	except ImportError:
	tko_utils.dprint("DEBUG: tko_pb2.py doesn't exist. Create by "
	"compiling tko/tko.proto.")

	db.commit()

	def _site_export_dummy(binary_file_name):
	    """Do nothing; placeholder callable for the site_export lookup.

	    This is the function handed to utils.import_site_function as the
	    stand-in when "site_export" is looked up.

	    @param binary_file_name: Path of the serialized job file; ignored.
	    """
	    return None

	def _get_job_subdirs(path):
	    """Find the job subdirectories directly under path, without recursing.

	    A .machines file in path, when present, names the candidate
	    subdirectories (one per line); only those that exist are returned.
	    Otherwise, if path holds nothing but subdirectories (ignoring the
	    .parse.lock file), those subdirectories are returned.

	    @param path: The directory to inspect.

	    @returns: A set of subdirectory names relative to path, or None if
	              path is itself a job directory (or something else we
	              don't understand).
	    """
	    # if there's a .machines file, use it to get the subdirs
	    machine_list = os.path.join(path, ".machines")
	    if os.path.exists(machine_list):
	        with open(machine_list) as machines:
	            stripped = (line.strip() for line in machines)
	            # Drop blank lines: an empty name joins back to path itself,
	            # which always exists, and would make the caller recurse into
	            # the same directory forever.
	            subdirs = set(name for name in stripped if name)
	        existing_subdirs = set(subdir for subdir in subdirs
	                               if os.path.exists(os.path.join(path, subdir)))
	        if existing_subdirs:
	            return existing_subdirs

	    # if this dir contains ONLY subdirectories, return them
	    contents = set(os.listdir(path))
	    contents.discard(".parse.lock")
	    subdirs = set(sub for sub in contents
	                  if os.path.isdir(os.path.join(path, sub)))
	    if subdirs and len(contents) == len(subdirs):
	        return subdirs

	    # this is a job directory, or something else we don't understand
	    return None


	def parse_leaf_path(db, path, level, reparse, mail_on_failure):
	    """Parse a single results directory as one job.

	    Any exception raised while parsing is printed as a traceback and not
	    re-raised, so the job name is returned either way.

	    @param db: database handle.
	    @param path: The path to the results to be parsed.
	    @param level: Integer, level of subdirectories to include in the job name.
	    @param reparse: True/False, whether this is reparsing of the job.
	    @param mail_on_failure: whether to send email on FAILED test.

	    @returns: The job name of the parsed job, e.g. '123-chromeos-test/host1'
	    """
	    # the job name is the last `level` components of the path
	    jobname = "/".join(path.split("/")[-level:])
	    try:
	        db.run_with_retry(parse_one, db, jobname, path, reparse,
	                          mail_on_failure)
	    except Exception:
	        traceback.print_exc()
	    return jobname


	def parse_path(db, path, level, reparse, mail_on_failure):
	    """Parse a results path, descending into job subdirectories if any.

	    @param db: database handle.
	    @param path: The path to the results to be parsed.
	    @param level: Integer, level of subdirectories to include in the job name.
	    @param reparse: True/False, whether this is reparsing of the job.
	    @param mail_on_failure: whether to send email on FAILED test.

	    @returns: A set of job names of the parsed jobs.
	              set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
	    """
	    job_subdirs = _get_job_subdirs(path)
	    if job_subdirs is None:
	        # single machine job
	        return set([parse_leaf_path(db, path, level, reparse,
	                                    mail_on_failure)])

	    processed_jobs = set()
	    # multi-machine synchronous server side tests record output in this
	    # directory itself, so parse a status.log here if it exists; without
	    # this check those results would not be parsed.
	    if os.path.exists(os.path.join(path, 'status.log')):
	        processed_jobs.add(
	                parse_leaf_path(db, path, level, reparse, mail_on_failure))

	    # multi-machine job: each subdirectory adds one level to the job name
	    for subdir in job_subdirs:
	        processed_jobs.update(
	                parse_path(db, os.path.join(path, subdir), level + 1,
	                           reparse, mail_on_failure))
	    return processed_jobs


	def record_parsing(processed_jobs, duration_secs):
	    """Send the time spent on parsing to the metadata db, once per job.

	    Job names from which no AFE job id and hostname can be extracted are
	    reported with an error message and skipped.

	    @param processed_jobs: A set of job names of the parsed jobs.
	              set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
	    @param duration_secs: Total time spent on parsing, in seconds.
	    """
	    for job_name in processed_jobs:
	        job_id, hostname = tko_utils.get_afe_job_id_and_hostname(job_name)
	        if job_id and hostname:
	            job_overhead.record_state_duration(
	                    job_id, hostname, job_overhead.STATUS.PARSING,
	                    duration_secs)
	        else:
	            tko_utils.dprint('ERROR: can not parse job name %s, '
	                             'will not send duration to metadata db.'
	                             % job_name)


	def main():
	    """Main entrance.

	    Parses the results directory given as the first positional argument.
	    With -o (singledir) that directory is the only job parsed; otherwise
	    every entry inside it is treated as a job directory. Each job
	    directory is guarded by an exclusive flock on its .parse.lock file
	    while it is parsed; with -n (noblock) a directory whose lock is
	    already held is skipped instead of waited for.

	    The pid file manager is closed with 1 if anything raised (the
	    exception is then re-raised) and with 0 otherwise. If
	    options.record_duration is set, the total parsing time is recorded for
	    every processed job.
	    """
	    start_time = datetime.datetime.now()
	    # Record the processed jobs so that
	    # we can send the duration of parsing to metadata db.
	    processed_jobs = set()

	    options, args = parse_args()
	    results_dir = os.path.abspath(args[0])
	    assert os.path.exists(results_dir)

	    pid_file_manager = pidfile.PidFileManager("parser", results_dir)

	    if options.write_pidfile:
	        pid_file_manager.open_file()

	    try:
	        # build up the list of job dirs to parse
	        if options.singledir:
	            jobs_list = [results_dir]
	        else:
	            jobs_list = [os.path.join(results_dir, subdir)
	                         for subdir in os.listdir(results_dir)]

	        # build up the database
	        db = tko_db.db(autocommit=False, host=options.db_host,
	                       user=options.db_user, password=options.db_pass,
	                       database=options.db_name)

	        # parse all the jobs
	        for path in jobs_list:
	            # The with block closes the lock file on every way out of this
	            # iteration, including an unexpected flock() failure.
	            with open(os.path.join(path, ".parse.lock"), "w") as lockfile:
	                flags = fcntl.LOCK_EX
	                if options.noblock:
	                    flags |= fcntl.LOCK_NB
	                try:
	                    fcntl.flock(lockfile, flags)
	                except IOError as e:
	                    # lock is not available and nonblock has been requested
	                    if e.errno == errno.EWOULDBLOCK:
	                        continue
	                    raise  # something unexpected happened
	                try:
	                    new_jobs = parse_path(db, path, options.level,
	                                          options.reparse, options.mailit)
	                    processed_jobs.update(new_jobs)
	                finally:
	                    fcntl.flock(lockfile, fcntl.LOCK_UN)

	    except:
	        # Deliberately bare: whatever stops the parse is reported through
	        # the pid file before being re-raised unchanged.
	        pid_file_manager.close_file(1)
	        raise
	    else:
	        pid_file_manager.close_file(0)
	    duration_secs = (datetime.datetime.now() - start_time).total_seconds()
	    if options.record_duration:
	        record_parsing(processed_jobs, duration_secs)


	# Script entry point: run the parser only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()