autotest: move status_history out of site_utils into server/lib
CQ-DEPEND=CL:378599
BUG=chromium:641093
TEST=Ran dut_status command, verified working.
Change-Id: I9f60e2dff24683e66af00c7ce1c8a825b239e782
Reviewed-on: https://chromium-review.googlesource.com/378435
Commit-Ready: Aviv Keshet <akeshet@chromium.org>
Tested-by: Aviv Keshet <akeshet@chromium.org>
Reviewed-by: Richard Barnette <jrbarnette@google.com>
diff --git a/site_utils/balance_pools.py b/site_utils/balance_pools.py
index f614f34..c9ca4ed 100755
--- a/site_utils/balance_pools.py
+++ b/site_utils/balance_pools.py
@@ -58,8 +58,8 @@
import common
from autotest_lib.server import frontend
+from autotest_lib.server.lib import status_history
from autotest_lib.site_utils import lab_inventory
-from autotest_lib.site_utils import status_history
from autotest_lib.site_utils.suite_scheduler import constants
from chromite.lib import parallel
diff --git a/site_utils/diagnosis_utils.py b/site_utils/diagnosis_utils.py
index bc8a97e..25abb06 100644
--- a/site_utils/diagnosis_utils.py
+++ b/site_utils/diagnosis_utils.py
@@ -14,7 +14,7 @@
from autotest_lib.server import utils
from autotest_lib.server.cros.dynamic_suite import reporting
from autotest_lib.server.cros.dynamic_suite import reporting_utils
-from autotest_lib.site_utils import status_history
+from autotest_lib.server.lib import status_history
class BoardNotAvailableError(utils.TestLabException):
diff --git a/site_utils/dut_status.py b/site_utils/dut_status.py
index 09c269f..935ad60 100755
--- a/site_utils/dut_status.py
+++ b/site_utils/dut_status.py
@@ -110,7 +110,7 @@
import common
from autotest_lib.client.common_lib import time_utils
from autotest_lib.server import frontend
-from autotest_lib.site_utils import status_history
+from autotest_lib.server.lib import status_history
# The fully qualified name makes for lines that are too long, so
diff --git a/site_utils/lab_inventory.py b/site_utils/lab_inventory.py
index c1e5b2d..c0f807d 100755
--- a/site_utils/lab_inventory.py
+++ b/site_utils/lab_inventory.py
@@ -59,8 +59,8 @@
from autotest_lib.client.common_lib import time_utils
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.hosts import servo_host
+from autotest_lib.server.lib import status_history
from autotest_lib.site_utils import gmail_lib
-from autotest_lib.site_utils import status_history
from autotest_lib.site_utils.suite_scheduler import constants
diff --git a/site_utils/lab_inventory_unittest.py b/site_utils/lab_inventory_unittest.py
index b4ee05d..a283ca2 100755
--- a/site_utils/lab_inventory_unittest.py
+++ b/site_utils/lab_inventory_unittest.py
@@ -9,8 +9,8 @@
import unittest
import common
+from autotest_lib.server.lib import status_history
from autotest_lib.site_utils import lab_inventory
-from autotest_lib.site_utils import status_history
class _FakeHost(object):
diff --git a/site_utils/status_history.py b/site_utils/status_history.py
deleted file mode 100755
index 0adfea5..0000000
--- a/site_utils/status_history.py
+++ /dev/null
@@ -1,656 +0,0 @@
-# Copyright 2015 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Services relating to DUT status and job history.
-
-The central abstraction of this module is the `HostJobHistory`
-class. This class provides two related pieces of information
-regarding a single DUT:
- * A history of tests and special tasks that have run on
- the DUT in a given time range.
- * Whether the DUT was "working" or "broken" at a given
- time.
-
-The "working" or "broken" status of a DUT is determined by
-the DUT's special task history. At the end of any job or
-task, the status is indicated as follows:
- * After any successful special task, the DUT is considered
- "working".
- * After any failed Repair task, the DUT is considered "broken".
- * After any other special task or after any regular test job, the
- DUT's status is considered unchanged.
-
-Definitions for terms used in the code below:
- * status task - Any special task that determines the DUT's
- status; that is, any successful task, or any failed Repair.
- * diagnosis interval - A time interval during which DUT status
- changed either from "working" to "broken", or vice versa. The
- interval starts with the last status task with the old status,
- and ends after the first status task with the new status.
-
-Diagnosis intervals are interesting because they normally contain
-the logs explaining a failure or repair event.
-
-"""
-
-import common
-from autotest_lib.frontend import setup_django_environment
-from django.db import models as django_models
-
-from autotest_lib.client.common_lib import global_config
-from autotest_lib.client.common_lib import time_utils
-from autotest_lib.frontend.afe import models as afe_models
-from autotest_lib.site_utils.suite_scheduler import constants
-
-
-# Values used to describe the diagnosis of a DUT. These values are
-# used to indicate both DUT status after a job or task, and also
-# diagnosis of whether the DUT was working at the end of a given
-# time interval.
-#
-# UNUSED: Used when there are no events recorded in a given
-# time interval.
-# UNKNOWN: For an individual event, indicates that the DUT status
-# is unchanged from the previous event. For a time interval,
-# indicates that the DUT's status can't be determined from the
-# DUT's history.
-# WORKING: Indicates that the DUT was working normally after the
-# event, or at the end of the time interval.
-# BROKEN: Indicates that the DUT needed manual repair after the
-# event, or at the end of the time interval.
-#
-UNUSED = 0
-UNKNOWN = 1
-WORKING = 2
-BROKEN = 3
-
-
-def parse_time(time_string):
- """Parse time according to a canonical form.
-
- The "canonical" form is the form in which date/time
- values are stored in the database.
-
- @param time_string Time to be parsed.
- """
- return int(time_utils.to_epoch_time(time_string))
-
-
-class _JobEvent(object):
- """Information about an event in host history.
-
- This remembers the relevant data from a single event in host
- history. An event is any change in DUT state caused by a job
- or special task. The data captured are the start and end times
- of the event, the URL of logs to the job or task causing the
- event, and a diagnosis of whether the DUT was working or failed
- afterwards.
-
- This class is an adapter around the database model objects
- describing jobs and special tasks. This is an abstract
- superclass, with concrete subclasses for `HostQueueEntry` and
- `SpecialTask` objects.
-
- @property start_time Time the job or task began execution.
- @property end_time Time the job or task finished execution.
- @property id id of the event in the AFE database.
- @property name Name of the event, derived from the AFE database.
- @property job_status Short string describing the event's final status.
- @property job_url URL to the logs for the event's job.
- @property diagnosis Working status of the DUT after the event.
-
- """
-
- get_config_value = global_config.global_config.get_config_value
- _LOG_URL_PATTERN = get_config_value('CROS', 'log_url_pattern')
-
- @classmethod
- def get_log_url(cls, afe_hostname, logdir):
- """Return a URL to job results.
-
- The URL is constructed from a base URL determined by the
- global config, plus the relative path of the job's log
- directory.
-
- @param afe_hostname Hostname for autotest frontend
- @param logdir Relative path of the results log directory.
-
- @return A URL to the requested results log.
-
- """
- return cls._LOG_URL_PATTERN % (afe_hostname, logdir)
-
-
- def __init__(self, start_time, end_time):
- self.start_time = parse_time(start_time)
- self.end_time = parse_time(end_time)
-
-
- def __cmp__(self, other):
- """Compare two jobs by their start time.
-
- This is a standard Python `__cmp__` method to allow sorting
- `_JobEvent` objects by their times.
-
- @param other The `_JobEvent` object to compare to `self`.
-
- """
- return self.start_time - other.start_time
-
-
- @property
- def id(self):
- """Return the id of the event in the AFE database."""
- raise NotImplemented()
-
-
- @property
- def name(self):
- """Return the name of the event."""
- raise NotImplemented()
-
-
- @property
- def job_status(self):
- """Return a short string describing the event's final status."""
- raise NotImplemented()
-
-
- @property
- def job_url(self):
- """Return the URL for this event's job logs."""
- raise NotImplemented()
-
-
- @property
- def diagnosis(self):
- """Return the status of the DUT after this event.
-
- The diagnosis is interpreted as follows:
- UNKNOWN - The DUT status was the same before and after
- the event.
- WORKING - The DUT appeared to be working after the event.
- BROKEN - The DUT likely required manual intervention
- after the event.
-
- @return A valid diagnosis value.
-
- """
- raise NotImplemented()
-
-
-class _SpecialTaskEvent(_JobEvent):
- """`_JobEvent` adapter for special tasks.
-
- This class wraps the standard `_JobEvent` interface around a row
- in the `afe_special_tasks` table.
-
- """
-
- @classmethod
- def get_tasks(cls, afe, host_id, start_time, end_time):
- """Return special tasks for a host in a given time range.
-
- Return a list of `_SpecialTaskEvent` objects representing all
- special tasks that ran on the given host in the given time
- range. The list is ordered as it was returned by the query
- (i.e. unordered).
-
- @param afe Autotest frontend
- @param host_id Database host id of the desired host.
- @param start_time Start time of the range of interest.
- @param end_time End time of the range of interest.
-
- @return A list of `_SpecialTaskEvent` objects.
-
- """
- query_start = time_utils.epoch_time_to_date_string(start_time)
- query_end = time_utils.epoch_time_to_date_string(end_time)
- tasks = afe.get_host_special_tasks(
- host_id,
- time_started__gte=query_start,
- time_finished__lte=query_end,
- is_complete=1)
- return [cls(afe.server, t) for t in tasks]
-
-
- @classmethod
- def get_status_task(cls, afe, host_id, end_time):
- """Return the task indicating a host's status at a given time.
-
- The task returned determines the status of the DUT; the
- diagnosis on the task indicates the diagnosis for the DUT at
- the given `end_time`.
-
- @param afe Autotest frontend
- @param host_id Database host id of the desired host.
- @param end_time Find status as of this time.
-
- @return A `_SpecialTaskEvent` object for the requested task,
- or `None` if no task was found.
-
- """
- query_end = time_utils.epoch_time_to_date_string(end_time)
- task = afe.get_host_status_task(host_id, query_end)
- return cls(afe.server, task) if task else None
-
-
- def __init__(self, afe_hostname, afetask):
- self._afe_hostname = afe_hostname
- self._afetask = afetask
- super(_SpecialTaskEvent, self).__init__(
- afetask.time_started, afetask.time_finished)
-
-
- @property
- def id(self):
- return self._afetask.id
-
-
- @property
- def name(self):
- return self._afetask.task
-
-
- @property
- def job_status(self):
- if self._afetask.is_aborted:
- return 'ABORTED'
- elif self._afetask.success:
- return 'PASS'
- else:
- return 'FAIL'
-
-
- @property
- def job_url(self):
- logdir = ('hosts/%s/%s-%s' %
- (self._afetask.host.hostname, self._afetask.id,
- self._afetask.task.lower()))
- return _SpecialTaskEvent.get_log_url(self._afe_hostname, logdir)
-
-
- @property
- def diagnosis(self):
- if self._afetask.success:
- return WORKING
- elif self._afetask.task == 'Repair':
- return BROKEN
- else:
- return UNKNOWN
-
-
-class _TestJobEvent(_JobEvent):
- """`_JobEvent` adapter for regular test jobs.
-
- This class wraps the standard `_JobEvent` interface around a row
- in the `afe_host_queue_entries` table.
-
- """
-
- @classmethod
- def get_hqes(cls, afe, host_id, start_time, end_time):
- """Return HQEs for a host in a given time range.
-
- Return a list of `_TestJobEvent` objects representing all the
- HQEs of all the jobs that ran on the given host in the given
- time range. The list is ordered as it was returned by the
- query (i.e. unordered).
-
- @param afe Autotest frontend
- @param host_id Database host id of the desired host.
- @param start_time Start time of the range of interest.
- @param end_time End time of the range of interest.
-
- @return A list of `_TestJobEvent` objects.
-
- """
- query_start = time_utils.epoch_time_to_date_string(start_time)
- query_end = time_utils.epoch_time_to_date_string(end_time)
- hqelist = afe.get_host_queue_entries(
- host_id=host_id,
- start_time=query_start,
- end_time=query_end,
- complete=1)
- return [cls(afe.server, hqe) for hqe in hqelist]
-
-
- def __init__(self, afe_hostname, hqe):
- self._afe_hostname = afe_hostname
- self._hqe = hqe
- super(_TestJobEvent, self).__init__(
- hqe.started_on, hqe.finished_on)
-
-
- @property
- def id(self):
- return self._hqe.id
-
-
- @property
- def name(self):
- return self._hqe.job.name
-
-
- @property
- def job_status(self):
- return self._hqe.status
-
-
- @property
- def job_url(self):
- logdir = '%s-%s' % (self._hqe.job.id, self._hqe.job.owner)
- return _TestJobEvent.get_log_url(self._afe_hostname, logdir)
-
-
- @property
- def diagnosis(self):
- return UNKNOWN
-
-
-class HostJobHistory(object):
- """Class to query and remember DUT execution and status history.
-
- This class is responsible for querying the database to determine
- the history of a single DUT in a time interval of interest, and
- for remembering the query results for reporting.
-
- @property hostname Host name of the DUT.
- @property start_time Start of the requested time interval.
- This field may be `None`.
- @property end_time End of the requested time interval.
- @property _afe Autotest frontend for queries.
- @property _host Database host object for the DUT.
- @property _history A list of jobs and special tasks that
- ran on the DUT in the requested time
- interval, ordered in reverse, from latest
- to earliest.
-
- @property _status_interval A list of all the jobs and special
- tasks that ran on the DUT in the
- last diagnosis interval prior to
- `end_time`, ordered from latest to
- earliest.
- @property _status_diagnosis The DUT's status as of `end_time`.
- @property _status_task The DUT's last status task as of
- `end_time`.
-
- """
-
- @classmethod
- def get_host_history(cls, afe, hostname, start_time, end_time):
- """Create a `HostJobHistory` instance for a single host.
-
- Simple factory method to construct host history from a
- hostname. Simply looks up the host in the AFE database, and
- passes it to the class constructor.
-
- @param afe Autotest frontend
- @param hostname Name of the host.
- @param start_time Start time for the history's time
- interval.
- @param end_time End time for the history's time interval.
-
- @return A new `HostJobHistory` instance.
-
- """
- afehost = afe.get_hosts(hostname=hostname)[0]
- return cls(afe, afehost, start_time, end_time)
-
-
- @classmethod
- def get_multiple_histories(cls, afe, start_time, end_time,
- board=None, pool=None):
- """Create `HostJobHistory` instances for a set of hosts.
-
- The set of hosts can be specified as "all hosts of a given
- board type", "all hosts in a given pool", or "all hosts
- of a given board and pool".
-
- @param afe Autotest frontend
- @param start_time Start time for the history's time
- interval.
- @param end_time End time for the history's time interval.
- @param board All hosts must have this board type; if
- `None`, all boards are allowed.
- @param pool All hosts must be in this pool; if
- `None`, all pools are allowed.
-
- @return A list of new `HostJobHistory` instances.
-
- """
- # If `board` or `pool` are both `None`, we could search the
- # entire database, which is more expensive than we want.
- # Our caller currently won't (can't) do this, but assert to
- # be safe.
- assert board is not None or pool is not None
- labels = []
- if board is not None:
- labels.append(constants.Labels.BOARD_PREFIX + board)
- if pool is not None:
- labels.append(constants.Labels.POOL_PREFIX + pool)
- kwargs = {'multiple_labels': labels}
- hosts = afe.get_hosts(**kwargs)
- return [cls(afe, h, start_time, end_time) for h in hosts]
-
-
- def __init__(self, afe, afehost, start_time, end_time):
- self._afe = afe
- self.hostname = afehost.hostname
- self.end_time = end_time
- self.start_time = start_time
- self._host = afehost
- # Don't spend time on queries until they're needed.
- self._history = None
- self._status_interval = None
- self._status_diagnosis = None
- self._status_task = None
-
-
- def _get_history(self, start_time, end_time):
- """Get the list of events for the given interval."""
- newtasks = _SpecialTaskEvent.get_tasks(
- self._afe, self._host.id, start_time, end_time)
- newhqes = _TestJobEvent.get_hqes(
- self._afe, self._host.id, start_time, end_time)
- newhistory = newtasks + newhqes
- newhistory.sort(reverse=True)
- return newhistory
-
-
- def __iter__(self):
- if self._history is None:
- self._history = self._get_history(self.start_time,
- self.end_time)
- return self._history.__iter__()
-
-
- def _extract_prefixed_label(self, prefix):
- labels = [l for l in self._host.labels
- if l.startswith(prefix)]
- return labels[0][len(prefix) : ] if labels else None
-
-
- @property
- def host(self):
- """Return the AFE host object for this history."""
- return self._host
-
-
- @property
- def host_board(self):
- """Return the board name for this history's DUT."""
- prefix = constants.Labels.BOARD_PREFIX
- return self._extract_prefixed_label(prefix)
-
-
- @property
- def host_pool(self):
- """Return the pool name for this history's DUT."""
- prefix = constants.Labels.POOL_PREFIX
- return self._extract_prefixed_label(prefix)
-
-
- def _init_status_task(self):
- """Fill in `self._status_diagnosis` and `_status_task`."""
- if self._status_diagnosis is not None:
- return
- self._status_task = _SpecialTaskEvent.get_status_task(
- self._afe, self._host.id, self.end_time)
- if self._status_task is not None:
- self._status_diagnosis = self._status_task.diagnosis
- else:
- self._status_diagnosis = UNKNOWN
-
-
- def _init_status_interval(self):
- """Fill in `self._status_interval`."""
- if self._status_interval is not None:
- return
- self._init_status_task()
- self._status_interval = []
- if self._status_task is None:
- return
- query_end = time_utils.epoch_time_to_date_string(self.end_time)
- interval = self._afe.get_host_diagnosis_interval(
- self._host.id, query_end,
- self._status_diagnosis != WORKING)
- if not interval:
- return
- self._status_interval = self._get_history(
- parse_time(interval[0]),
- parse_time(interval[1]))
-
-
- def diagnosis_interval(self):
- """Find this history's most recent diagnosis interval.
-
- Returns a list of `_JobEvent` instances corresponding to the
- most recent diagnosis interval occurring before this
- history's end time.
-
- The list is returned as with `self._history`, ordered from
- most to least recent.
-
- @return The list of the `_JobEvent`s in the diagnosis
- interval.
-
- """
- self._init_status_interval()
- return self._status_interval
-
-
- def last_diagnosis(self):
- """Return the diagnosis of whether the DUT is working.
-
- This searches the DUT's job history, looking for the most
- recent status task for the DUT. Return a tuple of
- `(diagnosis, task)`.
-
- The `diagnosis` entry in the tuple is one of these values:
- * UNUSED - The host's last status task is older than
- `self.start_time`.
- * WORKING - The DUT is working.
- * BROKEN - The DUT likely requires manual intervention.
- * UNKNOWN - No task could be found indicating status for
- the DUT.
-
- If the DUT was working at last check, but hasn't been used
- inside this history's time interval, the status `UNUSED` is
- returned with the last status task, instead of `WORKING`.
-
- The `task` entry in the tuple is the status task that led to
- the diagnosis. The task will be `None` if the diagnosis is
- `UNKNOWN`.
-
- @return A tuple with the DUT's diagnosis and the task that
- determined it.
-
- """
- self._init_status_task()
- diagnosis = self._status_diagnosis
- if (self.start_time is not None and
- self._status_task is not None and
- self._status_task.end_time < self.start_time and
- diagnosis == WORKING):
- diagnosis = UNUSED
- return diagnosis, self._status_task
-
-
-def get_diagnosis_interval(host_id, end_time, success):
- """Return the last diagnosis interval for a given host and time.
-
- This routine queries the database for the special tasks on a
- given host before a given time. From those tasks it selects the
- last status task before a change in status, and the first status
- task after the change. When `success` is true, the change must
- be from "working" to "broken". When false, the search is for a
- change in the opposite direction.
-
- A "successful status task" is any successful special task. A
- "failed status task" is a failed Repair task. These criteria
- are based on the definition of "status task" in the module-level
- docstring, above.
-
- This is the RPC endpoint for `AFE.get_host_diagnosis_interval()`.
-
- @param host_id Database host id of the desired host.
- @param end_time Find the last eligible interval before this time.
- @param success Whether the eligible interval should start with a
- success or a failure.
-
- @return A list containing the start time of the earliest job
- selected, and the end time of the latest job.
-
- """
- base_query = afe_models.SpecialTask.objects.filter(
- host_id=host_id, is_complete=True)
- success_query = base_query.filter(success=True)
- failure_query = base_query.filter(success=False, task='Repair')
- if success:
- query0 = success_query
- query1 = failure_query
- else:
- query0 = failure_query
- query1 = success_query
- query0 = query0.filter(time_finished__lte=end_time)
- query0 = query0.order_by('time_started').reverse()
- if not query0:
- return []
- task0 = query0[0]
- query1 = query1.filter(time_finished__gt=task0.time_finished)
- task1 = query1.order_by('time_started')[0]
- return [task0.time_started.strftime(time_utils.TIME_FMT),
- task1.time_finished.strftime(time_utils.TIME_FMT)]
-
-
-def get_status_task(host_id, end_time):
- """Get the last status task for a host before a given time.
-
- This routine returns a Django query for the AFE database to find
- the last task that finished on the given host before the given
- time that was either a successful task, or a Repair task. The
- query criteria are based on the definition of "status task" in
- the module-level docstring, above.
-
- This is the RPC endpoint for `_SpecialTaskEvent.get_status_task()`.
-
- @param host_id Database host id of the desired host.
- @param end_time End time of the range of interest.
-
- @return A Django query-set selecting the single special task of
- interest.
-
- """
- # Selects status tasks: any Repair task, or any successful task.
- status_tasks = (django_models.Q(task='Repair') |
- django_models.Q(success=True))
- # Our caller needs a Django query set in order to serialize the
- # result, so we don't resolve the query here; we just return a
- # slice with at most one element.
- return afe_models.SpecialTask.objects.filter(
- status_tasks,
- host_id=host_id,
- time_finished__lte=end_time,
- is_complete=True).order_by('time_started').reverse()[0:1]