[autotest] Drop remaining references to autotest_stats
We are neither looking at the graphite dashboard anymore, nor
maintaining it really. OTOH, the autotest_stats module costs ~0.25
seconds on module load time, hitting us in places where it matters.
So, drop all uses, also removing the false sense of security.
BUG=chromium:739466
TEST=unittests.
Change-Id: I0e4118653d929a5d55f4ff62b96e9c66e855efd7
Reviewed-on: https://chromium-review.googlesource.com/559832
Commit-Ready: Prathmesh Prabhu <pprabhu@chromium.org>
Tested-by: Prathmesh Prabhu <pprabhu@chromium.org>
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
diff --git a/frontend/afe/json_rpc/serviceHandler.py b/frontend/afe/json_rpc/serviceHandler.py
index 577b6da..2a0fd85 100644
--- a/frontend/afe/json_rpc/serviceHandler.py
+++ b/frontend/afe/json_rpc/serviceHandler.py
@@ -32,12 +32,6 @@
from json import encoder
json_encoder = encoder.JSONEncoder()
-# TODO(akeshet): Eliminate this and replace with monarch metrics. (At the
-# moment, I don't think we can just easily swap out, because this module is
-# called by apache for rpc handling, and we don't have a ts_mon thread for that
-# yet).
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
-
json_decoder = decoder.JSONDecoder()
@@ -107,22 +101,15 @@
except KeyError:
raise BadServiceRequest(request)
- autotest_stats.Counter('rpc').increment(methName)
-
metadata = request.copy()
metadata['_type'] = 'rpc'
metadata['rpc_server'] = socket.gethostname()
- timer = autotest_stats.Timer('rpc', metadata=metadata)
-
try:
- timer.start()
meth = self.findServiceEndpoint(methName)
results['result'] = self.invokeServiceEndpoint(meth, args)
except Exception, err:
results['err_traceback'] = traceback.format_exc()
results['err'] = err
- finally:
- timer.stop(methName)
return results
diff --git a/frontend/afe/model_logic.py b/frontend/afe/model_logic.py
index 5fe544b..3bad06b 100644
--- a/frontend/afe/model_logic.py
+++ b/frontend/afe/model_logic.py
@@ -9,9 +9,6 @@
from django.db import transaction
from django.db.models.sql import query
import django.db.models.sql.where
-# TODO(akeshet): Replace with monarch stats once we know how to instrument rpc
-# handling with ts_mon.
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.frontend.afe import rdb_model_extensions
@@ -932,22 +929,18 @@
@returns: Dictionary representation of the object.
"""
serialized = {}
- timer = autotest_stats.Timer('serialize_latency.%s' % (
- type(self).__name__))
- with timer.get_client('local'):
- for field in self._meta.concrete_model._meta.local_fields:
- if field.rel is None:
- serialized[field.name] = field._get_val_from_obj(self)
- elif field.name in self.SERIALIZATION_LINKS_TO_KEEP:
- # attname will contain "_id" suffix for foreign keys,
- # e.g. HostAttribute.host will be serialized as 'host_id'.
- # Use it for easy deserialization.
- serialized[field.attname] = field._get_val_from_obj(self)
+ for field in self._meta.concrete_model._meta.local_fields:
+ if field.rel is None:
+ serialized[field.name] = field._get_val_from_obj(self)
+ elif field.name in self.SERIALIZATION_LINKS_TO_KEEP:
+ # attname will contain "_id" suffix for foreign keys,
+ # e.g. HostAttribute.host will be serialized as 'host_id'.
+ # Use it for easy deserialization.
+ serialized[field.attname] = field._get_val_from_obj(self)
if include_dependencies:
- with timer.get_client('related'):
- for link in self.SERIALIZATION_LINKS_TO_FOLLOW:
- serialized[link] = self._serialize_relation(link)
+ for link in self.SERIALIZATION_LINKS_TO_FOLLOW:
+ serialized[link] = self._serialize_relation(link)
return serialized
@@ -1117,12 +1110,8 @@
except cls.DoesNotExist:
instance = cls()
- timer = autotest_stats.Timer('deserialize_latency.%s' % (
- type(instance).__name__))
- with timer.get_client('local'):
- instance._deserialize_local(local)
- with timer.get_client('related'):
- instance._deserialize_relations(related)
+ instance._deserialize_local(local)
+ instance._deserialize_relations(related)
return instance
diff --git a/frontend/afe/rpc_interface.py b/frontend/afe/rpc_interface.py
index c186c39..25910b7 100644
--- a/frontend/afe/rpc_interface.py
+++ b/frontend/afe/rpc_interface.py
@@ -43,9 +43,6 @@
from django.db.utils import DatabaseError
import common
-# TODO(akeshet): Replace with monarch stats once we know how to instrument rpc
-# server with ts_mon.
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
@@ -1655,14 +1652,9 @@
"""
getter = control_file_getter.DevServerGetter.create(build, ds)
devserver_name = ds.hostname
- timer = autotest_stats.Timer('control_files.parse.%s.%s' %
- (devserver_name.replace('.', '_'),
- suite_name.rsplit('.')[-1]))
# Get the control file for the suite.
try:
- with timer:
- control_file_in = getter.get_control_file_contents_by_name(
- suite_name)
+ control_file_in = getter.get_control_file_contents_by_name(suite_name)
except error.CrosDynamicSuiteException as e:
raise type(e)('Failed to get control file for %s '
'(devserver: %s) (error: %s)' %
@@ -1713,11 +1705,8 @@
ds = dev_server.resolve(build, hostname=hostname)
ds_name = ds.hostname
timings[constants.DOWNLOAD_STARTED_TIME] = formatted_now()
- timer = autotest_stats.Timer('control_files.stage.%s' % (
- ds_name.replace('.', '_')))
try:
- with timer:
- ds.stage_artifacts(image=build, artifacts=['test_suites'])
+ ds.stage_artifacts(image=build, artifacts=['test_suites'])
except dev_server.DevServerException as e:
raise error.StageControlFileFailure(
"Failed to stage %s on %s: %s" % (build, ds_name, e))
@@ -2003,26 +1992,24 @@
# A NOT IN query with 5000 ids took about 30ms in tests made.
# These numbers seem low enough to outweigh the disadvantages of the
# solutions described above.
- timer = autotest_stats.Timer('shard_heartbeat')
- with timer:
- shard_obj = rpc_utils.retrieve_shard(shard_hostname=shard_hostname)
- rpc_utils.persist_records_sent_from_shard(shard_obj, jobs, hqes)
- assert len(known_host_ids) == len(known_host_statuses)
- for i in range(len(known_host_ids)):
- host_model = models.Host.objects.get(pk=known_host_ids[i])
- if host_model.status != known_host_statuses[i]:
- host_model.status = known_host_statuses[i]
- host_model.save()
+ shard_obj = rpc_utils.retrieve_shard(shard_hostname=shard_hostname)
+ rpc_utils.persist_records_sent_from_shard(shard_obj, jobs, hqes)
+ assert len(known_host_ids) == len(known_host_statuses)
+ for i in range(len(known_host_ids)):
+ host_model = models.Host.objects.get(pk=known_host_ids[i])
+ if host_model.status != known_host_statuses[i]:
+ host_model.status = known_host_statuses[i]
+ host_model.save()
- hosts, jobs, suite_keyvals, inc_ids = rpc_utils.find_records_for_shard(
- shard_obj, known_job_ids=known_job_ids,
- known_host_ids=known_host_ids)
- return {
- 'hosts': [host.serialize() for host in hosts],
- 'jobs': [job.serialize() for job in jobs],
- 'suite_keyvals': [kv.serialize() for kv in suite_keyvals],
- 'incorrect_host_ids': [int(i) for i in inc_ids],
- }
+ hosts, jobs, suite_keyvals, inc_ids = rpc_utils.find_records_for_shard(
+ shard_obj, known_job_ids=known_job_ids,
+ known_host_ids=known_host_ids)
+ return {
+ 'hosts': [host.serialize() for host in hosts],
+ 'jobs': [job.serialize() for job in jobs],
+ 'suite_keyvals': [kv.serialize() for kv in suite_keyvals],
+ 'incorrect_host_ids': [int(i) for i in inc_ids],
+ }
def get_shards(**filter_data):
diff --git a/frontend/afe/rpc_utils.py b/frontend/afe/rpc_utils.py
index 576cbf9..1d7cbf7 100644
--- a/frontend/afe/rpc_utils.py
+++ b/frontend/afe/rpc_utils.py
@@ -21,9 +21,6 @@
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import time_utils
from autotest_lib.client.common_lib.cros import dev_server
-# TODO(akeshet): Replace with monarch once we know how to instrument rpc server
-# with ts_mon.
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.server import utils as server_utils
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
@@ -927,9 +924,7 @@
@returns: Shard object
"""
- timer = autotest_stats.Timer('shard_heartbeat.retrieve_shard')
- with timer:
- return models.Shard.smart_get(shard_hostname)
+ return models.Shard.smart_get(shard_hostname)
def find_records_for_shard(shard, known_job_ids, known_host_ids):
@@ -942,16 +937,12 @@
@returns: Tuple of lists:
(hosts, jobs, suite_job_keyvals, invalid_host_ids)
"""
- timer = autotest_stats.Timer('shard_heartbeat')
- with timer.get_client('find_hosts'):
- hosts, invalid_host_ids = models.Host.assign_to_shard(
- shard, known_host_ids)
- with timer.get_client('find_jobs'):
- jobs = models.Job.assign_to_shard(shard, known_job_ids)
- with timer.get_client('find_suite_job_keyvals'):
- parent_job_ids = [job.parent_job_id for job in jobs]
- suite_job_keyvals = models.JobKeyval.objects.filter(
- job_id__in=parent_job_ids)
+ hosts, invalid_host_ids = models.Host.assign_to_shard(
+ shard, known_host_ids)
+ jobs = models.Job.assign_to_shard(shard, known_job_ids)
+ parent_job_ids = [job.parent_job_id for job in jobs]
+ suite_job_keyvals = models.JobKeyval.objects.filter(
+ job_id__in=parent_job_ids)
return hosts, jobs, suite_job_keyvals, invalid_host_ids
@@ -1015,15 +1006,11 @@
@raises error.UnallowedRecordsSentToMaster if any of the sanity checks fail.
"""
- timer = autotest_stats.Timer('shard_heartbeat')
- with timer.get_client('persist_jobs'):
- job_ids_persisted = _persist_records_with_type_sent_from_shard(
- shard, jobs, models.Job)
-
- with timer.get_client('persist_hqes'):
- _persist_records_with_type_sent_from_shard(
- shard, hqes, models.HostQueueEntry,
- job_ids_sent=job_ids_persisted)
+ job_ids_persisted = _persist_records_with_type_sent_from_shard(
+ shard, jobs, models.Job)
+ _persist_records_with_type_sent_from_shard(
+ shard, hqes, models.HostQueueEntry,
+ job_ids_sent=job_ids_persisted)
def forward_single_host_rpc_to_shard(func):
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 13507e3..a6016d8 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -869,15 +869,6 @@
metrics.Counter(
'chromeos/autotest/scheduler/scheduled_jobs_with_hosts'
).increment_by(new_jobs_with_hosts)
- # TODO(pprabhu): Decide what to do about this metric. Million dollar
- # question: What happens to jobs that were not matched. Do they stay in
- # the queue, and get processed right here in the next tick (then we want
- # a guage corresponding to the number of outstanding unmatched host
- # jobs), or are they handled somewhere else (then we need a counter
- # corresponding to failed_to_match_with_hosts jobs).
- #autotest_stats.Gauge(key).send('new_jobs_without_hosts',
- # new_jobs_need_hosts -
- # new_jobs_with_hosts)
@_calls_log_tick_msg
diff --git a/tko/db.py b/tko/db.py
index 1e23856..e331133 100644
--- a/tko/db.py
+++ b/tko/db.py
@@ -11,7 +11,6 @@
import common
from autotest_lib.client.common_lib import global_config
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.frontend import database_settings_helper
from autotest_lib.tko import utils
@@ -131,13 +130,9 @@
self.con.close()
self.con = None
- try:
- # create the db connection and cursor
- self.con = self.connect(self.host, self.database,
- self.user, self.password, self.port)
- except:
- autotest_stats.Counter('tko_db_con_error').increment()
- raise
+ # create the db connection and cursor
+ self.con = self.connect(self.host, self.database,
+ self.user, self.password, self.port)
self.cur = self.con.cursor()
@@ -178,7 +173,6 @@
try:
self._random_delay()
self._init_db()
- autotest_stats.Counter('tko_db_error').increment()
except OperationalError, e:
_log_error('%s; panic now'
% _format_operational_error(e))