[autotest] Breakdown shard_heartbeat RPC latency
The current latency of shard_heartbeat RPC on master is 1 - 2.5 mins.
The RPC is composed of a series of DB accesses.
The latency is slower than expected. We need to figure out what DB
query takes most of the time.
BUG=chromium:489357
TEST=Puppylab. Make the master send stats to testing statsd server.
Change-Id: Ibe45d0234e41b48eccb6abcf978c739dd45eb64a
Reviewed-on: https://chromium-review.googlesource.com/271796
Tested-by: Mungyung Ryu <mkryu@google.com>
Reviewed-by: Fang Deng <fdeng@chromium.org>
Commit-Queue: Mungyung Ryu <mkryu@google.com>
diff --git a/frontend/afe/rpc_utils.py b/frontend/afe/rpc_utils.py
index 389e494..4efc71c 100644
--- a/frontend/afe/rpc_utils.py
+++ b/frontend/afe/rpc_utils.py
@@ -15,6 +15,7 @@
from autotest_lib.client.common_lib import control_data, error
from autotest_lib.client.common_lib import global_config, priorities
from autotest_lib.client.common_lib import time_utils
+from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.server.cros import provision
from autotest_lib.server import frontend
from autotest_lib.server import utils as server_utils
@@ -1043,7 +1044,9 @@
@returns: Shard object
"""
- return models.Shard.smart_get(shard_hostname)
+ timer = autotest_stats.Timer('shard_heartbeat.retrieve_shard')
+ with timer:
+ return models.Shard.smart_get(shard_hostname)
def find_records_for_shard(shard, known_job_ids, known_host_ids):
@@ -1056,11 +1059,15 @@
@returns: Tuple of three lists for hosts, jobs, and suite job keyvals:
(hosts, jobs, suite_job_keyvals).
"""
- hosts = models.Host.assign_to_shard(shard, known_host_ids)
- jobs = models.Job.assign_to_shard(shard, known_job_ids)
- parent_job_ids = [job.parent_job_id for job in jobs]
- suite_job_keyvals = models.JobKeyval.objects.filter(
- job_id__in=parent_job_ids)
+ timer = autotest_stats.Timer('shard_heartbeat')
+ with timer.get_client('find_hosts'):
+ hosts = models.Host.assign_to_shard(shard, known_host_ids)
+ with timer.get_client('find_jobs'):
+ jobs = models.Job.assign_to_shard(shard, known_job_ids)
+ with timer.get_client('find_suite_job_keyvals'):
+ parent_job_ids = [job.parent_job_id for job in jobs]
+ suite_job_keyvals = models.JobKeyval.objects.filter(
+ job_id__in=parent_job_ids)
return hosts, jobs, suite_job_keyvals
@@ -1118,11 +1125,14 @@
@raises error.UnallowedRecordsSentToMaster if any of the sanity checks fail.
"""
- job_ids_sent = _persist_records_with_type_sent_from_shard(
- shard, jobs, models.Job)
+ timer = autotest_stats.Timer('shard_heartbeat')
+ with timer.get_client('persist_jobs'):
+ job_ids_sent = _persist_records_with_type_sent_from_shard(
+ shard, jobs, models.Job)
- _persist_records_with_type_sent_from_shard(
- shard, hqes, models.HostQueueEntry, job_ids_sent=job_ids_sent)
+ with timer.get_client('persist_hqes'):
+ _persist_records_with_type_sent_from_shard(
+ shard, hqes, models.HostQueueEntry, job_ids_sent=job_ids_sent)
def forward_single_host_rpc_to_shard(func):