Blame - crashreport_stats/management/commands/stats.py - tools/hiccup/hiccup-server

blob: 5ac2dea6d18a3b4e2ca78e8c87098c096210aa3d [file] [log] [blame]

Borjan Tchakaloff	b98dba7	2018-03-16 11:04:47 +0400	[diff] [blame]	1	"""Manage Hiccup stats.
				2
				3	This module provides a command to compute statistics of
				4	heartbeats, crashes, and versions sent from Hiccup clients.
				5	"""
				6	import datetime
				7
				8	from django.core.management.base import BaseCommand
				9	from django.db import transaction
				10	from django.db.models import Count, F, Q
				11	from django.db.models.functions import TruncDate
				12	import pytz
				13
				14	from crashreport_stats.models import (
				15	RadioVersion, RadioVersionDaily,
				16	StatsMetadata,
				17	Version, VersionDaily,
				18	)
				19	from crashreports.models import Crashreport, HeartBeat
				20
				21
				22	# pylint: disable=too-few-public-methods
				23	# Classes in this file inherit from each other and are not method containers.
				24
				25
				26	class _ReportCounterFilter():
				27	"""Filter reports matching a report counter requirements.
				28
				29	Attributes:
				30	model (django.db.model): The report model.
				31	name (str): The human-readable report counter name.
				32	field_name (str): The counter name as defined in the stats model where
				33	it is a field.
				34
				35	"""
				36
				37	def __init__(self, model, name, field_name):
				38	"""Initialise the filter.
				39
				40	Args:
				41	model (django.db.model): The report model.
				42	name (str): The human-readable report counter name.
				43	field_name (str): The counter name as defined in the stats model
				44	where it is a field.
				45
				46	"""
				47	self.model = model
				48	self.name = name
				49	self.field_name = field_name
				50
				51	def filter(self, query_objects):
				52	"""Filter the reports.
				53
				54	Args:
				55	query_objects (QuerySet): The reports to filter.
				56	Returns:
				57	QuerySet: The reports matching this report counter requirements.
				58
				59	"""
				60	# pylint: disable=no-self-use
				61	# self is potentially used by subclasses.
				62	return query_objects
				63
				64
				65	class HeartBeatCounterFilter(_ReportCounterFilter):
				66	"""The heartbeats counter filter."""
				67
				68	def __init__(self):
				69	"""Initialise the filter."""
				70	super(HeartBeatCounterFilter, self).__init__(
				71	model=HeartBeat, name='heartbeats', field_name='heartbeats')
				72
				73
				74	class CrashreportCounterFilter(_ReportCounterFilter):
				75	"""The crashreports counter filter.
				76
				77	Attributes:
				78	include_boot_reasons (list(str)): The boot reasons assumed to
				79	characterise this crashreport ("OR"ed).
				80	exclude_boot_reasons (list(str)): The boot reasons assumed to not
				81	characterise this crashreport ("AND"ed).
				82	inclusive_filter (Q): The boot reasons filter for filtering reports
				83	that should be included.
				84	exclusive_filter (Q): The boot reasons filter for filtering reports
				85	that should not be included.
				86
				87	"""
				88
				89	def __init__(
				90	self, name, field_name, include_boot_reasons=None,
				91	exclude_boot_reasons=None):
				92	"""Initialise the filter.
				93
				94	One or both of `include_boot_reasons` and `exclude_boot_reasons` must
				95	be specified.
				96
				97	Args:
				98	name (str): The human-readable report counter name.
				99	field_name (str):
				100	The counter name as defined in the stats model where it is a
				101	field.
				102	include_boot_reasons (list(str), optional):
				103	The boot reasons assumed to characterise this crashreport
				104	("OR"ed).
				105	exclude_boot_reasons (list(str), optional):
				106	The boot reasons assumed to not characterise this
				107	crashreport ("AND"ed).
				108	Raises:
				109	ValueError:
				110	None of `include_boot_reasons` and `exclude_boot_reasons` have
				111	been supplied.
				112
				113	"""
				114	if not include_boot_reasons and not exclude_boot_reasons:
				115	raise ValueError(
				116	'One or both of `include_boot_reasons` and '
				117	'`exclude_boot_reasons` must be specified.')
				118
				119	super(CrashreportCounterFilter, self).__init__(
				120	model=Crashreport, name=name, field_name=field_name)
				121
				122	# Cache the boot reasons inclusive filter
				123	self.include_boot_reasons = include_boot_reasons
				124	self.inclusive_filter = self._create_query_filter(include_boot_reasons)
				125
				126	# Cache the boot reasons exclusive filter
				127	self.exclude_boot_reasons = exclude_boot_reasons
				128	self.exclusive_filter = self._create_query_filter(exclude_boot_reasons)
				129
				130	@staticmethod
				131	def _create_query_filter(reasons):
				132	"""Combine boot reasons into one filter.
				133
				134	Args:
				135	reasons (list(str)): List of boot reasons to include in filter.
				136	Returns:
				137	django.db.models.query_utils.Q: Query that matches either of
				138	reasons as boot_reason if list is not empty, otherwise None.
				139
				140	"""
				141	if not reasons:
				142	return None
				143
				144	query = Q(boot_reason=reasons[0])
				145	for reason in reasons[1:]:
				146	query = query \| Q(boot_reason=reason)
				147	return query
				148
				149	def filter(self, query_objects):
				150	"""Filter the reports according to the inclusive and exclusive fitlers.
				151
				152	Args:
				153	query_objects (QuerySet): The reports to filter.
				154	Returns:
				155	QuerySet: The reports matching this report counter requirements.
				156
				157	"""
				158	if self.inclusive_filter:
				159	query_objects = query_objects.filter(self.inclusive_filter)
				160	if self.exclusive_filter:
				161	query_objects = query_objects.exclude(self.exclusive_filter)
				162
				163	return query_objects
				164
				165
				166	class _StatsModelsEngine():
				167	"""Stats models engine.
				168
				169	An engine to update general stats (_VersionStats) and their daily
				170	counterparts (_DailyVersionStats).
				171	"""
				172
				173	def __init__(self, stats_model, daily_stats_model, version_field_name):
				174	"""Initialise the engine.
				175
				176	Args:
				177	stats_model (_VersionStats): The _VersionStats model to update
				178	stats for.
				179	daily_stats_model (_DailyVersionStats): The _DailyVersionStats
				180	model to update stats for.
				181	version_field_name (str): The version field name as specified in
				182	the stats models.
				183
				184	"""
				185	self.stats_model = stats_model
				186	self.daily_stats_model = daily_stats_model
				187	self.version_field_name = version_field_name
				188
				189	def _valid_objects(self, query_objects):
				190	"""Filter out invalid reports.
				191
				192	Returns:
				193	QuerySet: All the valid reports.
				194
				195	"""
				196	# pylint: disable=no-self-use
				197	# self is potentially used by subclasses.
				198	return query_objects
				199
				200	def _objects_within_period(self, query_objects, up_to, starting_from=None):
				201	"""Retrieve the reports within a specific period of time.
				202
				203	The objects are filtered considering a specific period of time to allow
				204	for comparable results between subclasses. The lower bound should be
				205	omitted for the first update but always set for later calls. The upper
				206	bound must be specified to avoid race conditions.
				207
				208	Args:
				209	query_objects (QuerySet): The reports to filter.
				210	up_to (datetime): The maximum timestamp to consider (inclusive).
				211	starting_from (datetime, optional): The minimum timestamp to
				212	consider (exclusive).
				213	Returns:
				214	QuerySet: The reports received within a specific period of time.
				215
				216	"""
				217	# pylint: disable=no-self-use
				218	# self might be used by subclasses.
				219	query_objects = query_objects.filter(created_at__lte=up_to)
				220	if starting_from:
				221	query_objects = query_objects.filter(created_at__gt=starting_from)
				222
				223	return query_objects
				224
				225	def _unique_objects_per_day(self, query_objects):
				226	"""Count the unique reports per version per day.
				227
				228	Args:
				229	query_objects (QuerySet): The reports to count.
				230	Returns:
				231	QuerySet: The unique reports grouped per version per day.
				232
				233	"""
				234	return (
				235	query_objects
				236	.annotate(_report_day=TruncDate('date'))
				237	.values(self.version_field_name, '_report_day')
				238	# FIXME Agressively drop duplicates
				239	.annotate(count=Count('date', distinct=True)))
				240
				241	def delete_stats(self):
				242	"""Delete the general and daily stats instances.
				243
				244	Returns:
				245	dict(str, int): The count of deleted entries per model name.
				246
				247	"""
				248	# Clear the general stats, the daily stats will be deleted by cascading
				249	# effect
				250	_, count_per_model = self.stats_model.objects.all().delete()
				251	return count_per_model
				252
				253	def update_stats(self, report_counter, up_to, starting_from=None):
				254	"""Update the statistics of the general and daily stats entries.
				255
				256	The algorithm works as follow:
				257	1. The reports are filtered considering a specific period of time to
				258	allow for comparable results between subclasses. The lower bound
				259	should be omitted for the first update but always set for later
				260	calls. The upper bound must be specified to avoid race conditions.
				261	2. The report counter requirements are applied to the reports.
				262	3. The reports are grouped per day and per version, a counter is
				263	generated.
				264	4. Each report group count is used to update specific daily stats,
				265	while the sum of them per version updates the general stats.
				266
				267	Args:
				268	report_counter (_ReportCounterEngine): The report counter to
				269	update the stats with.
				270	up_to (datetime): The maximum timestamp to consider (inclusive).
				271	starting_from (datetime, optional): The minimum timestamp to
				272	consider (exclusive).
				273	Returns:
				274	dict(str, dict(str, int)): The number of added entries and the
				275	number of updated entries bundled in a dict, respectively
				276	hashed with the keys 'created' and 'updated', per model name.
				277
				278	"""
				279	counts_per_model = {
				280	self.stats_model: {'created': 0, 'updated': 0},
				281	self.daily_stats_model: {'created': 0, 'updated': 0}}
				282
				283	query_objects = self._valid_objects(report_counter.model.objects.all())
				284	# Only include reports from the interesting period of time
				285	query_objects = self._objects_within_period(
				286	query_objects, up_to, starting_from)
				287	# Apply the report counter requirements
				288	query_objects = report_counter.filter(query_objects)
				289	# Chain our own filters
				290	query_objects = self._unique_objects_per_day(query_objects)
				291
				292	# Explicitly use the iterator() method to avoid caching as we will
				293	# not re-use the QuerySet
				294	for query_object in query_objects.iterator():
				295	report_day = query_object['_report_day']
				296	# Use a dict to be able to dereference the field name
				297	stats, created = self.stats_model.objects.get_or_create(**{
				298	self.version_field_name: query_object[self.version_field_name],
				299	'defaults': {
				300	'first_seen_on': report_day,
				301	'released_on': report_day,
				302	}
				303	})
				304	counts_per_model[self.stats_model][(
				305	'created' if created else 'updated')] += 1
				306
				307	# Reports are coming in an unordered manner, a late report can
				308	# be older (device time wise). Make sure that the current reports
				309	# creation date is taken into account in the version history.
				310	if not created and stats.first_seen_on > report_day:
				311	# Avoid changing the released_on field if it is different than
				312	# the default value (i.e. equals to the value of first_seen_on)
				313	# since it indicates that it was manually changed.
				314	if stats.released_on == stats.first_seen_on:
				315	stats.released_on = report_day
				316	stats.first_seen_on = report_day
				317
				318	daily_stats, created = (
				319	self.daily_stats_model.objects.get_or_create(
				320	version=stats, date=report_day))
				321	counts_per_model[self.daily_stats_model][(
				322	'created' if created else 'updated')] += 1
				323
				324	setattr(
				325	stats, report_counter.field_name,
				326	F(report_counter.field_name) + query_object['count'])
				327	setattr(
				328	daily_stats, report_counter.field_name,
				329	F(report_counter.field_name) + query_object['count'])
				330
				331	stats.save()
				332	daily_stats.save()
				333
				334	return counts_per_model
				335
				336
				337	class VersionStatsEngine(_StatsModelsEngine):
				338	"""Version stats engine.
				339
				340	An engine to update a counter of general stats (Version) and their daily
				341	counterparts (VersionDaily).
				342	"""
				343
				344	def __init__(self):
				345	"""Initialise the engine."""
				346	super(VersionStatsEngine, self).__init__(
				347	stats_model=Version, daily_stats_model=VersionDaily,
				348	version_field_name='build_fingerprint')
				349
				350
				351	class RadioVersionStatsEngine(_StatsModelsEngine):
				352	"""Radio version stats engine.
				353
				354	An engine to update a counter of general stats (RadioVersion) and their
				355	daily counterparts (RadioVersionDaily).
				356	"""
				357
				358	def __init__(self):
				359	"""Initialise the engine."""
				360	super(RadioVersionStatsEngine, self).__init__(
				361	stats_model=RadioVersion, daily_stats_model=RadioVersionDaily,
				362	version_field_name='radio_version')
				363
				364	def _valid_objects(self, query_objects):
				365	# For legacy reasons, the version field might be null
				366	return query_objects.filter(radio_version__isnull=False)
				367
				368
				369	class Command(BaseCommand):
				370	"""Management command to compute Hiccup statistics."""
				371
				372	_STATS_MODELS_ENGINES = [
				373	VersionStatsEngine(),
				374	RadioVersionStatsEngine(),
				375	]
				376
				377	# All the report counters that are listed in the stats models
				378	_REPORT_COUNTER_FILTERS = [
				379	HeartBeatCounterFilter(),
				380	CrashreportCounterFilter(
				381	name='crashes', field_name='prob_crashes',
				382	include_boot_reasons=Crashreport.CRASH_BOOT_REASONS),
				383	CrashreportCounterFilter(
				384	name='smpl', field_name='smpl',
				385	include_boot_reasons=Crashreport.SMPL_BOOT_REASONS),
				386	CrashreportCounterFilter(
				387	name='other', field_name='other',
				388	exclude_boot_reasons=(
				389	Crashreport.SMPL_BOOT_REASONS
				390	+ Crashreport.CRASH_BOOT_REASONS)),
				391	]
				392
				393	help = __doc__
				394
				395	def add_arguments(self, parser):
				396	"""Add custom arguments to the command."""
				397	parser.add_argument('action', choices=['reset', 'update'])
				398
				399	def handle(self, args, *options):
				400	"""Carry out the command executive logic."""
				401	# pylint: disable=attribute-defined-outside-init
				402	# self.debug is only ever read through calls of handle().
				403	self.debug = int(options['verbosity']) >= 2
				404
				405	if options['action'] == 'reset':
				406	self.delete_all_stats()
				407	self.update_all_stats()
				408	elif options['action'] == 'update':
				409	self.update_all_stats()
				410
				411	def _success(self, msg, args, *kwargs):
				412	# pylint: disable=no-member
				413	# Members of Style are generated and cannot be statically inferred.
				414	self.stdout.write(self.style.SUCCESS(msg), args, *kwargs)
				415
				416	def delete_all_stats(self):
				417	"""Delete the statistics from all stats models."""
				418	with transaction.atomic():
				419	for engine in self._STATS_MODELS_ENGINES:
				420	counts_per_model = engine.delete_stats()
				421	if self.debug:
				422	# Default the count of deleted models to 0 if missing
				423	if not counts_per_model:
				424	counts_per_model = {
Franz-Xaver Geiger	cc1e04d	2018-08-07 11:51:51 +0200	[diff] [blame^]	425	engine.stats_model._meta.label: 0,
				426	engine.daily_stats_model._meta.label: 0}
Borjan Tchakaloff	b98dba7	2018-03-16 11:04:47 +0400	[diff] [blame]	427	for model, count in counts_per_model.items():
Franz-Xaver Geiger	cc1e04d	2018-08-07 11:51:51 +0200	[diff] [blame^]	428	name = model.split('.')[-1]
Borjan Tchakaloff	b98dba7	2018-03-16 11:04:47 +0400	[diff] [blame]	429	self._success(
Franz-Xaver Geiger	cc1e04d	2018-08-07 11:51:51 +0200	[diff] [blame^]	430	'{} {} deleted'.format(count, name))
Borjan Tchakaloff	b98dba7	2018-03-16 11:04:47 +0400	[diff] [blame]	431
				432	# Reset the metadata
				433	count, _ = StatsMetadata.objects.all().delete()
				434	if self.debug:
				435	self._success(
				436	'{} StatsMetadata deleted'.format(count))
				437
				438	def update_all_stats(self):
				439	"""Update the statistics from all stats models."""
				440	try:
				441	previous_update = StatsMetadata.objects.latest('updated_at')
				442	starting_from = previous_update.updated_at
				443	except StatsMetadata.DoesNotExist:
				444	starting_from = None
				445	# Fix the upper limit to avoid race conditions with new reports sent
				446	# while we are updating the different statistics
				447	up_to = datetime.datetime.now(tz=pytz.utc)
				448
				449	for engine in self._STATS_MODELS_ENGINES:
				450	with transaction.atomic():
				451	for filter_ in self._REPORT_COUNTER_FILTERS:
				452	counts_per_model = engine.update_stats(
				453	filter_, up_to, starting_from)
				454	if self.debug:
				455	for model, counts in counts_per_model.items():
				456	for action, count in counts.items():
				457	msg = '{} {} {} for counter {}'.format(
				458	count, model.__name__, action,
				459	filter_.name)
				460	self._success(msg)
				461
				462	StatsMetadata(updated_at=up_to).save()