blob: 5ac2dea6d18a3b4e2ca78e8c87098c096210aa3d [file] [log] [blame]
Borjan Tchakaloffb98dba72018-03-16 11:04:47 +04001"""Manage Hiccup stats.
2
3This module provides a command to compute statistics of
4heartbeats, crashes, and versions sent from Hiccup clients.
5"""
6import datetime
7
8from django.core.management.base import BaseCommand
9from django.db import transaction
10from django.db.models import Count, F, Q
11from django.db.models.functions import TruncDate
12import pytz
13
14from crashreport_stats.models import (
15 RadioVersion, RadioVersionDaily,
16 StatsMetadata,
17 Version, VersionDaily,
18)
19from crashreports.models import Crashreport, HeartBeat
20
21
22# pylint: disable=too-few-public-methods
23# Classes in this file inherit from each other and are not method containers.
24
25
class _ReportCounterFilter():
    """Base filter selecting the reports that feed one report counter.

    This base implementation keeps every report it is given; subclasses
    narrow the selection by overriding `filter`.

    Attributes:
        model (django.db.model): The report model.
        name (str): The human-readable report counter name.
        field_name (str): The counter name as defined in the stats model where
            it is a field.

    """

    def __init__(self, model, name, field_name):
        """Store the report model and the counter identifiers.

        Args:
            model (django.db.model): The report model.
            name (str): The human-readable report counter name.
            field_name (str): The counter name as defined in the stats model
                where it is a field.

        """
        self.model, self.name, self.field_name = model, name, field_name

    def filter(self, query_objects):
        """Select the reports relevant to this report counter.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: The very same reports, as the base filter has no
                requirement.

        """
        # pylint: disable=no-self-use
        # Subclasses overriding this method may rely on self.
        return query_objects
63
64
class HeartBeatCounterFilter(_ReportCounterFilter):
    """Counter filter covering all the heartbeats, without restriction."""

    def __init__(self):
        """Bind the filter to the HeartBeat model and 'heartbeats' field."""
        counter = 'heartbeats'
        super(HeartBeatCounterFilter, self).__init__(
            HeartBeat, counter, counter)
72
73
class CrashreportCounterFilter(_ReportCounterFilter):
    """Counter filter selecting crashreports by their boot reason.

    Attributes:
        include_boot_reasons (list(str)): The boot reasons assumed to
            characterise this crashreport ("OR"ed).
        exclude_boot_reasons (list(str)): The boot reasons assumed to *not*
            characterise this crashreport ("AND"ed).
        inclusive_filter (Q): The boot reasons filter for filtering reports
            that should be included, None if there is nothing to include.
        exclusive_filter (Q): The boot reasons filter for filtering reports
            that should *not* be included, None if there is nothing to
            exclude.

    """

    def __init__(
            self, name, field_name, include_boot_reasons=None,
            exclude_boot_reasons=None):
        """Set up the filter and pre-compute its boot reason queries.

        At least one of `include_boot_reasons` and `exclude_boot_reasons`
        has to be given.

        Args:
            name (str): The human-readable report counter name.
            field_name (str):
                The counter name as defined in the stats model where it is a
                field.
            include_boot_reasons (list(str), optional):
                The boot reasons assumed to characterise this crashreport
                ("OR"ed).
            exclude_boot_reasons (list(str), optional):
                The boot reasons assumed to *not* characterise this
                crashreport ("AND"ed).
        Raises:
            ValueError:
                Neither `include_boot_reasons` nor `exclude_boot_reasons`
                has been supplied (or both are empty).

        """
        if not (include_boot_reasons or exclude_boot_reasons):
            raise ValueError(
                'One or both of `include_boot_reasons` and '
                '`exclude_boot_reasons` must be specified.')

        super(CrashreportCounterFilter, self).__init__(
            model=Crashreport, name=name, field_name=field_name)

        # Keep the raw boot reasons around...
        self.include_boot_reasons = include_boot_reasons
        self.exclude_boot_reasons = exclude_boot_reasons

        # ... and build the matching queries once, they are reused by every
        # call to filter()
        self.inclusive_filter = self._create_query_filter(include_boot_reasons)
        self.exclusive_filter = self._create_query_filter(exclude_boot_reasons)

    @staticmethod
    def _create_query_filter(reasons):
        """Build a single query matching any of the given boot reasons.

        Args:
            reasons (list(str)): List of boot reasons to include in filter.
        Returns:
            django.db.models.query_utils.Q: Query that matches either of
                reasons as boot_reason if list is not empty, otherwise None.

        """
        if not reasons:
            return None

        first_reason, other_reasons = reasons[0], reasons[1:]
        combined = Q(boot_reason=first_reason)
        for other_reason in other_reasons:
            combined |= Q(boot_reason=other_reason)
        return combined

    def filter(self, query_objects):
        """Apply the inclusive, then the exclusive boot reasons filter.

        A filter that was not configured (None) is skipped.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: The reports matching this report counter requirements.

        """
        selected = query_objects
        if self.inclusive_filter:
            selected = selected.filter(self.inclusive_filter)
        if self.exclusive_filter:
            selected = selected.exclude(self.exclusive_filter)
        return selected
164
165
class _StatsModelsEngine():
    """Stats models engine.

    Updates (or deletes) the entries of a general stats model together with
    the entries of its per-day counterpart.
    """

    def __init__(self, stats_model, daily_stats_model, version_field_name):
        """Remember which models and which version field to work with.

        Args:
            stats_model (_VersionStats): The _VersionStats model to update
                stats for.
            daily_stats_model (_DailyVersionStats): The _DailyVersionStats
                model to update stats for.
            version_field_name (str): The version field name as specified in
                the stats models.

        """
        self.stats_model = stats_model
        self.daily_stats_model = daily_stats_model
        self.version_field_name = version_field_name

    def _valid_objects(self, query_objects):
        """Discard the reports that must not be counted.

        The base engine considers every report valid.

        Returns:
            QuerySet: All the valid reports.

        """
        # pylint: disable=no-self-use
        # Subclasses overriding this method may rely on self.
        return query_objects

    def _objects_within_period(self, query_objects, up_to, starting_from=None):
        """Restrict the reports to those created in a given period of time.

        Using the same period for all engines and counters keeps their
        results comparable. The lower bound is meant to be left out on the
        very first update only; the upper bound is mandatory so that reports
        arriving during the update are not picked up (race condition).

        Args:
            query_objects (QuerySet): The reports to filter.
            up_to (datetime): The maximum timestamp to consider (inclusive).
            starting_from (datetime, optional): The minimum timestamp to
                consider (exclusive).
        Returns:
            QuerySet: The reports received within a specific period of time.

        """
        # pylint: disable=no-self-use
        # Subclasses overriding this method may rely on self.
        bounded = query_objects.filter(created_at__lte=up_to)
        if not starting_from:
            return bounded
        return bounded.filter(created_at__gt=starting_from)

    def _unique_objects_per_day(self, query_objects):
        """Group the reports per version and per day, and count them.

        Each group carries the version field, the '_report_day' and a
        'count' of the distinct 'date' values found in the group.

        Args:
            query_objects (QuerySet): The reports to count.
        Returns:
            QuerySet: The unique reports grouped per version per day.

        """
        with_day = query_objects.annotate(_report_day=TruncDate('date'))
        grouped = with_day.values(self.version_field_name, '_report_day')
        # FIXME Agressively drop duplicates
        return grouped.annotate(count=Count('date', distinct=True))

    def delete_stats(self):
        """Remove all the general stats entries and their daily entries.

        Returns:
            dict(str, int): The count of deleted entries per model name.

        """
        # Only the general stats are deleted explicitly, the daily stats
        # are expected to go away with them by cascading effect
        deletion_result = self.stats_model.objects.all().delete()
        return deletion_result[1]

    def update_stats(self, report_counter, up_to, starting_from=None):
        """Add the count of matching reports to the general and daily stats.

        The steps are:
          1. Keep the valid reports created within the given period of time
             (see `_objects_within_period` for the bounds semantics).
          2. Apply the report counter requirements to them.
          3. Group the remaining reports per version and per day, counting
             the reports of each group.
          4. Add each group count to the matching daily stats entry and to
             the general stats entry of the version, creating the entries
             when they do not exist yet.

        Args:
            report_counter (_ReportCounterFilter): The report counter to
                update the stats with.
            up_to (datetime): The maximum timestamp to consider (inclusive).
            starting_from (datetime, optional): The minimum timestamp to
                consider (exclusive).
        Returns:
            dict(Model, dict(str, int)): For each of the two stats model
                classes, the number of entries hit by a group, split under
                the keys 'created' and 'updated'.

        """
        tally = {
            model: {'created': 0, 'updated': 0}
            for model in (self.stats_model, self.daily_stats_model)}

        # Valid reports -> within the period -> matching the counter ->
        # grouped and counted per version per day
        reports = self._valid_objects(report_counter.model.objects.all())
        reports = self._objects_within_period(reports, up_to, starting_from)
        reports = report_counter.filter(reports)
        groups = self._unique_objects_per_day(reports)

        counter_field = report_counter.field_name

        # The QuerySet is consumed only once: iterator() spares the cache
        for group in groups.iterator():
            day = group['_report_day']
            increment = group['count']

            # The version field name is dynamic, hence the unpacked dict
            lookup = {
                self.version_field_name: group[self.version_field_name],
                'defaults': {'first_seen_on': day, 'released_on': day},
            }
            stats, stats_created = self.stats_model.objects.get_or_create(
                **lookup)
            outcome = 'created' if stats_created else 'updated'
            tally[self.stats_model][outcome] += 1

            # Reports do not arrive ordered by device time: a report
            # received late may be older than what is known so far, and the
            # version history has to be moved back accordingly.
            if not stats_created and stats.first_seen_on > day:
                # A released_on differing from first_seen_on has been
                # edited manually and must be preserved as is.
                if stats.released_on == stats.first_seen_on:
                    stats.released_on = day
                stats.first_seen_on = day

            daily_stats, daily_created = (
                self.daily_stats_model.objects.get_or_create(
                    version=stats, date=day))
            outcome = 'created' if daily_created else 'updated'
            tally[self.daily_stats_model][outcome] += 1

            # Increment the counter on the database side for both entries,
            # then persist the general entry before the daily one
            entries = (stats, daily_stats)
            for entry in entries:
                setattr(entry, counter_field, F(counter_field) + increment)
            for entry in entries:
                entry.save()

        return tally
335
336
class VersionStatsEngine(_StatsModelsEngine):
    """Version stats engine.

    Updates the counters of the Version model and of its daily counterpart,
    VersionDaily, grouping the reports by 'build_fingerprint'.
    """

    def __init__(self):
        """Bind the engine to the Version and VersionDaily models."""
        super(VersionStatsEngine, self).__init__(
            Version, VersionDaily, 'build_fingerprint')
349
350
class RadioVersionStatsEngine(_StatsModelsEngine):
    """Radio version stats engine.

    Updates the counters of the RadioVersion model and of its daily
    counterpart, RadioVersionDaily, grouping the reports by 'radio_version'.
    """

    def __init__(self):
        """Bind the engine to the RadioVersion and RadioVersionDaily models."""
        super(RadioVersionStatsEngine, self).__init__(
            RadioVersion, RadioVersionDaily, 'radio_version')

    def _valid_objects(self, query_objects):
        """Keep only the reports that have a radio version.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: The reports whose 'radio_version' is not null.

        """
        # The version field might be null for legacy reasons
        with_radio_version = query_objects.filter(radio_version__isnull=False)
        return with_radio_version
367
368
class Command(BaseCommand):
    """Management command to compute Hiccup statistics."""

    # The engines, one per pair of general/daily stats models to maintain
    _STATS_MODELS_ENGINES = [
        VersionStatsEngine(),
        RadioVersionStatsEngine(),
    ]

    # All the report counters that are listed in the stats models
    _REPORT_COUNTER_FILTERS = [
        HeartBeatCounterFilter(),
        CrashreportCounterFilter(
            name='crashes', field_name='prob_crashes',
            include_boot_reasons=Crashreport.CRASH_BOOT_REASONS),
        CrashreportCounterFilter(
            name='smpl', field_name='smpl',
            include_boot_reasons=Crashreport.SMPL_BOOT_REASONS),
        CrashreportCounterFilter(
            name='other', field_name='other',
            exclude_boot_reasons=(
                Crashreport.SMPL_BOOT_REASONS
                + Crashreport.CRASH_BOOT_REASONS)),
    ]

    # The module docstring doubles as the command help text
    help = __doc__

    def add_arguments(self, parser):
        """Add custom arguments to the command.

        Args:
            parser (argparse.ArgumentParser): The command argument parser; a
                mandatory 'action' argument ('reset' or 'update') is added.

        """
        parser.add_argument('action', choices=['reset', 'update'])

    def handle(self, *args, **options):
        """Carry out the command executive logic.

        The 'reset' action deletes all the statistics before computing them
        again, the 'update' action only computes the statistics of the
        reports not taken into account yet.

        Args:
            *args: Unused.
            **options: The parsed command options; 'action' and 'verbosity'
                are read.

        """
        # pylint: disable=attribute-defined-outside-init
        # self.debug is only ever read through calls of handle().
        self.debug = int(options['verbosity']) >= 2

        if options['action'] == 'reset':
            self.delete_all_stats()
            self.update_all_stats()
        elif options['action'] == 'update':
            self.update_all_stats()

    def _success(self, msg, *args, **kwargs):
        """Write a message styled as a success to the standard output.

        Args:
            msg (str): The message to write.
            *args: Forwarded to `self.stdout.write`.
            **kwargs: Forwarded to `self.stdout.write`.

        """
        # pylint: disable=no-member
        # Members of Style are generated and cannot be statically inferred.
        self.stdout.write(self.style.SUCCESS(msg), *args, **kwargs)

    def delete_all_stats(self):
        """Delete the statistics from all stats models.

        The stats entries and the update metadata are deleted within a
        single transaction. The deletion counts are printed in debug mode.
        """
        with transaction.atomic():
            for engine in self._STATS_MODELS_ENGINES:
                counts_per_model = engine.delete_stats()
                if self.debug:
                    # Default the count of deleted models to 0 if missing
                    if not counts_per_model:
                        counts_per_model = {
                            engine.stats_model._meta.label: 0,
                            engine.daily_stats_model._meta.label: 0}
                    for model, count in counts_per_model.items():
                        # Strip the application prefix of the model label
                        name = model.split('.')[-1]
                        self._success(
                            '{} {} deleted'.format(count, name))

            # Reset the metadata
            count, _ = StatsMetadata.objects.all().delete()
            if self.debug:
                self._success(
                    '{} StatsMetadata deleted'.format(count))

    def update_all_stats(self):
        """Update the statistics from all stats models.

        Only the reports created after the previous update (if any) and up
        to now are taken into account. The timestamp of this update is then
        recorded as the starting point of the next one. The created/updated
        counts are printed in debug mode.
        """
        try:
            previous_update = StatsMetadata.objects.latest('updated_at')
        except StatsMetadata.DoesNotExist:
            # First update ever: there is no lower bound
            starting_from = None
        else:
            starting_from = previous_update.updated_at
        # Fix the upper limit to avoid race conditions with new reports sent
        # while we are updating the different statistics
        up_to = datetime.datetime.now(tz=pytz.utc)

        # All the engines and the metadata are updated in one transaction.
        # Were an engine committed on its own and a later step to fail, the
        # metadata would not be recorded and the next update would count
        # the already committed reports a second time.
        with transaction.atomic():
            for engine in self._STATS_MODELS_ENGINES:
                for filter_ in self._REPORT_COUNTER_FILTERS:
                    counts_per_model = engine.update_stats(
                        filter_, up_to, starting_from)
                    if self.debug:
                        for model, counts in counts_per_model.items():
                            for action, count in counts.items():
                                msg = '{} {} {} for counter {}'.format(
                                    count, model.__name__, action,
                                    filter_.name)
                                self._success(msg)

            StatsMetadata(updated_at=up_to).save()
462 StatsMetadata(updated_at=up_to).save()