blob: c30c2e021a71f8865825d2e62ce4800161df199c [file] [log] [blame]
"""Manage Hiccup stats.

This module provides a command to compute statistics of
heartbeats, crashes, and versions sent from Hiccup clients.
"""
6import datetime
7
8from django.core.management.base import BaseCommand
9from django.db import transaction
10from django.db.models import Count, F, Q
11from django.db.models.functions import TruncDate
12import pytz
13
14from crashreport_stats.models import (
Mitja Nikolauscb50f2c2018-08-24 13:54:48 +020015 RadioVersion,
16 RadioVersionDaily,
Borjan Tchakaloffb98dba72018-03-16 11:04:47 +040017 StatsMetadata,
Mitja Nikolauscb50f2c2018-08-24 13:54:48 +020018 Version,
19 VersionDaily,
Borjan Tchakaloffb98dba72018-03-16 11:04:47 +040020)
21from crashreports.models import Crashreport, HeartBeat
22
23
# pylint: disable=too-few-public-methods
# Classes in this file inherit from each other and are not method containers.
26
27
class _ReportCounterFilter:
    """Base description of a report counter and how to select its reports.

    The base implementation selects every report; subclasses narrow the
    selection by overriding `filter`.

    Attributes:
        model (django.db.model): The report model.
        name (str): The human-readable report counter name.
        field_name (str): The name of the stats model field that stores
            this counter.

    """

    def __init__(self, model, name, field_name):
        """Record what the counter counts and where the count is stored.

        Args:
            model (django.db.model): The report model.
            name (str): The human-readable report counter name.
            field_name (str): The name of the stats model field that stores
                this counter.

        """
        self.model, self.name, self.field_name = model, name, field_name

    def filter(self, query_objects):
        """Select the reports that belong to this counter.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: The given reports, unchanged (the base counter accepts
            every report).

        """
        # pylint: disable=no-self-use
        # self is potentially used by subclasses.
        return query_objects
65
66
class HeartBeatCounterFilter(_ReportCounterFilter):
    """Report counter that covers every heartbeat."""

    def __init__(self):
        """Bind the counter to the HeartBeat model and `heartbeats` field."""
        super().__init__(HeartBeat, "heartbeats", "heartbeats")
Borjan Tchakaloffb98dba72018-03-16 11:04:47 +040075
76
class CrashreportCounterFilter(_ReportCounterFilter):
    """Report counter selecting crashreports by their boot reason.

    Attributes:
        include_boot_reasons (list(str)): The boot reasons assumed to
            characterise this crashreport ("OR"ed).
        exclude_boot_reasons (list(str)): The boot reasons assumed to *not*
            characterise this crashreport ("AND"ed).
        inclusive_filter (Q): The pre-built filter matching reports that
            should be included, or None if no reasons were given.
        exclusive_filter (Q): The pre-built filter matching reports that
            should *not* be included, or None if no reasons were given.

    """

    def __init__(
        self,
        name,
        field_name,
        include_boot_reasons=None,
        exclude_boot_reasons=None,
    ):
        """Set up the counter and pre-build its boot reason filters.

        At least one of `include_boot_reasons` and `exclude_boot_reasons`
        must be given.

        Args:
            name (str): The human-readable report counter name.
            field_name (str):
                The name of the stats model field that stores this counter.
            include_boot_reasons (list(str), optional):
                The boot reasons assumed to characterise this crashreport
                ("OR"ed).
            exclude_boot_reasons (list(str), optional):
                The boot reasons assumed to *not* characterise this
                crashreport ("AND"ed).
        Raises:
            ValueError:
                Neither `include_boot_reasons` nor `exclude_boot_reasons`
                has been supplied (or both are empty).

        """
        if not (include_boot_reasons or exclude_boot_reasons):
            raise ValueError(
                "One or both of `include_boot_reasons` and "
                "`exclude_boot_reasons` must be specified."
            )

        super().__init__(Crashreport, name, field_name)

        self.include_boot_reasons = include_boot_reasons
        self.exclude_boot_reasons = exclude_boot_reasons

        # Build both filters once so that they are not rebuilt on every
        # call to filter().
        self.inclusive_filter = self._create_query_filter(include_boot_reasons)
        self.exclusive_filter = self._create_query_filter(exclude_boot_reasons)

    @staticmethod
    def _create_query_filter(reasons):
        """Build a single filter matching any of the given boot reasons.

        Args:
            reasons (list(str)): The boot reasons to match.
        Returns:
            django.db.models.query_utils.Q: A query matching reports whose
            boot_reason equals any of the reasons, or None if `reasons` is
            empty or None.

        """
        if not reasons:
            return None

        first_reason, *other_reasons = reasons
        combined = Q(boot_reason=first_reason)
        for other_reason in other_reasons:
            combined = combined | Q(boot_reason=other_reason)
        return combined

    def filter(self, query_objects):
        """Apply the inclusive and then the exclusive boot reason filters.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: The reports matching this report counter requirements.

        """
        selected = query_objects
        if self.inclusive_filter:
            selected = selected.filter(self.inclusive_filter)
        if self.exclusive_filter:
            selected = selected.exclude(self.exclusive_filter)
        return selected
173
174
class _StatsModelsEngine:
    """Stats models engine.

    Keeps a general stats model (_VersionStats) and its daily counterpart
    (_DailyVersionStats) up to date from the received reports.
    """

    def __init__(self, stats_model, daily_stats_model, version_field_name):
        """Remember which models and which version field to work on.

        Args:
            stats_model (_VersionStats): The general stats model to update.
            daily_stats_model (_DailyVersionStats): The daily stats model to
                update.
            version_field_name (str): The version field name as specified in
                the stats models.

        """
        self.stats_model = stats_model
        self.daily_stats_model = daily_stats_model
        self.version_field_name = version_field_name

    def _valid_objects(self, query_objects):
        """Discard the reports that must not be counted.

        The base engine considers every report valid.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: All the valid reports.

        """
        # pylint: disable=no-self-use
        # self is potentially used by subclasses.
        return query_objects

    def _objects_within_period(self, query_objects, up_to, starting_from=None):
        """Restrict the reports to those created within a period of time.

        Filtering on a fixed period keeps results comparable between
        subclasses. The lower bound should be omitted for the first update
        but always set for later calls. The upper bound must be specified to
        avoid race conditions.

        Args:
            query_objects (QuerySet): The reports to filter.
            up_to (datetime): The maximum timestamp to consider (inclusive).
            starting_from (datetime, optional): The minimum timestamp to
                consider (exclusive).
        Returns:
            QuerySet: The reports received within the period of time.

        """
        # pylint: disable=no-self-use
        # self might be used by subclasses.
        bounded = query_objects.filter(created_at__lte=up_to)
        if not starting_from:
            return bounded
        return bounded.filter(created_at__gt=starting_from)

    def _unique_objects_per_day(self, query_objects):
        """Count the unique reports per version per day.

        Args:
            query_objects (QuerySet): The reports to count.
        Returns:
            QuerySet: One dict per (version, day) group holding the version
            field, `_report_day` and `count`.

        """
        with_day = query_objects.annotate(_report_day=TruncDate("date"))
        grouped = with_day.values(self.version_field_name, "_report_day")
        # FIXME Aggressively drop duplicates: reports sharing the same
        # "date" value within a group are counted only once.
        return grouped.annotate(count=Count("date", distinct=True))

    def delete_stats(self):
        """Delete the general and daily stats instances.

        Returns:
            dict(str, int): The count of deleted entries per model name.

        """
        # Only the general stats are deleted explicitly, the daily stats
        # will be deleted by cascading effect.
        _total, deleted_per_model = self.stats_model.objects.all().delete()
        return deleted_per_model

    def update_stats(self, report_counter, up_to, starting_from=None):
        """Update one counter of the general and daily stats entries.

        The algorithm works as follows:
        1. The valid reports are restricted to a specific period of time to
           allow for comparable results between subclasses. The lower bound
           should be omitted for the first update but always set for later
           calls. The upper bound must be specified to avoid race conditions.
        2. The report counter requirements are applied to the reports.
        3. The reports are grouped per day and per version, and counted.
        4. Each group count is added to the matching daily stats entry and
           to the general stats entry of the version.

        Args:
            report_counter (_ReportCounterFilter): The report counter to
                update the stats with.
            up_to (datetime): The maximum timestamp to consider (inclusive).
            starting_from (datetime, optional): The minimum timestamp to
                consider (exclusive).
        Returns:
            dict(Model, dict(str, int)): For each of the two stats model
            classes, the number of entries hashed with the keys 'created'
            and 'updated'.

        """
        counter_field = report_counter.field_name
        tally = {
            model: {"created": 0, "updated": 0}
            for model in (self.stats_model, self.daily_stats_model)
        }

        reports = self._valid_objects(report_counter.model.objects.all())
        # Only include reports from the interesting period of time
        reports = self._objects_within_period(reports, up_to, starting_from)
        # Apply the report counter requirements
        reports = report_counter.filter(reports)
        # Group and count per version and per day
        groups = self._unique_objects_per_day(reports)

        # The QuerySet is consumed only once, so iterator() is used to skip
        # the result cache.
        for group in groups.iterator():
            day = group["_report_day"]
            # The version field name is dynamic, hence the dict expansion
            version_lookup = {
                self.version_field_name: group[self.version_field_name]
            }
            stats, is_new = self.stats_model.objects.get_or_create(
                defaults={"first_seen_on": day, "released_on": day},
                **version_lookup
            )
            tally[self.stats_model]["created" if is_new else "updated"] += 1

            # Reports are coming in an unordered manner, a late report can
            # be older (device time wise). Make sure that the current report
            # day is taken into account in the version history.
            if not is_new and stats.first_seen_on > day:
                # released_on is only moved along while it still equals
                # first_seen_on (its default); a different value indicates
                # that it was manually changed.
                if stats.released_on == stats.first_seen_on:
                    stats.released_on = day
                stats.first_seen_on = day

            daily_stats, is_new = self.daily_stats_model.objects.get_or_create(
                version=stats, date=day
            )
            tally[self.daily_stats_model][
                "created" if is_new else "updated"
            ] += 1

            # The increment is expressed with F() so that the addition is
            # carried out by the database on save.
            for entry in (stats, daily_stats):
                setattr(entry, counter_field, F(counter_field) + group["count"])

            stats.save()
            daily_stats.save()

        return tally
357
358
class VersionStatsEngine(_StatsModelsEngine):
    """Version stats engine.

    Updates the counters of the Version model and of its daily counterpart
    (VersionDaily), keyed on the `build_fingerprint` field.
    """

    def __init__(self):
        """Bind the engine to the Version and VersionDaily models."""
        super().__init__(Version, VersionDaily, "build_fingerprint")
Borjan Tchakaloffb98dba72018-03-16 11:04:47 +0400373
374
class RadioVersionStatsEngine(_StatsModelsEngine):
    """Radio version stats engine.

    Updates the counters of the RadioVersion model and of its daily
    counterpart (RadioVersionDaily), keyed on the `radio_version` field.
    """

    def __init__(self):
        """Bind the engine to the RadioVersion and RadioVersionDaily models."""
        super().__init__(RadioVersion, RadioVersionDaily, "radio_version")

    def _valid_objects(self, query_objects):
        """Keep only the reports that carry a radio version.

        Args:
            query_objects (QuerySet): The reports to filter.
        Returns:
            QuerySet: The reports whose `radio_version` is not null.

        """
        # For legacy reasons, the version field might be null
        with_radio_version = query_objects.filter(radio_version__isnull=False)
        return with_radio_version
393
394
class Command(BaseCommand):
    """Management command to compute Hiccup statistics."""

    # The engines, one per pair of (general, daily) stats models
    _STATS_MODELS_ENGINES = [VersionStatsEngine(), RadioVersionStatsEngine()]

    # All the report counters that are listed in the stats models
    _REPORT_COUNTER_FILTERS = [
        HeartBeatCounterFilter(),
        CrashreportCounterFilter(
            name="crashes",
            field_name="prob_crashes",
            include_boot_reasons=Crashreport.CRASH_BOOT_REASONS,
        ),
        CrashreportCounterFilter(
            name="smpl",
            field_name="smpl",
            include_boot_reasons=Crashreport.SMPL_BOOT_REASONS,
        ),
        # Everything that is neither a crash nor an SMPL
        CrashreportCounterFilter(
            name="other",
            field_name="other",
            exclude_boot_reasons=(
                Crashreport.SMPL_BOOT_REASONS + Crashreport.CRASH_BOOT_REASONS
            ),
        ),
    ]

    # The command help text is the module docstring
    help = __doc__

    def add_arguments(self, parser):
        """Add custom arguments to the command.

        Args:
            parser (argparse.ArgumentParser): The command argument parser.

        """
        parser.add_argument("action", choices=["reset", "update"])

    def handle(self, *args, **options):
        """Carry out the command executive logic.

        The `reset` action deletes all the stats and recomputes them from
        scratch, the `update` action only accounts for the reports received
        since the previous update.

        Args:
            *args: Unused positional arguments.
            **options: The parsed command options; `action` and `verbosity`
                are read.

        """
        # pylint: disable=attribute-defined-outside-init
        # self.debug is only ever read through calls of handle().
        self.debug = int(options["verbosity"]) >= 2

        if options["action"] == "reset":
            self.delete_all_stats()
            self.update_all_stats()
        elif options["action"] == "update":
            self.update_all_stats()

    def _success(self, msg, *args, **kwargs):
        """Write a message to stdout using the SUCCESS style.

        Args:
            msg (str): The message to write.
            *args: Forwarded to `self.stdout.write`.
            **kwargs: Forwarded to `self.stdout.write`.

        """
        # pylint: disable=no-member
        # Members of Style are generated and cannot be statically inferred.
        self.stdout.write(self.style.SUCCESS(msg), *args, **kwargs)

    def delete_all_stats(self):
        """Delete the statistics from all stats models.

        The stats of every engine and the stats metadata are deleted within
        a single transaction.
        """
        with transaction.atomic():
            for engine in self._STATS_MODELS_ENGINES:
                counts_per_model = engine.delete_stats()
                if self.debug:
                    # Default the count of deleted models to 0 if missing
                    if not counts_per_model:
                        counts_per_model = {
                            engine.stats_model._meta.label: 0,
                            engine.daily_stats_model._meta.label: 0,
                        }
                    for model, count in counts_per_model.items():
                        # Strip the app label from the model label
                        name = model.split(".")[-1]
                        self._success("{} {} deleted".format(count, name))

            # Reset the metadata
            count, _ = StatsMetadata.objects.all().delete()
            if self.debug:
                self._success("{} StatsMetadata deleted".format(count))

    def update_all_stats(self):
        """Update the statistics from all stats models.

        Only the reports created after the previous update (if any) and up
        to now are taken into account. The counter increments of all engines
        and the new update marker are committed together.
        """
        try:
            previous_update = StatsMetadata.objects.latest("updated_at")
            starting_from = previous_update.updated_at
        except StatsMetadata.DoesNotExist:
            # First update ever (or first after a reset): no lower bound
            starting_from = None
        # Fix the upper limit to avoid race conditions with new reports sent
        # while we are updating the different statistics
        up_to = datetime.datetime.now(tz=pytz.utc)

        # The counters are incremented relative to their current value, so
        # the increments and the update marker must be committed atomically.
        # Otherwise a failure after some increments were committed but before
        # the marker was saved would make the next run count the same
        # reports a second time.
        with transaction.atomic():
            for engine in self._STATS_MODELS_ENGINES:
                for filter_ in self._REPORT_COUNTER_FILTERS:
                    counts_per_model = engine.update_stats(
                        filter_, up_to, starting_from
                    )
                    if self.debug:
                        for model, counts in counts_per_model.items():
                            for action, count in counts.items():
                                msg = "{} {} {} for counter {}".format(
                                    count, model.__name__, action, filter_.name
                                )
                                self._success(msg)

            StatsMetadata(updated_at=up_to).save()
491 StatsMetadata(updated_at=up_to).save()