Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2015 Intel Corporation |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 21 | * IN THE SOFTWARE. |
| 22 | * |
| 23 | */ |
| 24 | |
Damien Lespiau | 515cec1 | 2015-06-25 23:59:21 +0100 | [diff] [blame] | 25 | #include <math.h> |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 26 | #include <stdlib.h> |
Damien Lespiau | 203c384 | 2015-06-26 13:55:01 +0100 | [diff] [blame] | 27 | #include <string.h> |
Damien Lespiau | 515cec1 | 2015-06-25 23:59:21 +0100 | [diff] [blame] | 28 | |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 29 | #include "igt_core.h" |
| 30 | #include "igt_stats.h" |
| 31 | |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 32 | #define U64_MAX ((uint64_t)~0ULL) |
| 33 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 34 | #define sorted_value(stats, i) (stats->is_float ? stats->sorted_f[i] : stats->sorted_u64[i]) |
| 35 | #define unsorted_value(stats, i) (stats->is_float ? stats->values_f[i] : stats->values_u64[i]) |
| 36 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 37 | /** |
| 38 | * SECTION:igt_stats |
| 39 | * @short_description: Tools for statistical analysis |
| 40 | * @title: Stats |
Thomas Wood | f0381d1 | 2015-09-07 09:26:01 +0100 | [diff] [blame] | 41 | * @include: igt.h |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 42 | * |
| 43 | * Various tools to make sense of data. |
| 44 | * |
| 45 | * #igt_stats_t is a container of data samples. igt_stats_push() is used to add |
| 46 | * new samples and various results (mean, variance, standard deviation, ...) |
| 47 | * can then be retrieved. |
| 48 | * |
| 49 | * |[ |
| 50 | * igt_stats_t stats; |
| 51 | * |
| 52 | * igt_stats_init(&stats, 8); |
| 53 | * |
| 54 | * igt_stats_push(&stats, 2); |
| 55 | * igt_stats_push(&stats, 4); |
| 56 | * igt_stats_push(&stats, 4); |
| 57 | * igt_stats_push(&stats, 4); |
| 58 | * igt_stats_push(&stats, 5); |
| 59 | * igt_stats_push(&stats, 5); |
| 60 | * igt_stats_push(&stats, 7); |
| 61 | * igt_stats_push(&stats, 9); |
| 62 | * |
| 63 | * printf("Mean: %lf\n", igt_stats_get_mean(&stats)); |
| 64 | * |
| 65 | * igt_stats_fini(&stats); |
| 66 | * ]| |
| 67 | */ |
| 68 | |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 69 | static unsigned int get_new_capacity(int need) |
| 70 | { |
| 71 | unsigned int new_capacity; |
| 72 | |
| 73 | /* taken from Python's list */ |
| 74 | new_capacity = (need >> 6) + (need < 9 ? 3 : 6); |
| 75 | new_capacity += need; |
| 76 | |
| 77 | return new_capacity; |
| 78 | } |
| 79 | |
| 80 | static void igt_stats_ensure_capacity(igt_stats_t *stats, |
| 81 | unsigned int n_additional_values) |
| 82 | { |
| 83 | unsigned int new_n_values = stats->n_values + n_additional_values; |
| 84 | unsigned int new_capacity; |
| 85 | |
| 86 | if (new_n_values <= stats->capacity) |
| 87 | return; |
| 88 | |
| 89 | new_capacity = get_new_capacity(new_n_values); |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 90 | stats->values_u64 = realloc(stats->values_u64, |
| 91 | sizeof(*stats->values_u64) * new_capacity); |
| 92 | igt_assert(stats->values_u64); |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 93 | |
| 94 | stats->capacity = new_capacity; |
| 95 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 96 | free(stats->sorted_u64); |
| 97 | stats->sorted_u64 = NULL; |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 98 | } |
| 99 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 100 | /** |
| 101 | * igt_stats_init: |
| 102 | * @stats: An #igt_stats_t instance |
Damien Lespiau | 66e0bf6 | 2015-06-27 17:49:40 +0100 | [diff] [blame] | 103 | * |
| 104 | * Initializes an #igt_stats_t instance. igt_stats_fini() must be called once |
| 105 | * finished with @stats. |
| 106 | */ |
| 107 | void igt_stats_init(igt_stats_t *stats) |
| 108 | { |
| 109 | memset(stats, 0, sizeof(*stats)); |
| 110 | |
| 111 | igt_stats_ensure_capacity(stats, 128); |
| 112 | |
| 113 | stats->min = U64_MAX; |
| 114 | stats->max = 0; |
| 115 | } |
| 116 | |
| 117 | /** |
| 118 | * igt_stats_init_with_size: |
| 119 | * @stats: An #igt_stats_t instance |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 120 | * @capacity: Number of data samples @stats can contain |
| 121 | * |
Damien Lespiau | 66e0bf6 | 2015-06-27 17:49:40 +0100 | [diff] [blame] | 122 | * Like igt_stats_init() but with a size to avoid reallocating the underlying |
| 123 | * array(s) when pushing new values. Useful if we have a good idea of the |
| 124 | * number of data points we want @stats to hold. |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 125 | * |
Damien Lespiau | 66e0bf6 | 2015-06-27 17:49:40 +0100 | [diff] [blame] | 126 | * igt_stats_fini() must be called once finished with @stats. |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 127 | */ |
Damien Lespiau | 66e0bf6 | 2015-06-27 17:49:40 +0100 | [diff] [blame] | 128 | void igt_stats_init_with_size(igt_stats_t *stats, unsigned int capacity) |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 129 | { |
Damien Lespiau | 203c384 | 2015-06-26 13:55:01 +0100 | [diff] [blame] | 130 | memset(stats, 0, sizeof(*stats)); |
| 131 | |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 132 | igt_stats_ensure_capacity(stats, capacity); |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 133 | |
| 134 | stats->min = U64_MAX; |
| 135 | stats->max = 0; |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 136 | stats->range[0] = HUGE_VAL; |
| 137 | stats->range[1] = -HUGE_VAL; |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 138 | } |
| 139 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 140 | /** |
| 141 | * igt_stats_fini: |
| 142 | * @stats: An #igt_stats_t instance |
| 143 | * |
| 144 | * Frees resources allocated in igt_stats_init(). |
| 145 | */ |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 146 | void igt_stats_fini(igt_stats_t *stats) |
| 147 | { |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 148 | free(stats->values_u64); |
| 149 | free(stats->sorted_u64); |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 150 | } |
| 151 | |
Damien Lespiau | da123ad | 2015-06-26 18:04:34 +0100 | [diff] [blame] | 152 | |
| 153 | /** |
| 154 | * igt_stats_is_population: |
| 155 | * @stats: An #igt_stats_t instance |
| 156 | * |
| 157 | * Returns: #true if @stats represents a population, #false if only a sample. |
| 158 | * |
| 159 | * See igt_stats_set_population() for more details. |
| 160 | */ |
| 161 | bool igt_stats_is_population(igt_stats_t *stats) |
| 162 | { |
| 163 | return stats->is_population; |
| 164 | } |
| 165 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 166 | /** |
Damien Lespiau | 3a5cf84 | 2015-06-26 17:02:09 +0100 | [diff] [blame] | 167 | * igt_stats_set_population: |
| 168 | * @stats: An #igt_stats_t instance |
| 169 | * @full_population: Whether we're dealing with sample data or a full |
| 170 | * population |
| 171 | * |
| 172 | * In statistics, we usually deal with a subset of the full data (which may be |
| 173 | * a continuous or infinite set). Data analysis is then done on a sample of |
| 174 | * this population. |
| 175 | * |
| 176 | * This has some importance as only having a sample of the data leads to |
| 177 | * [biased estimators](https://en.wikipedia.org/wiki/Bias_of_an_estimator). We |
| 178 | * currently used the information given by this method to apply |
| 179 | * [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction) |
| 180 | * to the variance. |
| 181 | * |
Damien Lespiau | 17ed69f | 2015-06-26 18:19:42 +0100 | [diff] [blame] | 182 | * Note that even if we manage to have an unbiased variance by multiplying |
| 183 | * a sample variance by the Bessel's correction, n/(n - 1), the standard |
| 184 | * deviation derived from the unbiased variance isn't itself unbiased. |
| 185 | * Statisticians talk about a "corrected" standard deviation. |
| 186 | * |
Damien Lespiau | 3a5cf84 | 2015-06-26 17:02:09 +0100 | [diff] [blame] | 187 | * When giving #true to this function, the data set in @stats is considered a |
| 188 | * full population. It's considered a sample of a bigger population otherwise. |
| 189 | * |
| 190 | * When newly created, @stats defaults to holding sample data. |
| 191 | */ |
| 192 | void igt_stats_set_population(igt_stats_t *stats, bool full_population) |
| 193 | { |
| 194 | if (full_population == stats->is_population) |
| 195 | return; |
| 196 | |
| 197 | stats->is_population = full_population; |
| 198 | stats->mean_variance_valid = false; |
| 199 | } |
| 200 | |
| 201 | /** |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 202 | * igt_stats_push: |
| 203 | * @stats: An #igt_stats_t instance |
| 204 | * @value: An integer value |
| 205 | * |
| 206 | * Adds a new value to the @stats dataset. |
| 207 | */ |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 208 | void igt_stats_push(igt_stats_t *stats, uint64_t value) |
| 209 | { |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 210 | if (stats->is_float) { |
| 211 | igt_stats_push_float(stats, value); |
| 212 | return; |
| 213 | } |
| 214 | |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 215 | igt_stats_ensure_capacity(stats, 1); |
| 216 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 217 | stats->values_u64[stats->n_values++] = value; |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 218 | |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 219 | stats->mean_variance_valid = false; |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 220 | stats->sorted_array_valid = false; |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 221 | |
| 222 | if (value < stats->min) |
| 223 | stats->min = value; |
| 224 | if (value > stats->max) |
| 225 | stats->max = value; |
| 226 | } |
| 227 | |
| 228 | /** |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 229 | * igt_stats_push: |
| 230 | * @stats: An #igt_stats_t instance |
| 231 | * @value: An floating point |
| 232 | * |
| 233 | * Adds a new value to the @stats dataset and converts the igt_stats from |
| 234 | * an integer collection to a floating point one. |
| 235 | */ |
| 236 | void igt_stats_push_float(igt_stats_t *stats, double value) |
| 237 | { |
| 238 | igt_stats_ensure_capacity(stats, 1); |
| 239 | |
| 240 | if (!stats->is_float) { |
| 241 | int n; |
| 242 | |
| 243 | for (n = 0; n < stats->n_values; n++) |
| 244 | stats->values_f[n] = stats->values_u64[n]; |
| 245 | |
| 246 | stats->is_float = true; |
| 247 | } |
| 248 | |
| 249 | stats->values_f[stats->n_values++] = value; |
| 250 | |
| 251 | stats->mean_variance_valid = false; |
| 252 | stats->sorted_array_valid = false; |
| 253 | |
| 254 | if (value < stats->range[0]) |
| 255 | stats->range[0] = value; |
| 256 | if (value > stats->range[1]) |
| 257 | stats->range[1] = value; |
| 258 | } |
| 259 | |
| 260 | /** |
Damien Lespiau | 3839bac | 2015-06-27 15:32:23 +0100 | [diff] [blame] | 261 | * igt_stats_push_array: |
| 262 | * @stats: An #igt_stats_t instance |
| 263 | * @values: (array length=n_values): A pointer to an array of data points |
| 264 | * @n_values: The number of data points to add |
| 265 | * |
| 266 | * Adds an array of values to the @stats dataset. |
| 267 | */ |
| 268 | void igt_stats_push_array(igt_stats_t *stats, |
| 269 | const uint64_t *values, unsigned int n_values) |
| 270 | { |
| 271 | unsigned int i; |
| 272 | |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 273 | igt_stats_ensure_capacity(stats, n_values); |
| 274 | |
Damien Lespiau | 3839bac | 2015-06-27 15:32:23 +0100 | [diff] [blame] | 275 | for (i = 0; i < n_values; i++) |
| 276 | igt_stats_push(stats, values[i]); |
| 277 | } |
| 278 | |
| 279 | /** |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 280 | * igt_stats_get_min: |
| 281 | * @stats: An #igt_stats_t instance |
| 282 | * |
| 283 | * Retrieves the minimal value in @stats |
| 284 | */ |
| 285 | uint64_t igt_stats_get_min(igt_stats_t *stats) |
| 286 | { |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 287 | igt_assert(!stats->is_float); |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 288 | return stats->min; |
| 289 | } |
| 290 | |
| 291 | /** |
| 292 | * igt_stats_get_max: |
| 293 | * @stats: An #igt_stats_t instance |
| 294 | * |
| 295 | * Retrieves the maximum value in @stats |
| 296 | */ |
| 297 | uint64_t igt_stats_get_max(igt_stats_t *stats) |
| 298 | { |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 299 | igt_assert(!stats->is_float); |
Damien Lespiau | 4a89a84 | 2015-06-27 09:45:42 +0100 | [diff] [blame] | 300 | return stats->max; |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 301 | } |
| 302 | |
Damien Lespiau | 0e4c175 | 2015-06-27 11:12:01 +0100 | [diff] [blame] | 303 | /** |
| 304 | * igt_stats_get_range: |
| 305 | * @stats: An #igt_stats_t instance |
| 306 | * |
| 307 | * Retrieves the range of the values in @stats. The range is the difference |
| 308 | * between the highest and the lowest value. |
| 309 | * |
| 310 | * The range can be a deceiving characterization of the values, because there |
| 311 | * can be extreme minimal and maximum values that are just anomalies. Prefer |
Damien Lespiau | fabde38 | 2015-06-27 15:49:26 +0100 | [diff] [blame] | 312 | * the interquatile range (see igt_stats_get_iqr()) or an histogram. |
Damien Lespiau | 0e4c175 | 2015-06-27 11:12:01 +0100 | [diff] [blame] | 313 | */ |
| 314 | uint64_t igt_stats_get_range(igt_stats_t *stats) |
| 315 | { |
| 316 | return igt_stats_get_max(stats) - igt_stats_get_min(stats); |
| 317 | } |
| 318 | |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 319 | static int cmp_u64(const void *pa, const void *pb) |
| 320 | { |
| 321 | const uint64_t *a = pa, *b = pb; |
| 322 | |
| 323 | if (*a < *b) |
| 324 | return -1; |
| 325 | if (*a > *b) |
| 326 | return 1; |
| 327 | return 0; |
| 328 | } |
| 329 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 330 | static int cmp_f(const void *pa, const void *pb) |
| 331 | { |
| 332 | const double *a = pa, *b = pb; |
| 333 | |
| 334 | if (*a < *b) |
| 335 | return -1; |
| 336 | if (*a > *b) |
| 337 | return 1; |
| 338 | return 0; |
| 339 | } |
| 340 | |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 341 | static void igt_stats_ensure_sorted_values(igt_stats_t *stats) |
| 342 | { |
| 343 | if (stats->sorted_array_valid) |
| 344 | return; |
| 345 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 346 | if (!stats->sorted_u64) { |
Damien Lespiau | 00432ff | 2015-06-27 17:34:51 +0100 | [diff] [blame] | 347 | /* |
| 348 | * igt_stats_ensure_capacity() will free ->sorted when the |
| 349 | * capacity increases, which also correspond to an invalidation |
| 350 | * of the sorted array. We'll then reallocate it here on |
| 351 | * demand. |
| 352 | */ |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 353 | stats->sorted_u64 = calloc(stats->capacity, |
| 354 | sizeof(*stats->values_u64)); |
| 355 | igt_assert(stats->sorted_u64); |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 356 | } |
| 357 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 358 | memcpy(stats->sorted_u64, stats->values_u64, |
| 359 | sizeof(*stats->values_u64) * stats->n_values); |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 360 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 361 | qsort(stats->sorted_u64, stats->n_values, sizeof(*stats->values_u64), |
| 362 | stats->is_float ? cmp_f : cmp_u64); |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 363 | |
| 364 | stats->sorted_array_valid = true; |
| 365 | } |
| 366 | |
| 367 | /* |
| 368 | * We use Tukey's hinge for our quartiles determination. |
| 369 | * ends (end, lower_end) are exclusive. |
| 370 | */ |
| 371 | static double |
| 372 | igt_stats_get_median_internal(igt_stats_t *stats, |
| 373 | unsigned int start, unsigned int end, |
| 374 | unsigned int *lower_end /* out */, |
| 375 | unsigned int *upper_start /* out */) |
| 376 | { |
| 377 | unsigned int mid, n_values = end - start; |
| 378 | double median; |
| 379 | |
| 380 | igt_stats_ensure_sorted_values(stats); |
| 381 | |
| 382 | /* odd number of data points */ |
| 383 | if (n_values % 2 == 1) { |
| 384 | /* median is the value in the middle (actual datum) */ |
| 385 | mid = start + n_values / 2; |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 386 | median = sorted_value(stats, mid); |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 387 | |
| 388 | /* the two halves contain the median value */ |
| 389 | if (lower_end) |
| 390 | *lower_end = mid + 1; |
| 391 | if (upper_start) |
| 392 | *upper_start = mid; |
| 393 | |
| 394 | /* even number of data points */ |
| 395 | } else { |
| 396 | /* |
| 397 | * The middle is in between two indexes, 'mid' points at the |
| 398 | * lower one. The median is then the average between those two |
| 399 | * values. |
| 400 | */ |
| 401 | mid = start + n_values / 2 - 1; |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 402 | median = (sorted_value(stats, mid) + sorted_value(stats, mid+1))/2.; |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 403 | |
| 404 | if (lower_end) |
| 405 | *lower_end = mid + 1; |
| 406 | if (upper_start) |
| 407 | *upper_start = mid + 1; |
| 408 | } |
| 409 | |
| 410 | return median; |
| 411 | } |
| 412 | |
| 413 | /** |
| 414 | * igt_stats_get_quartiles: |
| 415 | * @stats: An #igt_stats_t instance |
| 416 | * @q1: (out): lower or 25th quartile |
| 417 | * @q2: (out): median or 50th quartile |
| 418 | * @q3: (out): upper or 75th quartile |
| 419 | * |
| 420 | * Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the |
| 421 | * @stats dataset. |
| 422 | */ |
| 423 | void igt_stats_get_quartiles(igt_stats_t *stats, |
| 424 | double *q1, double *q2, double *q3) |
| 425 | { |
| 426 | unsigned int lower_end, upper_start; |
| 427 | double ret; |
| 428 | |
| 429 | if (stats->n_values < 3) { |
| 430 | if (q1) |
| 431 | *q1 = 0.; |
| 432 | if (q2) |
| 433 | *q2 = 0.; |
| 434 | if (q3) |
| 435 | *q3 = 0.; |
| 436 | return; |
| 437 | } |
| 438 | |
| 439 | ret = igt_stats_get_median_internal(stats, 0, stats->n_values, |
| 440 | &lower_end, &upper_start); |
| 441 | if (q2) |
| 442 | *q2 = ret; |
| 443 | |
| 444 | ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL); |
| 445 | if (q1) |
| 446 | *q1 = ret; |
| 447 | |
| 448 | ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values, |
| 449 | NULL, NULL); |
| 450 | if (q3) |
| 451 | *q3 = ret; |
| 452 | } |
| 453 | |
| 454 | /** |
Damien Lespiau | fabde38 | 2015-06-27 15:49:26 +0100 | [diff] [blame] | 455 | * igt_stats_get_iqr: |
| 456 | * @stats: An #igt_stats_t instance |
| 457 | * |
| 458 | * Retrieves the |
| 459 | * [interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) |
| 460 | * (IQR) of the @stats dataset. |
| 461 | */ |
| 462 | double igt_stats_get_iqr(igt_stats_t *stats) |
| 463 | { |
| 464 | double q1, q3; |
| 465 | |
| 466 | igt_stats_get_quartiles(stats, &q1, NULL, &q3); |
| 467 | return (q3 - q1); |
| 468 | } |
| 469 | |
| 470 | /** |
Damien Lespiau | 1b8997b | 2015-06-27 15:33:58 +0100 | [diff] [blame] | 471 | * igt_stats_get_median: |
| 472 | * @stats: An #igt_stats_t instance |
| 473 | * |
| 474 | * Retrieves the median of the @stats dataset. |
| 475 | */ |
| 476 | double igt_stats_get_median(igt_stats_t *stats) |
| 477 | { |
| 478 | return igt_stats_get_median_internal(stats, 0, stats->n_values, |
| 479 | NULL, NULL); |
| 480 | } |
| 481 | |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 482 | /* |
| 483 | * Algorithm popularised by Knuth in: |
| 484 | * |
| 485 | * The Art of Computer Programming, volume 2: Seminumerical Algorithms, |
| 486 | * 3rd edn., p. 232. Boston: Addison-Wesley |
| 487 | * |
| 488 | * Source: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance |
| 489 | */ |
| 490 | static void igt_stats_knuth_mean_variance(igt_stats_t *stats) |
| 491 | { |
| 492 | double mean = 0., m2 = 0.; |
| 493 | unsigned int i; |
| 494 | |
| 495 | if (stats->mean_variance_valid) |
| 496 | return; |
| 497 | |
| 498 | for (i = 0; i < stats->n_values; i++) { |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 499 | double delta = unsorted_value(stats, i) - mean; |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 500 | |
| 501 | mean += delta / (i + 1); |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 502 | m2 += delta * (unsorted_value(stats, i) - mean); |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 503 | } |
| 504 | |
| 505 | stats->mean = mean; |
Damien Lespiau | 3a5cf84 | 2015-06-26 17:02:09 +0100 | [diff] [blame] | 506 | if (stats->n_values > 1 && !stats->is_population) |
| 507 | stats->variance = m2 / (stats->n_values - 1); |
| 508 | else |
| 509 | stats->variance = m2 / stats->n_values; |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 510 | stats->mean_variance_valid = true; |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 511 | } |
| 512 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 513 | /** |
| 514 | * igt_stats_get_mean: |
| 515 | * @stats: An #igt_stats_t instance |
| 516 | * |
| 517 | * Retrieves the mean of the @stats dataset. |
| 518 | */ |
Damien Lespiau | e55a11d | 2015-06-25 23:44:20 +0100 | [diff] [blame] | 519 | double igt_stats_get_mean(igt_stats_t *stats) |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 520 | { |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 521 | igt_stats_knuth_mean_variance(stats); |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 522 | |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 523 | return stats->mean; |
| 524 | } |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 525 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 526 | /** |
| 527 | * igt_stats_get_variance: |
| 528 | * @stats: An #igt_stats_t instance |
| 529 | * |
| 530 | * Retrieves the variance of the @stats dataset. |
| 531 | */ |
Damien Lespiau | 05c10f9 | 2015-06-25 23:57:49 +0100 | [diff] [blame] | 532 | double igt_stats_get_variance(igt_stats_t *stats) |
| 533 | { |
| 534 | igt_stats_knuth_mean_variance(stats); |
| 535 | |
| 536 | return stats->variance; |
Damien Lespiau | 06f5f70 | 2015-06-25 12:07:56 +0100 | [diff] [blame] | 537 | } |
Damien Lespiau | 515cec1 | 2015-06-25 23:59:21 +0100 | [diff] [blame] | 538 | |
Damien Lespiau | a2f6fd3 | 2015-06-26 16:57:55 +0100 | [diff] [blame] | 539 | /** |
| 540 | * igt_stats_get_std_deviation: |
| 541 | * @stats: An #igt_stats_t instance |
| 542 | * |
| 543 | * Retrieves the standard deviation of the @stats dataset. |
| 544 | */ |
Damien Lespiau | 515cec1 | 2015-06-25 23:59:21 +0100 | [diff] [blame] | 545 | double igt_stats_get_std_deviation(igt_stats_t *stats) |
| 546 | { |
| 547 | igt_stats_knuth_mean_variance(stats); |
| 548 | |
| 549 | return sqrt(stats->variance); |
| 550 | } |
Chris Wilson | 19135a3 | 2015-07-01 13:50:02 +0100 | [diff] [blame] | 551 | |
| 552 | /** |
| 553 | * igt_stats_get_iqm: |
| 554 | * @stats: An #igt_stats_t instance |
| 555 | * |
Damien Lespiau | 643aab2 | 2015-07-02 00:16:07 +0100 | [diff] [blame] | 556 | * Retrieves the |
| 557 | * [interquartile mean](https://en.wikipedia.org/wiki/Interquartile_mean) (IQM) |
| 558 | * of the @stats dataset. |
Chris Wilson | 19135a3 | 2015-07-01 13:50:02 +0100 | [diff] [blame] | 559 | * |
| 560 | * The interquartile mean is a "statistical measure of central tendency". |
| 561 | * It is a truncated mean that discards the lowest and highest 25% of values, |
| 562 | * and calculates the mean value of the remaining central values. |
Damien Lespiau | 643aab2 | 2015-07-02 00:16:07 +0100 | [diff] [blame] | 563 | * |
| 564 | * It's useful to hide outliers in measurements (due to cold cache etc). |
Chris Wilson | 19135a3 | 2015-07-01 13:50:02 +0100 | [diff] [blame] | 565 | */ |
| 566 | double igt_stats_get_iqm(igt_stats_t *stats) |
| 567 | { |
| 568 | unsigned int q1, q3, i; |
| 569 | double mean; |
| 570 | |
| 571 | igt_stats_ensure_sorted_values(stats); |
| 572 | |
| 573 | q1 = (stats->n_values + 3) / 4; |
| 574 | q3 = 3 * stats->n_values / 4; |
| 575 | |
| 576 | mean = 0; |
| 577 | for (i = 0; i <= q3 - q1; i++) |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 578 | mean += (sorted_value(stats, q1 + i) - mean) / (i + 1); |
Chris Wilson | 19135a3 | 2015-07-01 13:50:02 +0100 | [diff] [blame] | 579 | |
| 580 | if (stats->n_values % 4) { |
| 581 | double rem = .5 * (stats->n_values % 4) / 4; |
| 582 | |
| 583 | q1 = (stats->n_values) / 4; |
| 584 | q3 = (3 * stats->n_values + 3) / 4; |
| 585 | |
Chris Wilson | 8506cdc | 2015-07-19 15:01:42 +0100 | [diff] [blame] | 586 | mean += rem * (sorted_value(stats, q1) - mean) / i++; |
| 587 | mean += rem * (sorted_value(stats, q3) - mean) / i++; |
Chris Wilson | 19135a3 | 2015-07-01 13:50:02 +0100 | [diff] [blame] | 588 | } |
| 589 | |
| 590 | return mean; |
| 591 | } |
Chris Wilson | 2d305f6 | 2015-07-01 18:52:46 +0100 | [diff] [blame] | 592 | |
| 593 | /** |
| 594 | * igt_stats_get_trimean: |
| 595 | * @stats: An #igt_stats_t instance |
| 596 | * |
Damien Lespiau | 643aab2 | 2015-07-02 00:16:07 +0100 | [diff] [blame] | 597 | * Retrieves the [trimean](https://en.wikipedia.org/wiki/Trimean) of the @stats |
| 598 | * dataset. |
Chris Wilson | 2d305f6 | 2015-07-01 18:52:46 +0100 | [diff] [blame] | 599 | * |
| 600 | * The trimean is a the most efficient 3-point L-estimator, even more |
| 601 | * robust than the median at estimating the average of a sample population. |
| 602 | */ |
| 603 | double igt_stats_get_trimean(igt_stats_t *stats) |
| 604 | { |
| 605 | double q1, q2, q3; |
| 606 | igt_stats_get_quartiles(stats, &q1, &q2, &q3); |
| 607 | return (q1 + 2*q2 + q3) / 4; |
| 608 | } |
Daniel Vetter | 4755804 | 2016-07-27 14:04:17 +0200 | [diff] [blame] | 609 | |
| 610 | /** |
| 611 | * igt_mean_init: |
| 612 | * @m: tracking structure |
| 613 | * |
| 614 | * Initializes or resets @m. |
| 615 | */ |
| 616 | void igt_mean_init(struct igt_mean *m) |
| 617 | { |
| 618 | memset(m, 0, sizeof(*m)); |
| 619 | m->max = -HUGE_VAL; |
| 620 | m->min = HUGE_VAL; |
| 621 | } |
| 622 | |
| 623 | /** |
| 624 | * igt_mean_add: |
| 625 | * @m: tracking structure |
| 626 | * @v: value |
| 627 | * |
| 628 | * Adds a new value @v to @m. |
| 629 | */ |
| 630 | void igt_mean_add(struct igt_mean *m, double v) |
| 631 | { |
| 632 | double delta = v - m->mean; |
| 633 | m->mean += delta / ++m->count; |
| 634 | m->sq += delta * (v - m->mean); |
| 635 | if (v < m->min) |
| 636 | m->min = v; |
| 637 | if (v > m->max) |
| 638 | m->max = v; |
| 639 | } |
| 640 | |
| 641 | /** |
| 642 | * igt_mean_get: |
| 643 | * @m: tracking structure |
| 644 | * |
| 645 | * Computes the current mean of the samples tracked in @m. |
| 646 | */ |
| 647 | double igt_mean_get(struct igt_mean *m) |
| 648 | { |
| 649 | return m->mean; |
| 650 | } |
| 651 | |
| 652 | /** |
| 653 | * igt_mean_get_variance: |
| 654 | * @m: tracking structure |
| 655 | * |
| 656 | * Computes the current variance of the samples tracked in @m. |
| 657 | */ |
| 658 | double igt_mean_get_variance(struct igt_mean *m) |
| 659 | { |
| 660 | return m->sq / m->count; |
| 661 | } |
| 662 | |