/*
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#define _GNU_SOURCE
#include <linux/types.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <sched.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <stdlib.h>
#include <time.h>
#include "libbpf.h"
23
/* NOTE: evaluates its arguments more than once. */
#define min(a, b) ((a) < (b) ? (a) : (b))
/* Local copies of the kernel's offsetof/container_of helpers. */
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - offsetof(type,member) );})

static int nr_cpus;			/* number of configured CPUs */
static unsigned long long *dist_keys;	/* keys loaded from the dist file */
static unsigned int dist_key_counts;	/* number of entries in dist_keys */
33
/* Minimal doubly-linked circular list, modeled on the kernel's list.h. */
struct list_head {
	struct list_head *next, *prev;
};
37
38static inline void INIT_LIST_HEAD(struct list_head *list)
39{
40 list->next = list;
41 list->prev = list;
42}
43
44static inline int list_empty(const struct list_head *head)
45{
46 return head->next == head;
47}
48
49static inline void __list_add(struct list_head *new,
50 struct list_head *prev,
51 struct list_head *next)
52{
53 next->prev = new;
54 new->next = next;
55 new->prev = prev;
56 prev->next = new;
57}
58
59static inline void list_add(struct list_head *new, struct list_head *head)
60{
61 __list_add(new, head, head->next);
62}
63
64static inline void __list_del(struct list_head *prev, struct list_head *next)
65{
66 next->prev = prev;
67 prev->next = next;
68}
69
70static inline void __list_del_entry(struct list_head *entry)
71{
72 __list_del(entry->prev, entry->next);
73}
74
75static inline void list_move(struct list_head *list, struct list_head *head)
76{
77 __list_del_entry(list);
78 list_add(list, head);
79}
80
/* Cast a member pointer back to its containing structure. */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

/* Last entry on the list: the head's ->prev link. */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)
86
/* One slot in the user-space "perfect LRU": list node plus the key it holds. */
struct pfect_lru_node {
	struct list_head list;
	unsigned long long key;
};

/* Reference LRU built in user space to compare against the kernel LRU map. */
struct pfect_lru {
	struct list_head list;		/* recency-ordered list of live nodes */
	struct pfect_lru_node *free_nodes; /* preallocated node pool */
	unsigned int cur_size;		/* nodes handed out from the pool */
	unsigned int lru_size;		/* capacity of the LRU */
	unsigned int nr_unique;		/* distinct keys ever inserted */
	unsigned int nr_misses;		/* lookups that missed */
	unsigned int total;		/* total lookups */
	int map_fd;			/* BPF hash map: key -> node pointer */
};
102
103static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
104 unsigned int nr_possible_elems)
105{
106 lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
107 sizeof(unsigned long long),
108 sizeof(struct pfect_lru_node *),
109 nr_possible_elems, 0);
110 assert(lru->map_fd != -1);
111
112 lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
113 assert(lru->free_nodes);
114
115 INIT_LIST_HEAD(&lru->list);
116 lru->cur_size = 0;
117 lru->lru_size = lru_size;
118 lru->nr_unique = lru->nr_misses = lru->total = 0;
119}
120
121static void pfect_lru_destroy(struct pfect_lru *lru)
122{
123 close(lru->map_fd);
124 free(lru->free_nodes);
125}
126
/* Look up @key in the perfect LRU, inserting it on a miss.
 *
 * The BPF hash map maps key -> node pointer.  A NULL value is a
 * tombstone: the key was seen before but its node has been evicted.
 * This lets nr_unique count distinct keys across evictions.
 *
 * Returns nonzero if @key had been seen before (live hit or tombstone),
 * 0 for a never-seen key.
 */
static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
				      unsigned long long key)
{
	struct pfect_lru_node *node = NULL;
	int seen = 0;

	lru->total++;
	if (!bpf_lookup_elem(lru->map_fd, &key, &node)) {
		if (node) {
			/* Live hit: refresh recency and report it. */
			list_move(&node->list, &lru->list);
			return 1;
		}
		/* Tombstone: key known but evicted earlier. */
		seen = 1;
	}

	if (lru->cur_size < lru->lru_size) {
		/* Still warming up: hand out a fresh pool node. */
		node = &lru->free_nodes[lru->cur_size++];
		INIT_LIST_HEAD(&node->list);
	} else {
		struct pfect_lru_node *null_node = NULL;

		/* Evict the least-recently-used node, but keep its key in
		 * the map with a NULL value so a later re-insertion is not
		 * counted as unique.
		 */
		node = list_last_entry(&lru->list,
				       struct pfect_lru_node,
				       list);
		bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
	}

	node->key = key;
	list_move(&node->list, &lru->list);

	lru->nr_misses++;
	if (seen) {
		/* Overwrite the tombstone with the live node. */
		assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
	} else {
		lru->nr_unique++;
		assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
	}

	return seen;
}
167
168static unsigned int read_keys(const char *dist_file,
169 unsigned long long **keys)
170{
171 struct stat fst;
172 unsigned long long *retkeys;
173 unsigned int counts = 0;
174 int dist_fd;
175 char *b, *l;
176 int i;
177
178 dist_fd = open(dist_file, 0);
179 assert(dist_fd != -1);
180
181 assert(fstat(dist_fd, &fst) == 0);
182 b = malloc(fst.st_size);
183 assert(b);
184
185 assert(read(dist_fd, b, fst.st_size) == fst.st_size);
186 close(dist_fd);
187 for (i = 0; i < fst.st_size; i++) {
188 if (b[i] == '\n')
189 counts++;
190 }
191 counts++; /* in case the last line has no \n */
192
193 retkeys = malloc(counts * sizeof(unsigned long long));
194 assert(retkeys);
195
196 counts = 0;
197 for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
198 retkeys[counts++] = strtoull(l, NULL, 10);
199 free(b);
200
201 *keys = retkeys;
202
203 return counts;
204}
205
/* Create a BPF map with 8-byte keys and 8-byte values holding @size
 * elements.  Returns the map fd, or -1 (after perror) on failure.
 */
static int create_map(int map_type, int map_flags, unsigned int size)
{
	int map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
				    sizeof(unsigned long long), size,
				    map_flags);

	if (map_fd == -1)
		perror("bpf_create_map");

	return map_fd;
}
218
219static int sched_next_online(int pid, int next_to_try)
220{
221 cpu_set_t cpuset;
222
223 if (next_to_try == nr_cpus)
224 return -1;
225
226 while (next_to_try < nr_cpus) {
227 CPU_ZERO(&cpuset);
228 CPU_SET(next_to_try++, &cpuset);
229 if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
230 break;
231 }
232
233 return next_to_try;
234}
235
/* Fork @tasks child processes; child i is pinned to an online CPU and
 * runs fn(i, data), then exits.  The parent waits for every child and
 * asserts that each exited with status 0.
 */
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
			 void *data)
{
	int next_sched_cpu = 0;
	pid_t pid[tasks];
	int i;

	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			/* Child: pin to a CPU, do the work, exit. */
			next_sched_cpu = sched_next_online(0, next_sched_cpu);
			fn(i, data);
			exit(0);
		} else if (pid[i] == -1) {
			printf("couldn't spawn #%d process\n", i);
			exit(1);
		}
		/* It is mostly redundant and just allows the parent
		 * process to update next_sched_cpu for the next child
		 * process
		 */
		next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
	}
	for (i = 0; i < tasks; i++) {
		int status;

		assert(waitpid(pid[i], &status, 0) == pid[i]);
		assert(status == 0);
	}
}
266
/* Child body for test_parallel_lru_dist: replay the key distribution
 * (offset per task so tasks do not share keys) against both the kernel
 * LRU map and the user-space perfect LRU, then print miss counts for
 * comparison.  @data is {lru_map_fd, lru_size} as two unsigned ints.
 */
static void do_test_lru_dist(int task, void *data)
{
	unsigned int nr_misses = 0;
	struct pfect_lru pfect_lru;
	unsigned long long key, value = 1234;
	unsigned int i;

	unsigned int lru_map_fd = ((unsigned int *)data)[0];
	unsigned int lru_size = ((unsigned int *)data)[1];
	unsigned long long key_offset = task * dist_key_counts;

	pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);

	for (i = 0; i < dist_key_counts; i++) {
		key = dist_keys[i] + key_offset;

		pfect_lru_lookup_or_insert(&pfect_lru, key);

		/* Hit in the kernel LRU map: nothing more to do. */
		if (!bpf_lookup_elem(lru_map_fd, &key, &value))
			continue;

		/* Miss: insert the key; it must not already exist. */
		if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
			printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n",
			       key, errno);
			assert(0);
		}

		nr_misses++;
	}

	printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
	       dist_key_counts);
	printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, pfect_lru.total,
	       pfect_lru.nr_misses, pfect_lru.total);

	pfect_lru_destroy(&pfect_lru);
	close(lru_map_fd);
}
307
308static void test_parallel_lru_dist(int map_type, int map_flags,
309 int nr_tasks, unsigned int lru_size)
310{
311 int child_data[2];
312 int lru_map_fd;
313
314 printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
315 map_flags);
316
317 if (map_flags & BPF_F_NO_COMMON_LRU)
318 lru_map_fd = create_map(map_type, map_flags,
319 nr_cpus * lru_size);
320 else
321 lru_map_fd = create_map(map_type, map_flags,
322 nr_tasks * lru_size);
323 assert(lru_map_fd != -1);
324
325 child_data[0] = lru_map_fd;
326 child_data[1] = lru_size;
327
328 run_parallel(nr_tasks, do_test_lru_dist, child_data);
329
330 close(lru_map_fd);
331}
332
333static void test_lru_loss0(int map_type, int map_flags)
334{
335 unsigned long long key, value[nr_cpus];
336 unsigned int old_unused_losses = 0;
337 unsigned int new_unused_losses = 0;
338 unsigned int used_losses = 0;
339 int map_fd;
340
341 printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
342 map_flags);
343
344 assert(sched_next_online(0, 0) != -1);
345
346 if (map_flags & BPF_F_NO_COMMON_LRU)
347 map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
348 else
349 map_fd = create_map(map_type, map_flags, 900);
350
351 assert(map_fd != -1);
352
353 value[0] = 1234;
354
355 for (key = 1; key <= 1000; key++) {
356 int start_key, end_key;
357
358 assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
359
360 start_key = 101;
361 end_key = min(key, 900);
362
363 while (start_key <= end_key) {
364 bpf_lookup_elem(map_fd, &start_key, value);
365 start_key++;
366 }
367 }
368
369 for (key = 1; key <= 1000; key++) {
370 if (bpf_lookup_elem(map_fd, &key, value)) {
371 if (key <= 100)
372 old_unused_losses++;
373 else if (key <= 900)
374 used_losses++;
375 else
376 new_unused_losses++;
377 }
378 }
379
380 close(map_fd);
381
382 printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
383 "newer-elem-losses:%d(/100)\n",
384 old_unused_losses, used_losses, new_unused_losses);
385}
386
387static void test_lru_loss1(int map_type, int map_flags)
388{
389 unsigned long long key, value[nr_cpus];
390 int map_fd;
391 unsigned int nr_losses = 0;
392
393 printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
394 map_flags);
395
396 assert(sched_next_online(0, 0) != -1);
397
398 if (map_flags & BPF_F_NO_COMMON_LRU)
399 map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
400 else
401 map_fd = create_map(map_type, map_flags, 1000);
402
403 assert(map_fd != -1);
404
405 value[0] = 1234;
406
407 for (key = 1; key <= 1000; key++)
408 assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST));
409
410 for (key = 1; key <= 1000; key++) {
411 if (bpf_lookup_elem(map_fd, &key, value))
412 nr_losses++;
413 }
414
415 close(map_fd);
416
417 printf("nr_losses:%d(/1000)\n", nr_losses);
418}
419
/* Child body for test_parallel_lru_loss: insert a per-task set of 1000
 * "stable" keys, then mix lookups of stable keys (~90% of iterations)
 * with inserts of fresh churn keys (~10%); finally count how many
 * stable keys were evicted.  @data is a pointer to the shared map fd.
 */
static void do_test_parallel_lru_loss(int task, void *data)
{
	const unsigned int nr_stable_elems = 1000;
	const unsigned int nr_repeats = 100000;

	int map_fd = *(int *)data;
	unsigned long long stable_base;
	unsigned long long key, value[nr_cpus];
	unsigned long long next_ins_key;
	unsigned int nr_losses = 0;
	unsigned int i;

	/* Disjoint per-task key range; *2 leaves room for churn inserts. */
	stable_base = task * nr_repeats * 2 + 1;
	next_ins_key = stable_base;
	value[0] = 1234;
	/* Populate the stable working set. */
	for (i = 0; i < nr_stable_elems; i++) {
		assert(bpf_update_elem(map_fd, &next_ins_key, value,
				       BPF_NOEXIST) == 0);
		next_ins_key++;
	}

	for (i = 0; i < nr_repeats; i++) {
		int rn;

		rn = rand();

		if (rn % 10) {
			/* ~90%: touch a random stable key. */
			key = rn % nr_stable_elems + stable_base;
			bpf_lookup_elem(map_fd, &key, value);
		} else {
			/* ~10%: churn in a brand-new key. */
			bpf_update_elem(map_fd, &next_ins_key, value,
					BPF_NOEXIST);
			next_ins_key++;
		}
	}

	/* Count stable keys that the LRU evicted under churn pressure. */
	key = stable_base;
	for (i = 0; i < nr_stable_elems; i++) {
		if (bpf_lookup_elem(map_fd, &key, value))
			nr_losses++;
		key++;
	}

	printf(" task:%d nr_losses:%u\n", task, nr_losses);
}
465
466static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
467{
468 int map_fd;
469
470 printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
471 map_flags);
472
473 /* Give 20% more than the active working set */
474 if (map_flags & BPF_F_NO_COMMON_LRU)
475 map_fd = create_map(map_type, map_flags,
476 nr_cpus * (1000 + 200));
477 else
478 map_fd = create_map(map_type, map_flags,
479 nr_tasks * (1000 + 200));
480
481 assert(map_fd != -1);
482
483 run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
484
485 close(map_fd);
486}
487
488int main(int argc, char **argv)
489{
490 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
491 int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
492 const char *dist_file;
493 int nr_tasks = 1;
494 int lru_size;
495 int f;
496
497 if (argc < 4) {
498 printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
499 argv[0]);
500 return -1;
501 }
502
503 dist_file = argv[1];
504 lru_size = atoi(argv[2]);
505 nr_tasks = atoi(argv[3]);
506
507 setbuf(stdout, NULL);
508
509 assert(!setrlimit(RLIMIT_MEMLOCK, &r));
510
511 srand(time(NULL));
512
513 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
514 assert(nr_cpus != -1);
515 printf("nr_cpus:%d\n\n", nr_cpus);
516
517 nr_tasks = min(nr_tasks, nr_cpus);
518
519 dist_key_counts = read_keys(dist_file, &dist_keys);
520 if (!dist_key_counts) {
521 printf("%s has no key\n", dist_file);
522 return -1;
523 }
524
525 for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
526 test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
527 test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
528 test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
529 nr_tasks);
530 test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
531 nr_tasks, lru_size);
532 printf("\n");
533 }
534
535 free(dist_keys);
536
537 return 0;
538}