blob: c79c372db2da48c29461d56397b8ee5eb49fe79e [file] [log] [blame]
Andrea Arcangelic47174f2015-09-04 15:47:23 -07001/*
2 * Stress userfaultfd syscall.
3 *
4 * Copyright (C) 2015 Red Hat, Inc.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2. See
7 * the COPYING file in the top-level directory.
8 *
9 * This test allocates two virtual areas and bounces the physical
10 * memory across the two virtual areas (from area_src to area_dst)
11 * using userfaultfd.
12 *
13 * There are three threads running per CPU:
14 *
15 * 1) one per-CPU thread takes a per-page pthread_mutex in a random
16 * page of the area_dst (while the physical page may still be in
17 * area_src), and increments a per-page counter in the same page,
18 * and checks its value against a verification region.
19 *
20 * 2) another per-CPU thread handles the userfaults generated by
21 * thread 1 above. userfaultfd blocking reads or poll() modes are
22 * exercised interleaved.
23 *
24 * 3) one last per-CPU thread transfers the memory in the background
25 * at maximum bandwidth (if not already transferred by thread
 * 2). Each cpu thread takes care of transferring a portion of the
27 * area.
28 *
29 * When all threads of type 3 completed the transfer, one bounce is
30 * complete. area_src and area_dst are then swapped. All threads are
31 * respawned and so the bounce is immediately restarted in the
32 * opposite direction.
33 *
 * The per-CPU threads of type 1, by triggering userfaults inside
 * pthread_mutex_lock, also verify the atomicity of the memory
 * transfer (UFFDIO_COPY).
37 *
38 * The program takes two parameters: the amounts of physical memory in
39 * megabytes (MiB) of the area and the number of bounces to execute.
40 *
41 * # 100MiB 99999 bounces
42 * ./userfaultfd 100 99999
43 *
44 * # 1GiB 99 bounces
45 * ./userfaultfd 1000 99
46 *
47 * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
48 * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
49 */
50
51#define _GNU_SOURCE
52#include <stdio.h>
53#include <errno.h>
54#include <unistd.h>
55#include <stdlib.h>
56#include <sys/types.h>
57#include <sys/stat.h>
58#include <fcntl.h>
59#include <time.h>
60#include <signal.h>
61#include <poll.h>
62#include <string.h>
63#include <sys/mman.h>
64#include <sys/syscall.h>
65#include <sys/ioctl.h>
66#include <pthread.h>
Thierry Redingd0a87112015-09-22 14:58:52 -070067#include <linux/userfaultfd.h>
Andrea Arcangelic47174f2015-09-04 15:47:23 -070068
Michael Ellerman56ed8f12015-09-22 14:58:58 -070069#ifdef __NR_userfaultfd
Andrea Arcangelic47174f2015-09-04 15:47:23 -070070
/* Sized at startup from sysconf() and argv: see main(). */
static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;

/* Bounce mode bits, tested against the "bounces" countdown value. */
#define BOUNCE_RANDOM (1<<0)	/* pick faulting pages at random */
#define BOUNCE_RACINGFAULTS (1<<1)	/* don't partition pages per CPU */
#define BOUNCE_VERIFY (1<<2)	/* verify counters and page contents */
#define BOUNCE_POLL (1<<3)	/* poll() the uffd instead of blocking reads */
static int bounces;

#ifdef HUGETLB_TEST
static int huge_fd;		/* hugetlbfs backing file */
static char *huge_fd_off0;	/* mapping of offset 0 of huge_fd (area_src) */
#endif
/* expected value of each per-page counter, indexed by page number */
static unsigned long long *count_verify;
static int uffd, uffd_flags, finished, *pipefd;
static char *area_src, *area_dst;
static char *zeropage;	/* all-zero page compared against by BOUNCE_VERIFY */
pthread_attr_t attr;	/* thread attrs; 16MiB stacks (see userfaultfd_stress) */

/* pthread_mutex_t starts at page offset 0 */
#define area_mutex(___area, ___nr)					\
	((pthread_mutex_t *) ((___area) + (___nr)*page_size))
/*
 * count is placed in the page after pthread_mutex_t naturally aligned
 * to avoid non alignment faults on non-x86 archs.
 */
#define area_count(___area, ___nr)					\
	((volatile unsigned long long *) ((unsigned long)		\
				 ((___area) + (___nr)*page_size +	\
				  sizeof(pthread_mutex_t) +		\
				  sizeof(unsigned long long) - 1) &	\
				 ~(unsigned long)(sizeof(unsigned long long) \
						  - 1)))
103
Mike Rapoport419624d2017-02-22 15:43:46 -0800104#if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST)
Mike Kravetz9903bd72017-02-22 15:43:07 -0800105
Mike Rapoport419624d2017-02-22 15:43:46 -0800106/* Anonymous memory */
Mike Kravetz9903bd72017-02-22 15:43:07 -0800107#define EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
108 (1 << _UFFDIO_COPY) | \
109 (1 << _UFFDIO_ZEROPAGE))
110
111static int release_pages(char *rel_area)
112{
113 int ret = 0;
114
115 if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
116 perror("madvise");
117 ret = 1;
118 }
119
120 return ret;
121}
122
123static void allocate_area(void **alloc_area)
124{
125 if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
126 fprintf(stderr, "out of memory\n");
127 *alloc_area = NULL;
128 }
129}
130
Mike Rapoport419624d2017-02-22 15:43:46 -0800131#else /* HUGETLB_TEST or SHMEM_TEST */
Mike Kravetz9903bd72017-02-22 15:43:07 -0800132
Mike Rapoportcac67322017-02-22 15:43:40 -0800133#define EXPECTED_IOCTLS UFFD_API_RANGE_IOCTLS_BASIC
Mike Kravetz9903bd72017-02-22 15:43:07 -0800134
Mike Rapoport419624d2017-02-22 15:43:46 -0800135#ifdef HUGETLB_TEST
136
137/* HugeTLB memory */
Mike Kravetz9903bd72017-02-22 15:43:07 -0800138static int release_pages(char *rel_area)
139{
140 int ret = 0;
141
142 if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
143 rel_area == huge_fd_off0 ? 0 :
144 nr_pages * page_size,
145 nr_pages * page_size)) {
146 perror("fallocate");
147 ret = 1;
148 }
149
150 return ret;
151}
152
153
154static void allocate_area(void **alloc_area)
155{
156 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
157 MAP_PRIVATE | MAP_HUGETLB, huge_fd,
158 *alloc_area == area_src ? 0 :
159 nr_pages * page_size);
160 if (*alloc_area == MAP_FAILED) {
161 fprintf(stderr, "mmap of hugetlbfs file failed\n");
162 *alloc_area = NULL;
163 }
164
165 if (*alloc_area == area_src)
166 huge_fd_off0 = *alloc_area;
167}
168
Mike Rapoport419624d2017-02-22 15:43:46 -0800169#elif defined(SHMEM_TEST)
170
171/* Shared memory */
172static int release_pages(char *rel_area)
173{
174 int ret = 0;
175
176 if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
177 perror("madvise");
178 ret = 1;
179 }
180
181 return ret;
182}
183
184static void allocate_area(void **alloc_area)
185{
186 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
187 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
188 if (*alloc_area == MAP_FAILED) {
189 fprintf(stderr, "shared memory mmap failed\n");
190 *alloc_area = NULL;
191 }
192}
193
194#else /* SHMEM_TEST */
195#error "Undefined test type"
Mike Kravetz9903bd72017-02-22 15:43:07 -0800196#endif /* HUGETLB_TEST */
197
Mike Rapoport419624d2017-02-22 15:43:46 -0800198#endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */
199
/*
 * Byte-by-byte, strictly in-order comparison of the first @n bytes.
 * Returns 0 when equal, 1 at the first difference.  Used instead of
 * bcmp/memcmp, which may read out of order while the memory is
 * concurrently changing (see the comment in locking_thread).
 */
static int my_bcmp(char *str1, char *str2, size_t n)
{
	size_t i = 0;

	while (i < n) {
		if (str1[i] != str2[i])
			return 1;
		i++;
	}
	return 0;
}
208
/*
 * Thread type 1: repeatedly picks a page of area_dst (randomly with
 * BOUNCE_RANDOM, otherwise sequentially), takes that page's mutex --
 * faulting the page in through userfaultfd if it hasn't been copied
 * yet -- increments the per-page counter and checks it against
 * count_verify[].  arg is the CPU index.
 */
static void *locking_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	struct random_data rand;
	/* self-assignment only silences the uninitialized-use warning */
	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
	int32_t rand_nr;
	unsigned long long count;
	char randstate[64];
	unsigned int seed;
	time_t start;

	if (bounces & BOUNCE_RANDOM) {
		seed = (unsigned int) time(NULL) - bounces;
		if (!(bounces & BOUNCE_RACINGFAULTS))
			/* distinct seed per CPU: threads fault different pages */
			seed += cpu;
		bzero(&rand, sizeof(rand));
		bzero(&randstate, sizeof(randstate));
		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
			fprintf(stderr, "srandom_r error\n"), exit(1);
	} else {
		page_nr = -bounces;
		if (!(bounces & BOUNCE_RACINGFAULTS))
			/* sequential mode: each CPU starts in its own slice */
			page_nr += cpu * nr_pages_per_cpu;
	}

	while (!finished) {
		if (bounces & BOUNCE_RANDOM) {
			if (random_r(&rand, &rand_nr))
				fprintf(stderr, "random_r 1 error\n"), exit(1);
			page_nr = rand_nr;
			if (sizeof(page_nr) > sizeof(rand_nr)) {
				/* random_r yields 31 bits; draw twice on 64-bit */
				if (random_r(&rand, &rand_nr))
					fprintf(stderr, "random_r 2 error\n"), exit(1);
				page_nr |= (((unsigned long) rand_nr) << 16) <<
					   16;
			}
		} else
			page_nr += 1;
		page_nr %= nr_pages;

		start = time(NULL);
		if (bounces & BOUNCE_VERIFY) {
			/* lockless peek: the counter must never read as zero */
			count = *area_count(area_dst, page_nr);
			if (!count)
				fprintf(stderr,
					"page_nr %lu wrong count %Lu %Lu\n",
					page_nr, count,
					count_verify[page_nr]), exit(1);


			/*
			 * We can't use bcmp (or memcmp) because that
			 * returns 0 erroneously if the memory is
			 * changing under it (even if the end of the
			 * page is never changing and always
			 * different).
			 */
#if 1
			/* the page must never be all zeroes either */
			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
				     page_size))
				fprintf(stderr,
					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
					page_nr, count,
					count_verify[page_nr]), exit(1);
#else
			/* disabled alternative: retry bcmp a few times instead */
			unsigned long loops;

			loops = 0;
			/* uncomment the below line to test with mutex */
			/* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
			while (!bcmp(area_dst + page_nr * page_size, zeropage,
				     page_size)) {
				loops += 1;
				if (loops > 10)
					break;
			}
			/* uncomment below line to test with mutex */
			/* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
			if (loops) {
				fprintf(stderr,
					"page_nr %lu all zero thread %lu %p %lu\n",
					page_nr, cpu, area_dst + page_nr * page_size,
					loops);
				if (loops > 10)
					exit(1);
			}
#endif
		}

		/*
		 * Taking the mutex both faults the page in (exercising
		 * UFFDIO_COPY atomicity) and serializes the counter update.
		 */
		pthread_mutex_lock(area_mutex(area_dst, page_nr));
		count = *area_count(area_dst, page_nr);
		if (count != count_verify[page_nr]) {
			fprintf(stderr,
				"page_nr %lu memory corruption %Lu %Lu\n",
				page_nr, count,
				count_verify[page_nr]), exit(1);
		}
		count++;
		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
		pthread_mutex_unlock(area_mutex(area_dst, page_nr));

		/* a >1s fault is suspicious but may just be overcommit */
		if (time(NULL) - start > 1)
			fprintf(stderr,
				"userfault too slow %ld "
				"possible false positive with overcommit\n",
				time(NULL) - start);
	}

	return NULL;
}
319
Mike Rapoportaa0d2722017-02-22 15:44:04 -0800320static int copy_page(int ufd, unsigned long offset)
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700321{
322 struct uffdio_copy uffdio_copy;
323
324 if (offset >= nr_pages * page_size)
325 fprintf(stderr, "unexpected offset %lu\n",
326 offset), exit(1);
327 uffdio_copy.dst = (unsigned long) area_dst + offset;
328 uffdio_copy.src = (unsigned long) area_src + offset;
329 uffdio_copy.len = page_size;
330 uffdio_copy.mode = 0;
331 uffdio_copy.copy = 0;
Mike Rapoportaa0d2722017-02-22 15:44:04 -0800332 if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700333 /* real retval in ufdio_copy.copy */
334 if (uffdio_copy.copy != -EEXIST)
335 fprintf(stderr, "UFFDIO_COPY error %Ld\n",
336 uffdio_copy.copy), exit(1);
337 } else if (uffdio_copy.copy != page_size) {
338 fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
339 uffdio_copy.copy), exit(1);
340 } else
341 return 1;
342 return 0;
343}
344
/*
 * Thread type 2 (BOUNCE_POLL mode): poll()s the userfaultfd for
 * pagefault events and resolves each with copy_page().  A byte
 * written to this CPU's pipe by stress() tells the thread to exit.
 * Returns the number of userfaults it resolved, cast to void *.
 */
static void *uffd_poll_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	struct pollfd pollfd[2];
	struct uffd_msg msg;
	int ret;
	unsigned long offset;
	char tmp_chr;
	unsigned long userfaults = 0;

	pollfd[0].fd = uffd;
	pollfd[0].events = POLLIN;
	pollfd[1].fd = pipefd[cpu*2];	/* read end of this CPU's exit pipe */
	pollfd[1].events = POLLIN;

	for (;;) {
		ret = poll(pollfd, 2, -1);
		/* infinite timeout: a zero return should be impossible */
		if (!ret)
			fprintf(stderr, "poll error %d\n", ret), exit(1);
		if (ret < 0)
			perror("poll"), exit(1);
		if (pollfd[1].revents & POLLIN) {
			/* exit request from stress() */
			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
				fprintf(stderr, "read pipefd error\n"),
					exit(1);
			break;
		}
		if (!(pollfd[0].revents & POLLIN))
			fprintf(stderr, "pollfd[0].revents %d\n",
				pollfd[0].revents), exit(1);
		ret = read(uffd, &msg, sizeof(msg));
		if (ret < 0) {
			/* the uffd is O_NONBLOCK in poll mode */
			if (errno == EAGAIN)
				continue;
			perror("nonblocking read error"), exit(1);
		}
		if (msg.event != UFFD_EVENT_PAGEFAULT)
			fprintf(stderr, "unexpected msg event %u\n",
				msg.event), exit(1);
		/* only MISSING mode is registered, so no write-protect faults */
		if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
			fprintf(stderr, "unexpected write fault\n"), exit(1);
		offset = (char *)(unsigned long)msg.arg.pagefault.address -
			 area_dst;
		offset &= ~(page_size-1);	/* align down to page start */
		if (copy_page(uffd, offset))
			userfaults++;
	}
	return (void *)userfaults;
}
394
/* Serializes uffd_read_thread startup with its creator (see stress()). */
pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Thread type 2 (blocking-read mode): blocks in read(2) on the
 * userfaultfd and resolves each pagefault with copy_page().  The
 * thread never exits on its own; stress() terminates it with
 * pthread_cancel() while it is blocked in read.  arg points to this
 * CPU's userfault counter.
 */
static void *uffd_read_thread(void *arg)
{
	unsigned long *this_cpu_userfaults;
	struct uffd_msg msg;
	unsigned long offset;
	int ret;

	this_cpu_userfaults = (unsigned long *) arg;
	*this_cpu_userfaults = 0;

	/* the creator holds the mutex until the counter above is zeroed */
	pthread_mutex_unlock(&uffd_read_mutex);
	/* from here cancellation is ok */

	for (;;) {
		ret = read(uffd, &msg, sizeof(msg));
		if (ret != sizeof(msg)) {
			if (ret < 0)
				perror("blocking read error"), exit(1);
			else
				fprintf(stderr, "short read\n"), exit(1);
		}
		if (msg.event != UFFD_EVENT_PAGEFAULT)
			fprintf(stderr, "unexpected msg event %u\n",
				msg.event), exit(1);
		/* only MISSING mode is registered, so no write-protect faults */
		if (bounces & BOUNCE_VERIFY &&
		    msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
			fprintf(stderr, "unexpected write fault\n"), exit(1);
		offset = (char *)(unsigned long)msg.arg.pagefault.address -
			 area_dst;
		offset &= ~(page_size-1);	/* align down to page start */
		if (copy_page(uffd, offset))
			(*this_cpu_userfaults)++;
	}
	return (void *)NULL;
}
432
433static void *background_thread(void *arg)
434{
435 unsigned long cpu = (unsigned long) arg;
436 unsigned long page_nr;
437
438 for (page_nr = cpu * nr_pages_per_cpu;
439 page_nr < (cpu+1) * nr_pages_per_cpu;
440 page_nr++)
Mike Rapoportaa0d2722017-02-22 15:44:04 -0800441 copy_page(uffd, page_nr * page_size);
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700442
443 return NULL;
444}
445
/*
 * Run one bounce: spawn the locking, uffd-handling and background
 * threads, wait for the background transfer to finish, then tear the
 * handlers and lockers down.  userfaults[] receives the per-CPU
 * userfault counts.  Returns 0 on success, 1 on error.
 */
static int stress(unsigned long *userfaults)
{
	unsigned long cpu;
	pthread_t locking_threads[nr_cpus];
	pthread_t uffd_threads[nr_cpus];
	pthread_t background_threads[nr_cpus];
	void **_userfaults = (void **) userfaults;

	finished = 0;
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (pthread_create(&locking_threads[cpu], &attr,
				   locking_thread, (void *)cpu))
			return 1;
		if (bounces & BOUNCE_POLL) {
			if (pthread_create(&uffd_threads[cpu], &attr,
					   uffd_poll_thread, (void *)cpu))
				return 1;
		} else {
			if (pthread_create(&uffd_threads[cpu], &attr,
					   uffd_read_thread,
					   &_userfaults[cpu]))
				return 1;
			/* wait for the reader to zero its counter first */
			pthread_mutex_lock(&uffd_read_mutex);
		}
		if (pthread_create(&background_threads[cpu], &attr,
				   background_thread, (void *)cpu))
			return 1;
	}
	for (cpu = 0; cpu < nr_cpus; cpu++)
		if (pthread_join(background_threads[cpu], NULL))
			return 1;

	/*
	 * Be strict and immediately zap area_src, the whole area has
	 * been transferred already by the background threads. The
	 * area_src could then be faulted in in a racy way by still
	 * running uffdio_threads reading zeropages after we zapped
	 * area_src (but they're guaranteed to get -EEXIST from
	 * UFFDIO_COPY without writing zero pages into area_dst
	 * because the background threads already completed).
	 */
	if (release_pages(area_src))
		return 1;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		char c;
		if (bounces & BOUNCE_POLL) {
			/* any byte on the pipe makes the poller exit */
			if (write(pipefd[cpu*2+1], &c, 1) != 1) {
				fprintf(stderr, "pipefd write error\n");
				return 1;
			}
			if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
				return 1;
		} else {
			/* readers block in read(2); cancel them instead */
			if (pthread_cancel(uffd_threads[cpu]))
				return 1;
			if (pthread_join(uffd_threads[cpu], NULL))
				return 1;
		}
	}

	/* only now let the locking threads notice and drain out */
	finished = 1;
	for (cpu = 0; cpu < nr_cpus; cpu++)
		if (pthread_join(locking_threads[cpu], NULL))
			return 1;

	return 0;
}
514
Mike Rapoport6228b8f2017-02-22 15:44:01 -0800515static int userfaultfd_open(void)
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700516{
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700517 struct uffdio_api uffdio_api;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700518
519 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
520 if (uffd < 0) {
521 fprintf(stderr,
522 "userfaultfd syscall not available in this kernel\n");
523 return 1;
524 }
525 uffd_flags = fcntl(uffd, F_GETFD, NULL);
526
527 uffdio_api.api = UFFD_API;
528 uffdio_api.features = 0;
529 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
530 fprintf(stderr, "UFFDIO_API\n");
531 return 1;
532 }
533 if (uffdio_api.api != UFFD_API) {
534 fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
535 return 1;
536 }
537
Mike Rapoport6228b8f2017-02-22 15:44:01 -0800538 return 0;
539}
540
541static int userfaultfd_stress(void)
542{
543 void *area;
544 char *tmp_area;
545 unsigned long nr;
546 struct uffdio_register uffdio_register;
547 unsigned long cpu;
548 int err;
549 unsigned long userfaults[nr_cpus];
550
551 allocate_area((void **)&area_src);
552 if (!area_src)
553 return 1;
554 allocate_area((void **)&area_dst);
555 if (!area_dst)
556 return 1;
557
558 if (userfaultfd_open() < 0)
559 return 1;
560
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700561 count_verify = malloc(nr_pages * sizeof(unsigned long long));
562 if (!count_verify) {
563 perror("count_verify");
564 return 1;
565 }
566
567 for (nr = 0; nr < nr_pages; nr++) {
568 *area_mutex(area_src, nr) = (pthread_mutex_t)
569 PTHREAD_MUTEX_INITIALIZER;
570 count_verify[nr] = *area_count(area_src, nr) = 1;
Andrea Arcangeli1f5fee22015-09-22 14:59:00 -0700571 /*
572 * In the transition between 255 to 256, powerpc will
573 * read out of order in my_bcmp and see both bytes as
574 * zero, so leave a placeholder below always non-zero
575 * after the count, to avoid my_bcmp to trigger false
576 * positives.
577 */
578 *(area_count(area_src, nr) + 1) = 1;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700579 }
580
581 pipefd = malloc(sizeof(int) * nr_cpus * 2);
582 if (!pipefd) {
583 perror("pipefd");
584 return 1;
585 }
586 for (cpu = 0; cpu < nr_cpus; cpu++) {
587 if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
588 perror("pipe");
589 return 1;
590 }
591 }
592
593 if (posix_memalign(&area, page_size, page_size)) {
594 fprintf(stderr, "out of memory\n");
595 return 1;
596 }
597 zeropage = area;
598 bzero(zeropage, page_size);
599
600 pthread_mutex_lock(&uffd_read_mutex);
601
602 pthread_attr_init(&attr);
603 pthread_attr_setstacksize(&attr, 16*1024*1024);
604
Andrea Arcangelia5932bf2015-09-22 14:59:03 -0700605 err = 0;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700606 while (bounces--) {
607 unsigned long expected_ioctls;
608
609 printf("bounces: %d, mode:", bounces);
610 if (bounces & BOUNCE_RANDOM)
611 printf(" rnd");
612 if (bounces & BOUNCE_RACINGFAULTS)
613 printf(" racing");
614 if (bounces & BOUNCE_VERIFY)
615 printf(" ver");
616 if (bounces & BOUNCE_POLL)
617 printf(" poll");
618 printf(", ");
619 fflush(stdout);
620
621 if (bounces & BOUNCE_POLL)
622 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
623 else
624 fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
625
626 /* register */
627 uffdio_register.range.start = (unsigned long) area_dst;
628 uffdio_register.range.len = nr_pages * page_size;
629 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
630 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
631 fprintf(stderr, "register failure\n");
632 return 1;
633 }
Mike Kravetz9903bd72017-02-22 15:43:07 -0800634 expected_ioctls = EXPECTED_IOCTLS;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700635 if ((uffdio_register.ioctls & expected_ioctls) !=
636 expected_ioctls) {
637 fprintf(stderr,
638 "unexpected missing ioctl for anon memory\n");
639 return 1;
640 }
641
642 /*
643 * The madvise done previously isn't enough: some
644 * uffd_thread could have read userfaults (one of
645 * those already resolved by the background thread)
646 * and it may be in the process of calling
647 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
648 * area_src and it would map a zero page in it (of
649 * course such a UFFDIO_COPY is perfectly safe as it'd
650 * return -EEXIST). The problem comes at the next
651 * bounce though: that racing UFFDIO_COPY would
652 * generate zeropages in the area_src, so invalidating
653 * the previous MADV_DONTNEED. Without this additional
654 * MADV_DONTNEED those zeropages leftovers in the
655 * area_src would lead to -EEXIST failure during the
656 * next bounce, effectively leaving a zeropage in the
657 * area_dst.
658 *
659 * Try to comment this out madvise to see the memory
660 * corruption being caught pretty quick.
661 *
662 * khugepaged is also inhibited to collapse THP after
663 * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
664 * required to MADV_DONTNEED here.
665 */
Mike Kravetz9903bd72017-02-22 15:43:07 -0800666 if (release_pages(area_dst))
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700667 return 1;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700668
669 /* bounce pass */
670 if (stress(userfaults))
671 return 1;
672
673 /* unregister */
674 if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
675 fprintf(stderr, "register failure\n");
676 return 1;
677 }
678
679 /* verification */
680 if (bounces & BOUNCE_VERIFY) {
681 for (nr = 0; nr < nr_pages; nr++) {
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700682 if (*area_count(area_dst, nr) != count_verify[nr]) {
683 fprintf(stderr,
684 "error area_count %Lu %Lu %lu\n",
685 *area_count(area_src, nr),
686 count_verify[nr],
687 nr);
Andrea Arcangelia5932bf2015-09-22 14:59:03 -0700688 err = 1;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700689 bounces = 0;
690 }
691 }
692 }
693
694 /* prepare next bounce */
695 tmp_area = area_src;
696 area_src = area_dst;
697 area_dst = tmp_area;
698
699 printf("userfaults:");
700 for (cpu = 0; cpu < nr_cpus; cpu++)
701 printf(" %lu", userfaults[cpu]);
702 printf("\n");
703 }
704
Andrea Arcangelia5932bf2015-09-22 14:59:03 -0700705 return err;
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700706}
707
Mike Kravetz9903bd72017-02-22 15:43:07 -0800708#ifndef HUGETLB_TEST
709
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700710int main(int argc, char **argv)
711{
712 if (argc < 3)
713 fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
714 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
715 page_size = sysconf(_SC_PAGE_SIZE);
Andrea Arcangeli1f5fee22015-09-22 14:59:00 -0700716 if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
717 > page_size)
Andrea Arcangelic47174f2015-09-04 15:47:23 -0700718 fprintf(stderr, "Impossible to run this test\n"), exit(2);
719 nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size /
720 nr_cpus;
721 if (!nr_pages_per_cpu) {
722 fprintf(stderr, "invalid MiB\n");
723 fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
724 }
725 bounces = atoi(argv[2]);
726 if (bounces <= 0) {
727 fprintf(stderr, "invalid bounces\n");
728 fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
729 }
730 nr_pages = nr_pages_per_cpu * nr_cpus;
731 printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
732 nr_pages, nr_pages_per_cpu);
733 return userfaultfd_stress();
734}
Michael Ellerman56ed8f12015-09-22 14:58:58 -0700735
Mike Kravetz9903bd72017-02-22 15:43:07 -0800736#else /* HUGETLB_TEST */
737
738/*
739 * Copied from mlock2-tests.c
740 */
/*
 * Copied from mlock2-tests.c: parse the default huge page size out of
 * /proc/meminfo.  Returns the size in bytes, or 0 if it cannot be
 * determined.
 */
unsigned long default_huge_page_size(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	char *line = NULL;
	size_t linelen = 0;
	unsigned long hps = 0;

	if (!f)
		return 0;

	for (;;) {
		if (getline(&line, &linelen, f) <= 0)
			break;
		if (sscanf(line, "Hugepagesize: %lu kB", &hps) != 1)
			continue;
		hps <<= 10;	/* /proc reports kB; convert to bytes */
		break;
	}

	free(line);
	fclose(f);
	return hps;
}
761
/*
 * Entry point (hugetlbfs build): "<MiB> <bounces> <hugetlbfs_file>".
 * Sizes the test by the default huge page size and prepares the
 * backing file for the two mapped halves.
 */
int main(int argc, char **argv)
{
	if (argc < 4)
		fprintf(stderr, "Usage: <MiB> <bounces> <hugetlbfs_file>\n"),
			exit(1);
	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	page_size = default_huge_page_size();
	if (!page_size)
		fprintf(stderr, "Unable to determine huge page size\n"),
			exit(2);
	/* mutex + counter + anti-false-positive pad must fit in one page */
	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
	    > page_size)
		fprintf(stderr, "Impossible to run this test\n"), exit(2);
	nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size /
		nr_cpus;
	if (!nr_pages_per_cpu) {
		fprintf(stderr, "invalid MiB\n");
		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
	}
	bounces = atoi(argv[2]);
	if (bounces <= 0) {
		fprintf(stderr, "invalid bounces\n");
		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
	}
	nr_pages = nr_pages_per_cpu * nr_cpus;
	huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755);
	if (huge_fd < 0) {
		fprintf(stderr, "Open of %s failed", argv[3]);
		perror("open");
		exit(1);
	}
	/* start from an empty file so both halves begin unmapped */
	if (ftruncate(huge_fd, 0)) {
		fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
		perror("ftruncate");
		exit(1);
	}
	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
	       nr_pages, nr_pages_per_cpu);
	return userfaultfd_stress();
}
802
803#endif
Michael Ellerman56ed8f12015-09-22 14:58:58 -0700804#else /* __NR_userfaultfd */
805
806#warning "missing __NR_userfaultfd definition"
807
/* Stub entry point when the kernel headers lack __NR_userfaultfd. */
int main(void)
{
	fputs("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n",
	      stdout);
	return 0;
}
813
814#endif /* __NR_userfaultfd */