blob: 1eadc491dc49ee5acae91bde937e4bc1100dcefa [file] [log] [blame]
Jan Stancek6ce219c2012-10-26 15:31:17 +02001/*
2 * Copyright (C) 2012 Linux Test Project, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it
13 * is free of the rightful claim of any third person regarding
14 * infringement or the like. Any license provided herein, whether
15 * implied or otherwise, applies only to this software file. Patent
16 * licenses, if any, provided herein do not apply to combinations of
17 * this program with other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 * 02110-1301, USA.
23 */
24
25/*
26 * use migrate_pages() and check that address is on correct node
27 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
28 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
29 * 3. process A can migrate shared mem only with CAP_SYS_NICE
30 * 4. process A can migrate non-shared mem in process B with same effective uid
31 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
32 */
33#include <sys/types.h>
34#include <sys/syscall.h>
35#include <sys/wait.h>
36#include <sys/mman.h>
37#include <errno.h>
38#if HAVE_NUMA_H
39#include <numa.h>
40#endif
41#if HAVE_NUMAIF_H
42#include <numaif.h>
43#endif
44#include <stdio.h>
45#include <stdlib.h>
46#include <unistd.h>
47#include <pwd.h>
48#include "config.h"
49#include "test.h"
50#include "usctest.h"
51#include "safe_macros.h"
52#include "linux_syscall_numbers.h"
53#include "numa_helper.h"
54#include "migrate_pages_common.h"
55
56/*
57 * This is an estimated minimum of free mem required to migrate this
58 * process to another node as migrate_pages will fail if there is not
59 * enough free space on node. While running this test on x86_64
60 * it used ~2048 pages (total VM, not just RSS). Considering ia64 as
61 * architecture with largest (non-huge) page size (16k), this limit
62 * is set to 2048*16k == 32M.
63 */
64#define NODE_MIN_FREEMEM (32*1024*1024)
65
66char *TCID = "migrate_pages02";
Wanlong Gao354ebb42012-12-07 10:10:04 +080067int TST_TOTAL = 1;
Jan Stancek6ce219c2012-10-26 15:31:17 +020068
69#if defined(__NR_migrate_pages) && HAVE_NUMA_H && HAVE_NUMAIF_H
70static const char nobody_uid[] = "nobody";
71static struct passwd *ltpuser;
72static int *nodes, nodeA, nodeB;
73static int num_nodes;
74
75static void setup(void);
76static void cleanup(void);
77
78option_t options[] = {
Wanlong Gao354ebb42012-12-07 10:10:04 +080079 {NULL, NULL, NULL}
Jan Stancek6ce219c2012-10-26 15:31:17 +020080};
81
82static void print_mem_stats(pid_t pid, int node)
83{
84 char s[64];
85 long long node_size, freep;
86
87 if (pid == 0)
88 pid = getpid();
89
90 tst_resm(TINFO, "mem_stats pid: %d, node: %d", pid, node);
91
92 /* dump pid's VM info */
93 sprintf(s, "cat /proc/%d/status", pid);
94 system(s);
95 sprintf(s, "cat /proc/%d/numa_maps", pid);
96 system(s);
97
98 /* dump node free mem */
99 node_size = numa_node_size64(node, &freep);
100 tst_resm(TINFO, "Node id: %d, size: %lld, free: %lld",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800101 node, node_size, freep);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200102}
103
104static int migrate_to_node(pid_t pid, int node)
105{
106 unsigned long nodemask_size, max_node;
107 unsigned long *old_nodes, *new_nodes;
108 int i;
109
110 tst_resm(TINFO, "pid(%d) migrate pid %d to node -> %d",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800111 getpid(), pid, node);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200112 max_node = get_max_node();
Wanlong Gao354ebb42012-12-07 10:10:04 +0800113 nodemask_size = max_node / 8 + 1;
Jan Stancek6ce219c2012-10-26 15:31:17 +0200114 old_nodes = SAFE_MALLOC(NULL, nodemask_size);
115 new_nodes = SAFE_MALLOC(NULL, nodemask_size);
116
117 memset(old_nodes, 0, nodemask_size);
118 memset(new_nodes, 0, nodemask_size);
119 for (i = 0; i < num_nodes; i++)
120 set_bit(old_nodes, nodes[i], 1);
121 set_bit(new_nodes, node, 1);
122
Jan Stancek359980f2013-02-15 10:16:05 +0100123 TEST(ltp_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
124 new_nodes));
Jan Stancek6ce219c2012-10-26 15:31:17 +0200125 if (TEST_RETURN != 0) {
126 if (TEST_RETURN < 0)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800127 tst_resm(TFAIL | TERRNO, "migrate_pages failed "
128 "ret: %ld, ", TEST_RETURN);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200129 else
130 tst_resm(TWARN, "migrate_pages could not migrate all "
Wanlong Gao354ebb42012-12-07 10:10:04 +0800131 "pages, not migrated: %ld", TEST_RETURN);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200132 print_mem_stats(pid, node);
133 }
134 free(old_nodes);
135 free(new_nodes);
136 return TEST_RETURN;
137}
138
139static int addr_on_node(void *addr)
140{
141 int node;
142 int ret;
143
Jan Stancek359980f2013-02-15 10:16:05 +0100144 ret = ltp_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800145 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200146 if (ret == -1) {
147 tst_resm(TBROK | TERRNO, "error getting memory policy "
Wanlong Gao354ebb42012-12-07 10:10:04 +0800148 "for page %p", addr);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200149 }
150 return node;
151}
152
153static int check_addr_on_node(void *addr, int exp_node)
154{
155 int node;
156
157 node = addr_on_node(addr);
158 if (node == exp_node) {
159 tst_resm(TPASS, "pid(%d) addr %p is on expected node: %d",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800160 getpid(), addr, exp_node);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200161 return 0;
162 } else {
163 tst_resm(TFAIL, "pid(%d) addr %p not on expected node: %d "
Wanlong Gao354ebb42012-12-07 10:10:04 +0800164 ", expected %d", getpid(), addr, node, exp_node);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200165 print_mem_stats(0, exp_node);
166 return 1;
167 }
168}
169
Wanlong Gao354ebb42012-12-07 10:10:04 +0800170static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
Jan Stancek6ce219c2012-10-26 15:31:17 +0200171{
172 char *testp, *testp2;
173 int ret, status;
174 pid_t child;
175
176 /* parent can migrate its non-shared memory */
177 tst_resm(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
178 testp = SAFE_MALLOC(NULL, getpagesize());
179 testp[0] = 0;
180 tst_resm(TINFO, "private anonymous: %p", testp);
181 migrate_to_node(0, node2);
182 check_addr_on_node(testp, node2);
183 migrate_to_node(0, node1);
184 check_addr_on_node(testp, node1);
185 free(testp);
186
187 /* parent can migrate shared memory with CAP_SYS_NICE */
Wanlong Gao354ebb42012-12-07 10:10:04 +0800188 testp2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
189 MAP_ANONYMOUS | MAP_SHARED, 0, 0);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200190 if (testp2 == MAP_FAILED)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800191 tst_brkm(TBROK | TERRNO, cleanup, "mmap failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200192 testp2[0] = 1;
193 tst_resm(TINFO, "shared anonymous: %p", testp2);
194 migrate_to_node(0, node2);
195 check_addr_on_node(testp2, node2);
196
197 /* shared mem is on node2, try to migrate in child to node1 */
198 fflush(stdout);
199 child = fork();
200 switch (child) {
201 case -1:
Wanlong Gao354ebb42012-12-07 10:10:04 +0800202 tst_brkm(TBROK | TERRNO, cleanup, "fork");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200203 break;
204 case 0:
205 tst_resm(TINFO, "child shared anonymous, cap_sys_nice: %d",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800206 cap_sys_nice);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200207 testp = SAFE_MALLOC(NULL, getpagesize());
208 testp[0] = 1;
209 testp2[0] = 1;
210 if (!cap_sys_nice)
211 if (seteuid(ltpuser->pw_uid) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800212 tst_brkm(TBROK | TERRNO, NULL,
213 "seteuid failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200214
215 migrate_to_node(0, node1);
216 /* child can migrate non-shared memory */
217 ret = check_addr_on_node(testp, node1);
218
219 free(testp);
220 munmap(testp2, getpagesize());
221 exit(ret);
222 default:
223 if (waitpid(child, &status, 0) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800224 tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200225 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
226 tst_resm(TFAIL, "child returns %d", status);
227 if (cap_sys_nice)
228 /* child can migrate shared memory only
229 * with CAP_SYS_NICE */
230 check_addr_on_node(testp2, node1);
231 else
232 check_addr_on_node(testp2, node2);
233 munmap(testp2, getpagesize());
234 }
235}
236
Wanlong Gao354ebb42012-12-07 10:10:04 +0800237static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
Jan Stancek6ce219c2012-10-26 15:31:17 +0200238{
239 char *testp;
240 int status, ret, tmp;
241 pid_t child;
242 int child_ready[2];
243 int pages_migrated[2];
244
245 /* setup pipes to synchronize child/parent */
246 if (pipe(child_ready) == -1)
247 tst_resm(TBROK | TERRNO, "pipe #1 failed");
248 if (pipe(pages_migrated) == -1)
249 tst_resm(TBROK | TERRNO, "pipe #2 failed");
250
251 tst_resm(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
252
253 fflush(stdout);
254 child = fork();
255 switch (child) {
256 case -1:
Wanlong Gao354ebb42012-12-07 10:10:04 +0800257 tst_brkm(TBROK | TERRNO, cleanup, "fork");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200258 break;
259 case 0:
260 close(child_ready[0]);
261 close(pages_migrated[1]);
262
263 testp = SAFE_MALLOC(NULL, getpagesize());
264 testp[0] = 0;
265
266 /* make sure we are on node1 */
267 migrate_to_node(0, node1);
268 check_addr_on_node(testp, node1);
269
270 if (seteuid(ltpuser->pw_uid) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800271 tst_brkm(TBROK | TERRNO, NULL, "seteuid failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200272
273 /* signal parent it's OK to migrate child and wait */
274 if (write(child_ready[1], &tmp, 1) != 1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800275 tst_brkm(TBROK | TERRNO, NULL, "write #1 failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200276 if (read(pages_migrated[0], &tmp, 1) != 1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800277 tst_brkm(TBROK | TERRNO, NULL, "read #1 failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200278
279 /* parent can migrate child process with same euid */
280 /* parent can migrate child process with CAP_SYS_NICE */
281 ret = check_addr_on_node(testp, node2);
282
283 free(testp);
284 close(child_ready[1]);
285 close(pages_migrated[0]);
286 exit(ret);
287 default:
288 close(child_ready[1]);
289 close(pages_migrated[0]);
290
291 if (!cap_sys_nice)
292 if (seteuid(ltpuser->pw_uid) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800293 tst_brkm(TBROK | TERRNO, NULL,
294 "seteuid failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200295
296 /* wait until child is ready on node1, then migrate and
297 * signal to check current node */
298 if (read(child_ready[0], &tmp, 1) != 1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800299 tst_brkm(TBROK | TERRNO, NULL, "read #2 failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200300 migrate_to_node(child, node2);
301 if (write(pages_migrated[1], &tmp, 1) != 1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800302 tst_brkm(TBROK | TERRNO, NULL, "write #2 failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200303
304 if (waitpid(child, &status, 0) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800305 tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200306 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
307 tst_resm(TFAIL, "child returns %d", status);
308 close(child_ready[0]);
309 close(pages_migrated[1]);
310
311 /* reset euid, so this testcase can be used in loop */
312 if (!cap_sys_nice)
313 if (seteuid(0) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800314 tst_brkm(TBROK | TERRNO, NULL,
315 "seteuid failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200316 }
317}
318
319int main(int argc, char *argv[])
320{
321 int lc;
322 char *msg;
323
324 msg = parse_opts(argc, argv, options, NULL);
325 if (msg != NULL)
326 tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
327
328 setup();
329 for (lc = 0; TEST_LOOPING(lc); lc++) {
330 Tst_count = 0;
331 test_migrate_current_process(nodeA, nodeB, 1);
332 test_migrate_current_process(nodeA, nodeB, 0);
333 test_migrate_other_process(nodeA, nodeB, 1);
334 test_migrate_other_process(nodeA, nodeB, 0);
335 }
336 cleanup();
337 tst_exit();
338}
339
340static void setup(void)
341{
342 int ret, i, j;
343 int pagesize = getpagesize();
344 void *p;
345
346 tst_require_root(NULL);
Jan Stancek359980f2013-02-15 10:16:05 +0100347 TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL));
Jan Stancek6ce219c2012-10-26 15:31:17 +0200348
349 if (numa_available() == -1)
350 tst_brkm(TCONF, NULL, "NUMA not available");
351
352 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
353 if (ret < 0)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800354 tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d", ret);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200355
356 if (num_nodes < 2)
357 tst_brkm(TCONF, NULL, "at least 2 allowed NUMA nodes"
Wanlong Gao354ebb42012-12-07 10:10:04 +0800358 " are required");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200359 else if (tst_kvercmp(2, 6, 18) < 0)
360 tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required");
361
362 /*
363 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
364 * The reason is that:
365 * 1. migrate_pages() is expected to succeed
366 * 2. this test avoids hitting:
367 * Bug 870326 - migrate_pages() reports success, but pages are
368 * not moved to desired node
369 * https://bugzilla.redhat.com/show_bug.cgi?id=870326
370 */
371 nodeA = nodeB = -1;
372 for (i = 0; i < num_nodes; i++) {
373 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
374 if (p == NULL)
375 break;
376 memset(p, 0xff, NODE_MIN_FREEMEM);
377
378 j = 0;
379 while (j < NODE_MIN_FREEMEM) {
Wanlong Gao354ebb42012-12-07 10:10:04 +0800380 if (addr_on_node(p + j) != nodes[i])
Jan Stancek6ce219c2012-10-26 15:31:17 +0200381 break;
382 j += pagesize;
383 }
384 numa_free(p, NODE_MIN_FREEMEM);
385
386 if (j >= NODE_MIN_FREEMEM) {
387 if (nodeA == -1)
388 nodeA = nodes[i];
389 else if (nodeB == -1)
390 nodeB = nodes[i];
391 else
392 break;
393 }
394 }
395
396 if (nodeA == -1 || nodeB == -1)
397 tst_brkm(TCONF, NULL, "at least 2 NUMA nodes with "
Wanlong Gao354ebb42012-12-07 10:10:04 +0800398 "free mem > %d are needed", NODE_MIN_FREEMEM);
Jan Stancek6ce219c2012-10-26 15:31:17 +0200399 tst_resm(TINFO, "Using nodes: %d %d", nodeA, nodeB);
400
401 ltpuser = getpwnam(nobody_uid);
402 if (ltpuser == NULL)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800403 tst_brkm(TBROK | TERRNO, NULL, "getpwnam failed");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200404
405 TEST_PAUSE;
406}
407
408static void cleanup(void)
409{
410 free(nodes);
411 TEST_CLEANUP;
412}
413
414#else /* __NR_migrate_pages */
415int main(void)
416{
417 tst_brkm(TCONF, NULL, "System doesn't support __NR_migrate_pages"
Wanlong Gao354ebb42012-12-07 10:10:04 +0800418 " or libnuma is not available");
Jan Stancek6ce219c2012-10-26 15:31:17 +0200419}
420#endif