blob: 37cf18feda161b1e06d4705c99e3c46077f4bf98 [file] [log] [blame]
Garrett Cooperecd667e2011-01-19 01:06:18 -08001#include "config.h"
Caspar Zhang6009edf2012-02-20 18:42:36 +08002#include <sys/types.h>
3#include <sys/mman.h>
4#include <sys/mount.h>
5#include <sys/stat.h>
6#include <sys/wait.h>
7#include <errno.h>
8#include <fcntl.h>
9#if HAVE_NUMA_H
Zhouping Liu9b70f092012-02-15 23:18:48 +080010#include <numa.h>
Caspar Zhang6009edf2012-02-20 18:42:36 +080011#endif
12#if HAVE_NUMAIF_H
Garrett Cooperecd667e2011-01-19 01:06:18 -080013#include <numaif.h>
14#endif
Caspar Zhang6009edf2012-02-20 18:42:36 +080015#include <stdarg.h>
16#include <stdio.h>
17#include <string.h>
18#include <unistd.h>
19
20#include "test.h"
Caspar Zhang6009edf2012-02-20 18:42:36 +080021#include "safe_macros.h"
Zhouping Liu448c1ee2013-03-19 11:40:49 +080022#include "safe_file_ops.h"
Caspar Zhang79667fa2012-03-12 14:41:46 +080023#include "mem.h"
Caspar Zhanga98ac192012-08-09 14:15:41 +080024#include "numa_helper.h"
Caspar Zhang6009edf2012-02-20 18:42:36 +080025
Caspar Zhanga7f0eed2012-02-28 15:44:04 +080026/* OOM */
Caspar Zhang6009edf2012-02-20 18:42:36 +080027
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +080028static int alloc_mem(long int length, int testcase)
Caspar Zhang6009edf2012-02-20 18:42:36 +080029{
Jan Stancek7adb6ce2014-07-29 14:04:04 +020030 char *s;
31 long i, pagesz = getpagesize();
Caspar Zhang6009edf2012-02-20 18:42:36 +080032
33 tst_resm(TINFO, "allocating %ld bytes.", length);
Jan Stancek30ac84c2014-07-25 09:58:47 +020034
Wanlong Gao354ebb42012-12-07 10:10:04 +080035 s = mmap(NULL, length, PROT_READ | PROT_WRITE,
36 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
Jan Stancek30ac84c2014-07-25 09:58:47 +020037 if (s == MAP_FAILED)
38 return errno;
39
Caspar Zhang6009edf2012-02-20 18:42:36 +080040 if (testcase == MLOCK && mlock(s, length) == -1)
Jan Stancek30ac84c2014-07-25 09:58:47 +020041 return errno;
Caspar Zhang6009edf2012-02-20 18:42:36 +080042#ifdef HAVE_MADV_MERGEABLE
Wanlong Gao354ebb42012-12-07 10:10:04 +080043 if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
Jan Stancek30ac84c2014-07-25 09:58:47 +020044 return errno;
Caspar Zhang6009edf2012-02-20 18:42:36 +080045#endif
Jan Stancek7adb6ce2014-07-29 14:04:04 +020046 for (i = 0; i < length; i += pagesz)
47 s[i] = '\a';
Caspar Zhang6009edf2012-02-20 18:42:36 +080048
49 return 0;
50}
51
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +080052static void test_alloc(int testcase, int lite)
Caspar Zhang6009edf2012-02-20 18:42:36 +080053{
Jan Stancek30ac84c2014-07-25 09:58:47 +020054 int ret;
55
56 if (lite) {
57 ret = alloc_mem(TESTMEM + MB, testcase);
58 } else {
59 ret = 0;
60 while (!ret)
61 ret = alloc_mem(LENGTH, testcase);
62 }
63 exit(ret);
Caspar Zhang6009edf2012-02-20 18:42:36 +080064}
Garrett Cooperecd667e2011-01-19 01:06:18 -080065
Jan Stancek30ac84c2014-07-25 09:58:47 +020066/*
67 * oom - allocates memory according to specified testcase and checks
68 * desired outcome (e.g. child killed, operation failed with ENOMEM)
69 * @testcase: selects how child allocates memory
70 * valid choices are: OVERCOMMIT, NORMAL, MLOCK and KSM
71 * @lite: if non-zero, child makes only single TESTMEM+MB allocation
72 * if zero, child keeps allocating memory until it gets killed
73 * or some operation fails
74 * @retcode: expected return code of child process
75 * if matches child ret code, this function reports PASS,
76 * otherwise it reports FAIL
77 * @allow_sigkill: if zero and child is killed, this function reports FAIL
78 * if non-zero, then if child is killed by SIGKILL
79 * it is considered as PASS
80 */
81void oom(int testcase, int lite, int retcode, int allow_sigkill)
Garrett Cooperecd667e2011-01-19 01:06:18 -080082{
83 pid_t pid;
84 int status;
Garrett Cooperecd667e2011-01-19 01:06:18 -080085
Caspar Zhang05697c42012-02-20 19:20:57 +080086 switch (pid = fork()) {
Garrett Cooperecd667e2011-01-19 01:06:18 -080087 case -1:
Wanlong Gao354ebb42012-12-07 10:10:04 +080088 tst_brkm(TBROK | TERRNO, cleanup, "fork");
Garrett Cooperecd667e2011-01-19 01:06:18 -080089 case 0:
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +080090 test_alloc(testcase, lite);
Garrett Cooperecd667e2011-01-19 01:06:18 -080091 default:
92 break;
93 }
Zhouping Liu62d50b72013-03-19 11:40:50 +080094
Garrett Cooperecd667e2011-01-19 01:06:18 -080095 tst_resm(TINFO, "expected victim is %d.", pid);
96 if (waitpid(-1, &status, 0) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +080097 tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
Garrett Cooperecd667e2011-01-19 01:06:18 -080098
Jan Stancek30ac84c2014-07-25 09:58:47 +020099 if (WIFSIGNALED(status)) {
100 if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
101 tst_resm(TPASS, "victim signalled: (%d) %s",
102 SIGKILL,
103 tst_strsig(SIGKILL));
104 } else {
105 tst_resm(TFAIL, "victim signalled: (%d) %s",
106 WTERMSIG(status),
107 tst_strsig(WTERMSIG(status)));
108 }
109 } else if (WIFEXITED(status) && WEXITSTATUS(status) == retcode) {
110 tst_resm(TPASS, "victim retcode: (%d) %s",
111 retcode, strerror(retcode));
Garrett Cooperecd667e2011-01-19 01:06:18 -0800112 } else {
Jan Stancek30ac84c2014-07-25 09:58:47 +0200113 tst_resm(TFAIL, "victim unexpectedly ended with retcode: %d, "
114 "expected: %d", WEXITSTATUS(status), retcode);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800115 }
116}
117
/*
 * set_global_mempolicy - apply a NUMA memory policy to the calling process
 * @mempolicy: 0 leaves the policy untouched; otherwise one of MPOL_BIND,
 *             MPOL_INTERLEAVE or MPOL_PREFERRED
 *
 * MPOL_BIND uses the second allowed node; MPOL_INTERLEAVE and
 * MPOL_PREFERRED use the second and third allowed nodes.  When the system
 * does not offer enough nodes an informational message is printed and the
 * policy is left unchanged.  Compiles to a no-op when the NUMA headers and
 * MPOL constants are unavailable.
 */
static void set_global_mempolicy(int mempolicy)
{
#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
	&& HAVE_MPOL_CONSTANTS
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
	int num_nodes, *nodes;
	int ret;

	if (!mempolicy)
		return;

	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
	if (ret != 0)
		tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr");
	if (num_nodes < 2) {
		tst_resm(TINFO, "mempolicy need NUMA system support");
		free(nodes);
		return;
	}

	switch (mempolicy) {
	case MPOL_BIND:
		/* bind the second node */
		set_node(nmask, nodes[1]);
		break;
	case MPOL_INTERLEAVE:
	case MPOL_PREFERRED:
		if (num_nodes == 2) {
			tst_resm(TINFO, "The mempolicy need "
				 "more than 2 numa nodes");
			free(nodes);
			return;
		}
		/* Using the 2nd,3rd node */
		set_node(nmask, nodes[1]);
		set_node(nmask, nodes[2]);
		break;
	default:
		tst_brkm(TBROK|TERRNO, cleanup, "Bad mempolicy mode");
	}

	/*
	 * The node list is only needed to build nmask.  Release it before
	 * set_mempolicy() so it is not leaked and so free() cannot disturb
	 * the errno reported on failure.
	 */
	free(nodes);

	if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
		tst_brkm(TBROK|TERRNO, cleanup, "set_mempolicy");
#endif
}
162
Jan Stancek30ac84c2014-07-25 09:58:47 +0200163void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
Zhouping Liud6b67f82013-04-16 22:30:08 +0800164{
165 set_global_mempolicy(mempolicy);
Caspar Zhang54616fd2012-02-20 20:10:02 +0800166
Caspar Zhang6009edf2012-02-20 18:42:36 +0800167 tst_resm(TINFO, "start normal OOM testing.");
Jan Stancek30ac84c2014-07-25 09:58:47 +0200168 oom(NORMAL, lite, retcode, allow_sigkill);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800169
170 tst_resm(TINFO, "start OOM testing for mlocked pages.");
Jan Stancek30ac84c2014-07-25 09:58:47 +0200171 oom(MLOCK, lite, retcode, allow_sigkill);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800172
Xiaoguang Wangfc0e1392014-04-28 20:26:09 +0800173 if (access(PATH_KSM, F_OK) == -1) {
174 tst_resm(TINFO, "KSM configuration is not enabled, "
175 "skip OOM test for KSM pags");
176 } else {
177 tst_resm(TINFO, "start OOM testing for KSM pages.");
Jan Stancek30ac84c2014-07-25 09:58:47 +0200178 oom(KSM, lite, retcode, allow_sigkill);
Xiaoguang Wangfc0e1392014-04-28 20:26:09 +0800179 }
Caspar Zhang6009edf2012-02-20 18:42:36 +0800180}
181
Caspar Zhanga7f0eed2012-02-28 15:44:04 +0800182/* KSM */
Caspar Zhang6009edf2012-02-20 18:42:36 +0800183
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800184static void check(char *path, long int value)
Caspar Zhang6009edf2012-02-20 18:42:36 +0800185{
Zhouping Liue0963392013-04-23 15:12:55 +0800186 char fullpath[BUFSIZ];
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800187 long actual_val;
Caspar Zhang6009edf2012-02-20 18:42:36 +0800188
Zhouping Liue0963392013-04-23 15:12:55 +0800189 snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
190 SAFE_FILE_SCANF(cleanup, fullpath, "%ld", &actual_val);
Zhouping Liuf9ae09a2012-06-14 12:01:49 +0800191
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800192 tst_resm(TINFO, "%s is %ld.", path, actual_val);
193 if (actual_val != value)
Caspar Zhang6009edf2012-02-20 18:42:36 +0800194 tst_resm(TFAIL, "%s is not %ld.", path, value);
195}
196
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800197static void wait_ksmd_done(void)
Zhouping Liu60c41a52012-06-14 12:01:29 +0800198{
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800199 long pages_shared, pages_sharing, pages_volatile, pages_unshared;
200 long old_pages_shared = 0, old_pages_sharing = 0;
201 long old_pages_volatile = 0, old_pages_unshared = 0;
202 int changing = 1, count = 0;
Zhouping Liu60c41a52012-06-14 12:01:29 +0800203
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800204 while (changing) {
Caspar Zhang5ad42f62012-07-09 17:51:53 +0800205 sleep(10);
Zhouping Liu60c41a52012-06-14 12:01:29 +0800206 count++;
207
Zhouping Liue0963392013-04-23 15:12:55 +0800208 SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_shared",
209 "%ld", &pages_shared);
Zhouping Liu60c41a52012-06-14 12:01:29 +0800210
Zhouping Liue0963392013-04-23 15:12:55 +0800211 SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_sharing",
212 "%ld", &pages_sharing);
Zhouping Liu60c41a52012-06-14 12:01:29 +0800213
Zhouping Liue0963392013-04-23 15:12:55 +0800214 SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_volatile",
215 "%ld", &pages_volatile);
Zhouping Liu60c41a52012-06-14 12:01:29 +0800216
Zhouping Liue0963392013-04-23 15:12:55 +0800217 SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_unshared",
218 "%ld", &pages_unshared);
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800219
220 if (pages_shared != old_pages_shared ||
221 pages_sharing != old_pages_sharing ||
222 pages_volatile != old_pages_volatile ||
223 pages_unshared != old_pages_unshared) {
Wanlong Gao354ebb42012-12-07 10:10:04 +0800224 old_pages_shared = pages_shared;
225 old_pages_sharing = pages_sharing;
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800226 old_pages_volatile = pages_volatile;
Zhouping Liu60c41a52012-06-14 12:01:29 +0800227 old_pages_unshared = pages_unshared;
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800228 } else {
229 changing = 0;
Zhouping Liu60c41a52012-06-14 12:01:29 +0800230 }
231 }
232
Caspar Zhangf838d3e2012-07-09 17:51:52 +0800233 tst_resm(TINFO, "ksm daemon takes %ds to scan all mergeable pages",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800234 count * 10);
Zhouping Liu60c41a52012-06-14 12:01:29 +0800235}
236
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800237static void group_check(int run, int pages_shared, int pages_sharing,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800238 int pages_volatile, int pages_unshared,
239 int sleep_millisecs, int pages_to_scan)
Garrett Cooperecd667e2011-01-19 01:06:18 -0800240{
Zhouping Liu60c41a52012-06-14 12:01:29 +0800241 /* wait for ksm daemon to scan all mergeable pages. */
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800242 wait_ksmd_done();
Garrett Cooperecd667e2011-01-19 01:06:18 -0800243
Caspar Zhang6009edf2012-02-20 18:42:36 +0800244 tst_resm(TINFO, "check!");
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800245 check("run", run);
246 check("pages_shared", pages_shared);
247 check("pages_sharing", pages_sharing);
248 check("pages_volatile", pages_volatile);
249 check("pages_unshared", pages_unshared);
250 check("sleep_millisecs", sleep_millisecs);
251 check("pages_to_scan", pages_to_scan);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800252}
253
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800254static void verify(char **memory, char value, int proc,
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800255 int start, int end, int start2, int end2)
Caspar Zhang6009edf2012-02-20 18:42:36 +0800256{
257 int i, j;
258 void *s = NULL;
259
260 s = malloc((end - start) * (end2 - start2));
261 if (s == NULL)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800262 tst_brkm(TBROK | TERRNO, tst_exit, "malloc");
Caspar Zhang6009edf2012-02-20 18:42:36 +0800263
264 tst_resm(TINFO, "child %d verifies memory content.", proc);
265 memset(s, value, (end - start) * (end2 - start2));
Zhouping Liuf23ce912013-01-04 14:12:10 +0800266 if (memcmp(memory[start], s, (end - start) * (end2 - start2))
Wanlong Gao354ebb42012-12-07 10:10:04 +0800267 != 0)
Caspar Zhang6009edf2012-02-20 18:42:36 +0800268 for (j = start; j < end; j++)
269 for (i = start2; i < end2; i++)
Zhouping Liuf23ce912013-01-04 14:12:10 +0800270 if (memory[j][i] != value)
Caspar Zhang6009edf2012-02-20 18:42:36 +0800271 tst_resm(TFAIL, "child %d has %c at "
Wanlong Gao354ebb42012-12-07 10:10:04 +0800272 "%d,%d,%d.",
Zhouping Liuf23ce912013-01-04 14:12:10 +0800273 proc, memory[j][i], proc,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800274 j, i);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800275 free(s);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800276}
277
Caspar Zhang6009edf2012-02-20 18:42:36 +0800278void write_memcg(void)
Garrett Cooperecd667e2011-01-19 01:06:18 -0800279{
Zhouping Liue0963392013-04-23 15:12:55 +0800280 SAFE_FILE_PRINTF(NULL, MEMCG_LIMIT, "%ld", TESTMEM);
Garrett Cooper366a9282011-02-23 21:29:07 -0800281
Zhouping Liue0963392013-04-23 15:12:55 +0800282 SAFE_FILE_PRINTF(NULL, MEMCG_PATH_NEW "/tasks", "%d", getpid());
Garrett Cooperecd667e2011-01-19 01:06:18 -0800283}
284
/*
 * One fill step of a KSM child.  Field order matters: instances in this
 * file are built with positional initializers.
 */
struct ksm_merge_data {
	/* byte the child fills its memory with */
	char data;
	/*
	 * compared against size * MB by the users of this struct: a smaller
	 * value makes the child overwrite its final byte with 'e'
	 */
	unsigned int mergeable_size;
};
289
290static void ksm_child_memset(int child_num, int size, int total_unit,
Zhouping Liuf23ce912013-01-04 14:12:10 +0800291 struct ksm_merge_data ksm_merge_data, char **memory)
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800292{
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800293 int i = 0, j;
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800294 int unit = size / total_unit;
295
296 tst_resm(TINFO, "child %d continues...", child_num);
297
298 if (ksm_merge_data.mergeable_size == size * MB) {
299 tst_resm(TINFO, "child %d allocates %d MB filled with '%c'",
300 child_num, size, ksm_merge_data.data);
301
302 } else {
303 tst_resm(TINFO, "child %d allocates %d MB filled with '%c'"
304 " except one page with 'e'",
305 child_num, size, ksm_merge_data.data);
306 }
307
308 for (j = 0; j < total_unit; j++) {
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800309 for (i = 0; (unsigned int)i < unit * MB; i++)
Zhouping Liuf23ce912013-01-04 14:12:10 +0800310 memory[j][i] = ksm_merge_data.data;
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800311 }
312
313 /* if it contains unshared page, then set 'e' char
314 * at the end of the last page
315 */
316 if (ksm_merge_data.mergeable_size < size * MB)
Zhouping Liuf23ce912013-01-04 14:12:10 +0800317 memory[j-1][i-1] = 'e';
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800318}
319
320static void create_ksm_child(int child_num, int size, int unit,
321 struct ksm_merge_data *ksm_merge_data)
322{
323 int j, total_unit;
Zhouping Liuf23ce912013-01-04 14:12:10 +0800324 char **memory;
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800325
326 /* The total units in all */
327 total_unit = size / unit;
328
Zhouping Liuf23ce912013-01-04 14:12:10 +0800329 /* Apply for the space for memory */
Cyril Hrubisd218f342014-09-23 13:14:56 +0200330 memory = malloc(total_unit * sizeof(char *));
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800331 for (j = 0; j < total_unit; j++) {
Zhouping Liuf23ce912013-01-04 14:12:10 +0800332 memory[j] = mmap(NULL, unit * MB, PROT_READ|PROT_WRITE,
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800333 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
Zhouping Liuf23ce912013-01-04 14:12:10 +0800334 if (memory[j] == MAP_FAILED)
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800335 tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
336#ifdef HAVE_MADV_MERGEABLE
Zhouping Liuf23ce912013-01-04 14:12:10 +0800337 if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800338 tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
339#endif
340 }
341
342 tst_resm(TINFO, "child %d stops.", child_num);
343 if (raise(SIGSTOP) == -1)
344 tst_brkm(TBROK|TERRNO, tst_exit, "kill");
345 fflush(stdout);
346
347 for (j = 0; j < 4; j++) {
348
349 ksm_child_memset(child_num, size, total_unit,
Zhouping Liuf23ce912013-01-04 14:12:10 +0800350 ksm_merge_data[j], memory);
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800351
352 fflush(stdout);
353
354 tst_resm(TINFO, "child %d stops.", child_num);
355 if (raise(SIGSTOP) == -1)
356 tst_brkm(TBROK|TERRNO, tst_exit, "kill");
357
358 if (ksm_merge_data[j].mergeable_size < size * MB) {
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800359 verify(memory, 'e', child_num, total_unit - 1,
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800360 total_unit, unit * MB - 1, unit * MB);
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800361 verify(memory, ksm_merge_data[j].data, child_num,
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800362 0, total_unit, 0, unit * MB - 1);
363 } else {
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800364 verify(memory, ksm_merge_data[j].data, child_num,
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800365 0, total_unit, 0, unit * MB);
366 }
367 }
368
369 tst_resm(TINFO, "child %d finished.", child_num);
370}
371
372static void stop_ksm_children(int *child, int num)
373{
374 int k, status;
375
376 tst_resm(TINFO, "wait for all children to stop.");
377 for (k = 0; k < num; k++) {
378 if (waitpid(child[k], &status, WUNTRACED) == -1)
379 tst_brkm(TBROK|TERRNO, cleanup, "waitpid");
380 if (!WIFSTOPPED(status))
381 tst_brkm(TBROK, cleanup, "child %d was not stopped", k);
382 }
383}
384
385static void resume_ksm_children(int *child, int num)
386{
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800387 int k;
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800388
389 tst_resm(TINFO, "resume all children.");
390 for (k = 0; k < num; k++) {
391 if (kill(child[k], SIGCONT) == -1)
392 tst_brkm(TBROK|TERRNO, cleanup, "kill child[%d]", k);
393 }
394 fflush(stdout);
395}
396
Garrett Cooperecd667e2011-01-19 01:06:18 -0800397void create_same_memory(int size, int num, int unit)
398{
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800399 int i, j, status, *child;
400 unsigned long ps, pages;
401 struct ksm_merge_data **ksm_data;
402
403 struct ksm_merge_data ksm_data0[] = {
404 {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
405 };
406 struct ksm_merge_data ksm_data1[] = {
407 {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
408 };
409 struct ksm_merge_data ksm_data2[] = {
410 {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
411 };
Caspar Zhang346aab22011-05-12 00:52:37 +0800412
413 ps = sysconf(_SC_PAGE_SIZE);
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800414 pages = MB / ps;
Garrett Cooperecd667e2011-01-19 01:06:18 -0800415
Cyril Hrubisd218f342014-09-23 13:14:56 +0200416 ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800417 /* Since from third child, the data is same with the first child's */
418 for (i = 0; i < num - 3; i++) {
Cyril Hrubisd218f342014-09-23 13:14:56 +0200419 ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800420 for (j = 0; j < 4; j++) {
421 ksm_data[i][j].data = ksm_data0[j].data;
422 ksm_data[i][j].mergeable_size =
423 ksm_data0[j].mergeable_size;
424 }
425 }
426
Cyril Hrubisd218f342014-09-23 13:14:56 +0200427 child = malloc(num * sizeof(int));
Garrett Cooperecd667e2011-01-19 01:06:18 -0800428 if (child == NULL)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800429 tst_brkm(TBROK | TERRNO, cleanup, "malloc");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800430
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800431 for (i = 0; i < num; i++) {
432 fflush(stdout);
433 switch (child[i] = fork()) {
Garrett Cooperecd667e2011-01-19 01:06:18 -0800434 case -1:
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800435 tst_brkm(TBROK|TERRNO, cleanup, "fork");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800436 case 0:
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800437 if (i == 0) {
438 create_ksm_child(i, size, unit, ksm_data0);
439 exit(0);
440 } else if (i == 1) {
441 create_ksm_child(i, size, unit, ksm_data1);
442 exit(0);
443 } else if (i == 2) {
444 create_ksm_child(i, size, unit, ksm_data2);
445 exit(0);
446 } else {
447 create_ksm_child(i, size, unit, ksm_data[i-3]);
448 exit(0);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800449 }
Garrett Cooperecd667e2011-01-19 01:06:18 -0800450 }
451 }
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800452
453 stop_ksm_children(child, num);
454
Garrett Cooperecd667e2011-01-19 01:06:18 -0800455 tst_resm(TINFO, "KSM merging...");
Zhouping Liue0963392013-04-23 15:12:55 +0800456 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
457 SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
458 size * pages *num);
459 SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800460
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800461 resume_ksm_children(child, num);
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800462 group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800463
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800464 stop_ksm_children(child, num);
465 resume_ksm_children(child, num);
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800466 group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800467
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800468 stop_ksm_children(child, num);
469 resume_ksm_children(child, num);
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800470 group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800471
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800472 stop_ksm_children(child, num);
473 resume_ksm_children(child, num);
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800474 group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800475
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800476 stop_ksm_children(child, num);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800477
Garrett Cooperecd667e2011-01-19 01:06:18 -0800478 tst_resm(TINFO, "KSM unmerging...");
Zhouping Liue0963392013-04-23 15:12:55 +0800479 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800480
481 resume_ksm_children(child, num);
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800482 group_check(2, 0, 0, 0, 0, 0, size * pages * num);
Garrett Cooperecd667e2011-01-19 01:06:18 -0800483
Garrett Cooperecd667e2011-01-19 01:06:18 -0800484 tst_resm(TINFO, "stop KSM.");
Zhouping Liue0963392013-04-23 15:12:55 +0800485 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "0");
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800486 group_check(0, 0, 0, 0, 0, 0, size * pages * num);
Zhouping Liuf2dc2772013-01-04 14:12:09 +0800487
Garrett Cooperecd667e2011-01-19 01:06:18 -0800488 while (waitpid(-1, &status, WUNTRACED | WCONTINUED) > 0)
489 if (WEXITSTATUS(status) != 0)
490 tst_resm(TFAIL, "child exit status is %d",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800491 WEXITSTATUS(status));
Garrett Cooperecd667e2011-01-19 01:06:18 -0800492}
493
Zhouping Liu65ca1bd2013-05-13 16:43:28 +0800494void test_ksm_merge_across_nodes(unsigned long nr_pages)
495{
496 char **memory;
497 int i, ret;
498 int num_nodes, *nodes;
499 unsigned long length;
500 unsigned long pagesize;
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800501
502#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
503 && HAVE_MPOL_CONSTANTS
Zhouping Liu65ca1bd2013-05-13 16:43:28 +0800504 unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800505#endif
Zhouping Liu65ca1bd2013-05-13 16:43:28 +0800506
507 ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
508 if (ret != 0)
509 tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr");
510 if (num_nodes < 2) {
511 tst_resm(TINFO, "need NUMA system support");
512 free(nodes);
513 return;
514 }
515
516 pagesize = sysconf(_SC_PAGE_SIZE);
517 length = nr_pages * pagesize;
518
Cyril Hrubisd218f342014-09-23 13:14:56 +0200519 memory = malloc(num_nodes * sizeof(char *));
Zhouping Liu65ca1bd2013-05-13 16:43:28 +0800520 for (i = 0; i < num_nodes; i++) {
521 memory[i] = mmap(NULL, length, PROT_READ|PROT_WRITE,
522 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
523 if (memory[i] == MAP_FAILED)
524 tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
DAN LI8cda96c2013-05-17 10:32:16 +0800525#ifdef HAVE_MADV_MERGEABLE
Zhouping Liu65ca1bd2013-05-13 16:43:28 +0800526 if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
527 tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
DAN LI8cda96c2013-05-17 10:32:16 +0800528#endif
Zhouping Liu65ca1bd2013-05-13 16:43:28 +0800529
530#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
531 && HAVE_MPOL_CONSTANTS
532 clean_node(nmask);
533 set_node(nmask, nodes[i]);
534 /*
535 * Use mbind() to make sure each node contains
536 * length size memory.
537 */
538 ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
539 if (ret == -1)
540 tst_brkm(TBROK|TERRNO, tst_exit, "mbind");
541#endif
542
543 memset(memory[i], 10, length);
544 }
545
546 SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
547 SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
548 nr_pages * num_nodes);
549 /*
550 * merge_across_nodes setting can be changed only when there
551 * are no ksm shared pages in system, so set run 2 to unmerge
552 * pages first, then to 1 after changing merge_across_nodes,
553 * to remerge according to the new setting.
554 */
555 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
556 wait_ksmd_done();
557 tst_resm(TINFO, "Start to test KSM with merge_across_nodes=1");
558 SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "1");
559 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
560 group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
561 nr_pages * num_nodes);
562
563 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
564 wait_ksmd_done();
565 tst_resm(TINFO, "Start to test KSM with merge_across_nodes=0");
566 SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "0");
567 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
568 group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
569 0, 0, 0, nr_pages * num_nodes);
570
571 SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
572 wait_ksmd_done();
573}
574
Garrett Cooperecd667e2011-01-19 01:06:18 -0800575void check_ksm_options(int *size, int *num, int *unit)
576{
577 if (opt_size) {
578 *size = atoi(opt_sizestr);
579 if (*size < 1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800580 tst_brkm(TBROK, cleanup, "size cannot be less than 1.");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800581 }
582 if (opt_unit) {
583 *unit = atoi(opt_unitstr);
584 if (*unit > *size)
585 tst_brkm(TBROK, cleanup,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800586 "unit cannot be greater than size.");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800587 if (*size % *unit != 0)
588 tst_brkm(TBROK, cleanup,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800589 "the remainder of division of size by unit is "
590 "not zero.");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800591 }
592 if (opt_num) {
593 *num = atoi(opt_numstr);
594 if (*num < 3)
595 tst_brkm(TBROK, cleanup,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800596 "process number cannot be less 3.");
Garrett Cooperecd667e2011-01-19 01:06:18 -0800597 }
598}
Caspar Zhang233e1222011-12-23 13:02:56 +0800599
/* ksm_usage - print help for the KSM specific options (-n, -s, -u). */
void ksm_usage(void)
{
	printf("  -n      Number of processes\n"
	       "  -s      Memory allocation size in MB\n"
	       "  -u      Memory allocation unit in MB\n");
}
606
Zhouping Liu0b341932013-04-16 22:30:07 +0800607/* THP */
608
609static int alloc_transparent_hugepages(int nr_thps, int hg_aligned)
610{
611 unsigned long hugepagesize, size;
612 void *addr;
613 int ret;
614
615 hugepagesize = read_meminfo("Hugepagesize:") * KB;
616 size = nr_thps * hugepagesize;
617
618 if (hg_aligned) {
619 ret = posix_memalign(&addr, hugepagesize, size);
620 if (ret != 0) {
621 printf("posix_memalign failed\n");
622 return -1;
623 }
624 } else {
625 addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
626 MAP_PRIVATE|MAP_ANON, -1, 0);
627 if (addr == MAP_FAILED) {
628 perror("mmap");
629 return -1;
630 }
631 }
632
633 memset(addr, 10, size);
634
635 tst_resm(TINFO, "child[%d] stop here", getpid());
636 /*
637 * stop here, until the father finish to calculate
638 * all the transparent hugepages.
639 */
640 if (raise(SIGSTOP) == -1) {
641 perror("kill");
642 return -1;
643 }
644
645 return 0;
646}
647
648static void khugepaged_scan_done(void)
649{
650 int changing = 1, count = 0, interval;
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800651 long old_pages_collapsed = 0, old_max_ptes_none = 0,
652 old_pages_to_scan = 0;
Zhouping Liu0b341932013-04-16 22:30:07 +0800653 long pages_collapsed = 0, max_ptes_none = 0, pages_to_scan = 0;
654
655 /*
656 * as 'khugepaged' run 100% during testing, so 5s is an
657 * enough interval for us to recognize if 'khugepaged'
658 * finish scanning proceses' anonymous hugepages or not.
659 */
660 interval = 5;
661
662 while (changing) {
663 sleep(interval);
664 count++;
665
666 SAFE_FILE_SCANF(cleanup, PATH_KHPD "pages_collapsed",
667 "%ld", &pages_collapsed);
668 SAFE_FILE_SCANF(cleanup, PATH_KHPD "max_ptes_none",
669 "%ld", &max_ptes_none);
670 SAFE_FILE_SCANF(cleanup, PATH_KHPD "pages_to_scan",
671 "%ld", &pages_to_scan);
672
673 if (pages_collapsed != old_pages_collapsed ||
674 max_ptes_none != old_max_ptes_none ||
675 pages_to_scan != old_pages_to_scan) {
676 old_pages_collapsed = pages_collapsed;
677 old_max_ptes_none = max_ptes_none;
678 old_pages_to_scan = pages_to_scan;
679 } else {
680 changing = 0;
681 }
682 }
683
684 tst_resm(TINFO, "khugepaged daemon takes %ds to scan all thp pages",
685 count * interval);
686}
687
688static void verify_thp_size(int *children, int nr_children, int nr_thps)
689{
690 FILE *fp;
691 char path[BUFSIZ], buf[BUFSIZ], line[BUFSIZ];
692 int i, ret;
693 long expect_thps; /* the amount of per child's transparent hugepages */
694 long val, actual_thps;
695 long hugepagesize;
696
697 hugepagesize = read_meminfo("Hugepagesize:");
698 expect_thps = nr_thps * hugepagesize;
699
700 for (i = 0; i < nr_children; i++) {
701 actual_thps = 0;
702
703 snprintf(path, BUFSIZ, "/proc/%d/smaps", children[i]);
704 fp = fopen(path, "r");
705 while (fgets(line, BUFSIZ, fp) != NULL) {
706 ret = sscanf(line, "%64s %ld", buf, &val);
707 if (ret == 2 && val != 0) {
708 if (strcmp(buf, "AnonHugePages:") == 0)
709 actual_thps += val;
710 }
711 }
712
713 if (actual_thps != expect_thps)
714 tst_resm(TFAIL, "child[%d] got %ldKB thps - expect %ld"
715 "KB thps", getpid(), actual_thps, expect_thps);
716 fclose(fp);
717 }
718}
719
Zhouping Liud6b67f82013-04-16 22:30:08 +0800720void test_transparent_hugepage(int nr_children, int nr_thps,
721 int hg_aligned, int mempolicy)
Zhouping Liu0b341932013-04-16 22:30:07 +0800722{
723 unsigned long hugepagesize, memfree;
724 int i, *pids, ret, status;
Zhouping Liu0b341932013-04-16 22:30:07 +0800725
Zhouping Liud6b67f82013-04-16 22:30:08 +0800726 if (mempolicy)
727 set_global_mempolicy(mempolicy);
728
Zhouping Liu0b341932013-04-16 22:30:07 +0800729 memfree = read_meminfo("MemFree:");
730 tst_resm(TINFO, "The current MemFree is %luMB", memfree / KB);
731 if (memfree < MB)
732 tst_resm(TCONF, "Not enough memory for testing");
733
734 hugepagesize = read_meminfo("Hugepagesize:");
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800735 tst_resm(TINFO, "The current Hugepagesize is %luMB", hugepagesize / KB);
Zhouping Liu0b341932013-04-16 22:30:07 +0800736
737 pids = malloc(nr_children * sizeof(int));
738 if (pids == NULL)
739 tst_brkm(TBROK | TERRNO, cleanup, "malloc");
740
741 for (i = 0; i < nr_children; i++) {
742 switch (pids[i] = fork()) {
743 case -1:
744 tst_brkm(TBROK | TERRNO, cleanup, "fork");
745
746 case 0:
747 ret = alloc_transparent_hugepages(nr_thps, hg_aligned);
748 exit(ret);
749 }
750 }
751
752 tst_resm(TINFO, "Stop all children...");
753 for (i = 0; i < nr_children; i++) {
754 if (waitpid(pids[i], &status, WUNTRACED) == -1)
755 tst_brkm(TBROK|TERRNO, cleanup, "waitpid");
756 if (!WIFSTOPPED(status))
757 tst_brkm(TBROK, cleanup,
758 "child[%d] was not stoppted", pids[i]);
759 }
760
761 tst_resm(TINFO, "Start to scan all transparent hugepages...");
762 khugepaged_scan_done();
763
764 tst_resm(TINFO, "Start to verify transparent hugepage size...");
765 verify_thp_size(pids, nr_children, nr_thps);
766
767 tst_resm(TINFO, "Wake up all children...");
768 for (i = 0; i < nr_children; i++) {
769 if (kill(pids[i], SIGCONT) == -1)
770 tst_brkm(TBROK | TERRNO, cleanup,
771 "signal continue child[%d]", pids[i]);
772 }
773
774 /* wait all children finish their task */
775 for (i = 0; i < nr_children; i++) {
776 if (waitpid(pids[i], &status, 0) == -1)
777 tst_brkm(TBROK|TERRNO, cleanup, "waitpid %d", pids[i]);
778
779 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
780 tst_resm(TFAIL, "the child[%d] unexpectedly failed:"
781 " %d", pids[i], status);
782 }
783}
784
785void check_thp_options(int *nr_children, int *nr_thps)
786{
787 if (opt_nr_children)
788 *nr_children = SAFE_STRTOL(NULL, opt_nr_children_str,
789 0, LONG_MAX);
790 if (opt_nr_thps)
791 *nr_thps = SAFE_STRTOL(NULL, opt_nr_thps_str, 0, LONG_MAX);
792}
793
/* Print the help text for the THP specific command line options to stdout. */
void thp_usage(void)
{
	fputs(" -n Number of processes\n", stdout);
	fputs(" -N Number of transparent hugepages\n", stdout);
}
799
Caspar Zhanga7f0eed2012-02-28 15:44:04 +0800800/* cpuset/memcg */
Caspar Zhang6009edf2012-02-20 18:42:36 +0800801
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800802static void gather_node_cpus(char *cpus, long nd)
Caspar Zhang70422712012-02-20 19:54:22 +0800803{
804 int ncpus = 0;
805 int i;
Zhouping Liu448c1ee2013-03-19 11:40:49 +0800806 long online;
Caspar Zhang70422712012-02-20 19:54:22 +0800807 char buf[BUFSIZ];
Zhouping Liu448c1ee2013-03-19 11:40:49 +0800808 char path[BUFSIZ], path1[BUFSIZ];
Caspar Zhang70422712012-02-20 19:54:22 +0800809
810 while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
811 ncpus++;
812
Zhouping Liu448c1ee2013-03-19 11:40:49 +0800813 for (i = 0; i < ncpus; i++) {
814 snprintf(path, BUFSIZ,
815 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
Jan Stancekdf7d4842013-08-27 14:16:19 +0200816 if (path_exist(path)) {
Zhouping Liu448c1ee2013-03-19 11:40:49 +0800817 snprintf(path1, BUFSIZ, "%s/online", path);
818 /*
Dave Kleikamp371375c2013-08-20 15:34:39 -0500819 * if there is no online knob, then the cpu cannot
820 * be taken offline
Zhouping Liu448c1ee2013-03-19 11:40:49 +0800821 */
Dave Kleikamp371375c2013-08-20 15:34:39 -0500822 if (path_exist(path1)) {
823 SAFE_FILE_SCANF(cleanup, path1, "%ld", &online);
824 if (online == 0)
825 continue;
826 }
Caspar Zhang70422712012-02-20 19:54:22 +0800827 sprintf(buf, "%d,", i);
828 strcat(cpus, buf);
829 }
Zhouping Liu448c1ee2013-03-19 11:40:49 +0800830 }
Caspar Zhang70422712012-02-20 19:54:22 +0800831 /* Remove the trailing comma. */
832 cpus[strlen(cpus) - 1] = '\0';
833}
834
Caspar Zhangff92e942012-02-29 10:58:15 +0800835void read_cpuset_files(char *prefix, char *filename, char *retbuf)
Caspar Zhang70422712012-02-20 19:54:22 +0800836{
Caspar Zhang70422712012-02-20 19:54:22 +0800837 int fd;
Caspar Zhangff92e942012-02-29 10:58:15 +0800838 char path[BUFSIZ];
Caspar Zhang70422712012-02-20 19:54:22 +0800839
Caspar Zhang70422712012-02-20 19:54:22 +0800840 /*
Caspar Zhangff92e942012-02-29 10:58:15 +0800841 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
Caspar Zhang70422712012-02-20 19:54:22 +0800842 * please see Documentation/cgroups/cpusets.txt from kernel src
843 * for details
844 */
Caspar Zhangff92e942012-02-29 10:58:15 +0800845 snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
846 fd = open(path, O_RDONLY);
847 if (fd == -1) {
848 if (errno == ENOENT) {
849 snprintf(path, BUFSIZ, "%s/cpuset.%s",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800850 prefix, filename);
Caspar Zhangff92e942012-02-29 10:58:15 +0800851 fd = open(path, O_RDONLY);
852 if (fd == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800853 tst_brkm(TBROK | TERRNO, cleanup,
854 "open %s", path);
Caspar Zhangff92e942012-02-29 10:58:15 +0800855 } else
Wanlong Gao354ebb42012-12-07 10:10:04 +0800856 tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
Caspar Zhangff92e942012-02-29 10:58:15 +0800857 }
858 if (read(fd, retbuf, BUFSIZ) < 0)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800859 tst_brkm(TBROK | TERRNO, cleanup, "read %s", path);
Caspar Zhangff92e942012-02-29 10:58:15 +0800860 close(fd);
861}
862
863void write_cpuset_files(char *prefix, char *filename, char *buf)
864{
865 int fd;
866 char path[BUFSIZ];
867
868 /*
869 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
870 * please see Documentation/cgroups/cpusets.txt from kernel src
871 * for details
872 */
873 snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
Caspar Zhang633c7552012-02-21 11:01:03 +0800874 fd = open(path, O_WRONLY);
Caspar Zhang70422712012-02-20 19:54:22 +0800875 if (fd == -1) {
876 if (errno == ENOENT) {
Caspar Zhangff92e942012-02-29 10:58:15 +0800877 snprintf(path, BUFSIZ, "%s/cpuset.%s",
Wanlong Gao354ebb42012-12-07 10:10:04 +0800878 prefix, filename);
Caspar Zhang633c7552012-02-21 11:01:03 +0800879 fd = open(path, O_WRONLY);
Caspar Zhang70422712012-02-20 19:54:22 +0800880 if (fd == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800881 tst_brkm(TBROK | TERRNO, cleanup,
882 "open %s", path);
Caspar Zhang70422712012-02-20 19:54:22 +0800883 } else
Wanlong Gao354ebb42012-12-07 10:10:04 +0800884 tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
Caspar Zhang70422712012-02-20 19:54:22 +0800885 }
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800886 if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
Wanlong Gao354ebb42012-12-07 10:10:04 +0800887 tst_brkm(TBROK | TERRNO, cleanup, "write %s", path);
Caspar Zhang70422712012-02-20 19:54:22 +0800888 close(fd);
Caspar Zhang6d4044f2012-02-28 16:12:15 +0800889}
890
Caspar Zhang6d4044f2012-02-28 16:12:15 +0800891void write_cpusets(long nd)
892{
893 char buf[BUFSIZ];
Caspar Zhangff92e942012-02-29 10:58:15 +0800894 char cpus[BUFSIZ] = "";
Caspar Zhang6d4044f2012-02-28 16:12:15 +0800895
Caspar Zhangff92e942012-02-29 10:58:15 +0800896 snprintf(buf, BUFSIZ, "%ld", nd);
897 write_cpuset_files(CPATH_NEW, "mems", buf);
898
Zhouping Liu4ac1b8e2013-03-19 11:40:55 +0800899 gather_node_cpus(cpus, nd);
Zhouping Liu630f90d2013-04-29 14:07:17 +0800900 /*
901 * If the 'nd' node doesn't contain any CPUs,
902 * the first ID of CPU '0' will be used as
903 * the value of cpuset.cpus.
904 */
905 if (strlen(cpus) != 0) {
906 write_cpuset_files(CPATH_NEW, "cpus", cpus);
907 } else {
908 tst_resm(TINFO, "No CPUs in the node%ld; "
909 "using only CPU0", nd);
910 write_cpuset_files(CPATH_NEW, "cpus", "0");
911 }
Caspar Zhangff92e942012-02-29 10:58:15 +0800912
Zhouping Liue0963392013-04-23 15:12:55 +0800913 SAFE_FILE_PRINTF(NULL, CPATH_NEW "/tasks", "%d", getpid());
Caspar Zhang70422712012-02-20 19:54:22 +0800914}
915
Caspar Zhang6009edf2012-02-20 18:42:36 +0800916void umount_mem(char *path, char *path_new)
917{
918 FILE *fp;
919 int fd;
920 char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];
921
922 /* Move all processes in task to its parent node. */
923 sprintf(s, "%s/tasks", path);
924 fd = open(s, O_WRONLY);
925 if (fd == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800926 tst_resm(TWARN | TERRNO, "open %s", s);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800927
928 snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
929 fp = fopen(s_new, "r");
930 if (fp == NULL)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800931 tst_resm(TWARN | TERRNO, "fopen %s", s_new);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800932 if ((fd != -1) && (fp != NULL)) {
933 while (fgets(value, BUFSIZ, fp) != NULL)
934 if (write(fd, value, strlen(value) - 1)
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800935 != (ssize_t)strlen(value) - 1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800936 tst_resm(TWARN | TERRNO, "write %s", s);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800937 }
938 if (fd != -1)
939 close(fd);
940 if (fp != NULL)
941 fclose(fp);
942 if (rmdir(path_new) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800943 tst_resm(TWARN | TERRNO, "rmdir %s", path_new);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800944 if (umount(path) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800945 tst_resm(TWARN | TERRNO, "umount %s", path);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800946 if (rmdir(path) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800947 tst_resm(TWARN | TERRNO, "rmdir %s", path);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800948}
949
950void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
951{
952 if (mkdir(path, 0777) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800953 tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800954 if (mount(name, path, fs, 0, options) == -1) {
955 if (errno == ENODEV) {
956 if (rmdir(path) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800957 tst_resm(TWARN | TERRNO, "rmdir %s failed",
958 path);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800959 tst_brkm(TCONF, NULL,
Wanlong Gao354ebb42012-12-07 10:10:04 +0800960 "file system %s is not configured in kernel",
961 fs);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800962 }
Wanlong Gao354ebb42012-12-07 10:10:04 +0800963 tst_brkm(TBROK | TERRNO, cleanup, "mount %s", path);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800964 }
965 if (mkdir(path_new, 0777) == -1)
Wanlong Gao354ebb42012-12-07 10:10:04 +0800966 tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path_new);
Caspar Zhang6009edf2012-02-20 18:42:36 +0800967}
968
Caspar Zhanga7f0eed2012-02-28 15:44:04 +0800969/* shared */
Caspar Zhang6009edf2012-02-20 18:42:36 +0800970
Caspar Zhanga98ac192012-08-09 14:15:41 +0800971/* Warning: *DO NOT* use this function in child */
Wanlong Gao354ebb42012-12-07 10:10:04 +0800972unsigned int get_a_numa_node(void (*cleanup_fn) (void))
Caspar Zhang6009edf2012-02-20 18:42:36 +0800973{
Caspar Zhanga98ac192012-08-09 14:15:41 +0800974 unsigned int nd1, nd2;
975 int ret;
Caspar Zhang6009edf2012-02-20 18:42:36 +0800976
Caspar Zhanga98ac192012-08-09 14:15:41 +0800977 ret = get_allowed_nodes(0, 2, &nd1, &nd2);
978 switch (ret) {
979 case 0:
980 break;
981 case -3:
982 tst_brkm(TCONF, cleanup_fn, "requires a NUMA system.");
983 default:
Wanlong Gao354ebb42012-12-07 10:10:04 +0800984 tst_brkm(TBROK | TERRNO, cleanup_fn, "1st get_allowed_nodes");
Caspar Zhanga98ac192012-08-09 14:15:41 +0800985 }
Caspar Zhang6009edf2012-02-20 18:42:36 +0800986
Wanlong Gao354ebb42012-12-07 10:10:04 +0800987 ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
Caspar Zhanga98ac192012-08-09 14:15:41 +0800988 switch (ret) {
989 case 0:
Xiaoguang Wangf200be52014-04-28 16:47:13 +0800990 tst_resm(TINFO, "get node%u.", nd1);
Caspar Zhanga98ac192012-08-09 14:15:41 +0800991 return nd1;
992 case -3:
993 tst_brkm(TCONF, cleanup_fn, "requires a NUMA system that has "
Wanlong Gao354ebb42012-12-07 10:10:04 +0800994 "at least one node with both memory and CPU "
995 "available.");
Caspar Zhanga98ac192012-08-09 14:15:41 +0800996 default:
997 break;
998 }
Wanlong Gao354ebb42012-12-07 10:10:04 +0800999 tst_brkm(TBROK | TERRNO, cleanup_fn, "2nd get_allowed_nodes");
Caspar Zhang6009edf2012-02-20 18:42:36 +08001000}
1001
/*
 * Test whether a file system path exists.
 *
 * path: printf-style format string; the remaining arguments are its
 *       conversion arguments.  The formatted result is limited to
 *       PATH_MAX - 1 characters.
 *
 * Returns 1 when access(F_OK) succeeds on the formatted path, 0 otherwise.
 */
int path_exist(const char *path, ...)
{
	char resolved[PATH_MAX];
	va_list args;

	va_start(args, path);
	vsnprintf(resolved, sizeof(resolved), path, args);
	va_end(args);

	if (access(resolved, F_OK) != 0)
		return 0;
	return 1;
}
1013
Caspar Zhang233e1222011-12-23 13:02:56 +08001014long read_meminfo(char *item)
1015{
1016 FILE *fp;
1017 char line[BUFSIZ], buf[BUFSIZ];
1018 long val;
1019
1020 fp = fopen(PATH_MEMINFO, "r");
1021 if (fp == NULL)
Wanlong Gao354ebb42012-12-07 10:10:04 +08001022 tst_brkm(TBROK | TERRNO, cleanup, "fopen %s", PATH_MEMINFO);
Caspar Zhang233e1222011-12-23 13:02:56 +08001023
1024 while (fgets(line, BUFSIZ, fp) != NULL) {
1025 if (sscanf(line, "%64s %ld", buf, &val) == 2)
1026 if (strcmp(buf, item) == 0) {
1027 fclose(fp);
1028 return val;
1029 }
1030 continue;
1031 }
1032 fclose(fp);
1033
1034 tst_brkm(TBROK, cleanup, "cannot find \"%s\" in %s",
Wanlong Gao354ebb42012-12-07 10:10:04 +08001035 item, PATH_MEMINFO);
Caspar Zhang233e1222011-12-23 13:02:56 +08001036}
Caspar Zhang3fa874a2011-12-28 14:16:20 +08001037
1038void set_sys_tune(char *sys_file, long tune, int check)
1039{
Caspar Zhang3fa874a2011-12-28 14:16:20 +08001040 long val;
Zhouping Liue0963392013-04-23 15:12:55 +08001041 char path[BUFSIZ];
Caspar Zhang3fa874a2011-12-28 14:16:20 +08001042
1043 tst_resm(TINFO, "set %s to %ld", sys_file, tune);
1044
Zhouping Liue0963392013-04-23 15:12:55 +08001045 snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
1046 SAFE_FILE_PRINTF(NULL, path, "%ld", tune);
Caspar Zhang3fa874a2011-12-28 14:16:20 +08001047
1048 if (check) {
1049 val = get_sys_tune(sys_file);
1050 if (val != tune)
1051 tst_brkm(TBROK, cleanup, "%s = %ld, but expect %ld",
Wanlong Gao354ebb42012-12-07 10:10:04 +08001052 sys_file, val, tune);
Caspar Zhang3fa874a2011-12-28 14:16:20 +08001053 }
1054}
1055
1056long get_sys_tune(char *sys_file)
1057{
Zhouping Liue0963392013-04-23 15:12:55 +08001058 char path[BUFSIZ];
1059 long tune;
Caspar Zhang3fa874a2011-12-28 14:16:20 +08001060
Zhouping Liue0963392013-04-23 15:12:55 +08001061 snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
1062 SAFE_FILE_SCANF(NULL, path, "%ld", &tune);
Caspar Zhang3459f502012-02-27 17:11:33 +08001063
Zhouping Liue0963392013-04-23 15:12:55 +08001064 return tune;
Caspar Zhang3459f502012-02-27 17:11:33 +08001065}
Wanlong Gao892153a2012-04-02 16:21:59 +08001066
Wanlong Gao354ebb42012-12-07 10:10:04 +08001067void update_shm_size(size_t * shm_size)
Wanlong Gao892153a2012-04-02 16:21:59 +08001068{
Wanlong Gao892153a2012-04-02 16:21:59 +08001069 size_t shmmax;
1070
Zhouping Liue0963392013-04-23 15:12:55 +08001071 SAFE_FILE_SCANF(cleanup, PATH_SHMMAX, "%ld", &shmmax);
Wanlong Gao892153a2012-04-02 16:21:59 +08001072 if (*shm_size > shmmax) {
1073 tst_resm(TINFO, "Set shm_size to shmmax: %ld", shmmax);
1074 *shm_size = shmmax;
1075 }
1076}