| #include "config.h" |
| #include <sys/types.h> |
| #include <sys/mman.h> |
| #include <sys/mount.h> |
| #include <sys/stat.h> |
| #include <sys/wait.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #if HAVE_NUMA_H |
| #include <numa.h> |
| #endif |
| #if HAVE_NUMAIF_H |
| #include <numaif.h> |
| #endif |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <unistd.h> |
| |
| #include "test.h" |
| #include "safe_macros.h" |
| #include "safe_file_ops.h" |
| #include "mem.h" |
| #include "numa_helper.h" |
| |
| /* OOM */ |
| |
| static int alloc_mem(long int length, int testcase) |
| { |
| char *s; |
| long i, pagesz = getpagesize(); |
| |
| tst_resm(TINFO, "allocating %ld bytes.", length); |
| |
| s = mmap(NULL, length, PROT_READ | PROT_WRITE, |
| MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); |
| if (s == MAP_FAILED) |
| return errno; |
| |
| if (testcase == MLOCK && mlock(s, length) == -1) |
| return errno; |
| #ifdef HAVE_MADV_MERGEABLE |
| if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1) |
| return errno; |
| #endif |
| for (i = 0; i < length; i += pagesz) |
| s[i] = '\a'; |
| |
| return 0; |
| } |
| |
| static void test_alloc(int testcase, int lite) |
| { |
| int ret; |
| |
| if (lite) { |
| ret = alloc_mem(TESTMEM + MB, testcase); |
| } else { |
| ret = 0; |
| while (!ret) |
| ret = alloc_mem(LENGTH, testcase); |
| } |
| exit(ret); |
| } |
| |
| /* |
| * oom - allocates memory according to specified testcase and checks |
| * desired outcome (e.g. child killed, operation failed with ENOMEM) |
| * @testcase: selects how child allocates memory |
| * valid choices are: OVERCOMMIT, NORMAL, MLOCK and KSM |
| * @lite: if non-zero, child makes only single TESTMEM+MB allocation |
| * if zero, child keeps allocating memory until it gets killed |
| * or some operation fails |
| * @retcode: expected return code of child process |
| * if matches child ret code, this function reports PASS, |
| * otherwise it reports FAIL |
| * @allow_sigkill: if zero and child is killed, this function reports FAIL |
| * if non-zero, then if child is killed by SIGKILL |
| * it is considered as PASS |
| */ |
| void oom(int testcase, int lite, int retcode, int allow_sigkill) |
| { |
| pid_t pid; |
| int status; |
| |
| switch (pid = fork()) { |
| case -1: |
| tst_brkm(TBROK | TERRNO, cleanup, "fork"); |
| case 0: |
| test_alloc(testcase, lite); |
| default: |
| break; |
| } |
| |
| tst_resm(TINFO, "expected victim is %d.", pid); |
| if (waitpid(-1, &status, 0) == -1) |
| tst_brkm(TBROK | TERRNO, cleanup, "waitpid"); |
| |
| if (WIFSIGNALED(status)) { |
| if (allow_sigkill && WTERMSIG(status) == SIGKILL) { |
| tst_resm(TPASS, "victim signalled: (%d) %s", |
| SIGKILL, |
| tst_strsig(SIGKILL)); |
| } else { |
| tst_resm(TFAIL, "victim signalled: (%d) %s", |
| WTERMSIG(status), |
| tst_strsig(WTERMSIG(status))); |
| } |
| } else if (WIFEXITED(status) && WEXITSTATUS(status) == retcode) { |
| tst_resm(TPASS, "victim retcode: (%d) %s", |
| retcode, strerror(retcode)); |
| } else { |
| tst_resm(TFAIL, "victim unexpectedly ended with retcode: %d, " |
| "expected: %d", WEXITSTATUS(status), retcode); |
| } |
| } |
| |
/*
 * Apply @mempolicy to the calling process with set_mempolicy().
 *
 * @mempolicy: 0 leaves the policy untouched; MPOL_BIND binds to the second
 *             allowed node; MPOL_INTERLEAVE / MPOL_PREFERRED use the second
 *             and third allowed nodes. Any other value aborts the test.
 *
 * The function returns without changing anything (after a TINFO message)
 * when fewer than two nodes are available, or fewer than three for
 * MPOL_INTERLEAVE / MPOL_PREFERRED. It compiles to a no-op when the NUMA
 * headers/constants were not detected at configure time.
 */
static void set_global_mempolicy(int mempolicy)
{
#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
	&& HAVE_MPOL_CONSTANTS
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
	int num_nodes, *nodes;
	int ret;

	if (!mempolicy)
		return;

	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
	if (ret != 0)
		tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr");
	if (num_nodes < 2) {
		tst_resm(TINFO, "mempolicy need NUMA system support");
		free(nodes);
		return;
	}

	switch (mempolicy) {
	case MPOL_BIND:
		/* bind the second node */
		set_node(nmask, nodes[1]);
		break;
	case MPOL_INTERLEAVE:
	case MPOL_PREFERRED:
		if (num_nodes == 2) {
			tst_resm(TINFO, "The mempolicy need "
				 "more than 2 numa nodes");
			free(nodes);
			return;
		}
		/* Using the 2nd,3rd node */
		set_node(nmask, nodes[1]);
		set_node(nmask, nodes[2]);
		break;
	default:
		/* no syscall failed here, so errno would only mislead */
		tst_brkm(TBROK, cleanup, "Bad mempolicy mode");
	}

	if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
		tst_brkm(TBROK|TERRNO, cleanup, "set_mempolicy");

	/* the node list is only needed to build nmask */
	free(nodes);
#endif
}
| |
| void testoom(int mempolicy, int lite, int retcode, int allow_sigkill) |
| { |
| set_global_mempolicy(mempolicy); |
| |
| tst_resm(TINFO, "start normal OOM testing."); |
| oom(NORMAL, lite, retcode, allow_sigkill); |
| |
| tst_resm(TINFO, "start OOM testing for mlocked pages."); |
| oom(MLOCK, lite, retcode, allow_sigkill); |
| |
| if (access(PATH_KSM, F_OK) == -1) { |
| tst_resm(TINFO, "KSM configuration is not enabled, " |
| "skip OOM test for KSM pags"); |
| } else { |
| tst_resm(TINFO, "start OOM testing for KSM pages."); |
| oom(KSM, lite, retcode, allow_sigkill); |
| } |
| } |
| |
| /* KSM */ |
| |
| static void check(char *path, long int value) |
| { |
| char fullpath[BUFSIZ]; |
| long actual_val; |
| |
| snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path); |
| SAFE_FILE_SCANF(cleanup, fullpath, "%ld", &actual_val); |
| |
| tst_resm(TINFO, "%s is %ld.", path, actual_val); |
| if (actual_val != value) |
| tst_resm(TFAIL, "%s is not %ld.", path, value); |
| } |
| |
| static void wait_ksmd_done(void) |
| { |
| long pages_shared, pages_sharing, pages_volatile, pages_unshared; |
| long old_pages_shared = 0, old_pages_sharing = 0; |
| long old_pages_volatile = 0, old_pages_unshared = 0; |
| int changing = 1, count = 0; |
| |
| while (changing) { |
| sleep(10); |
| count++; |
| |
| SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_shared", |
| "%ld", &pages_shared); |
| |
| SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_sharing", |
| "%ld", &pages_sharing); |
| |
| SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_volatile", |
| "%ld", &pages_volatile); |
| |
| SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_unshared", |
| "%ld", &pages_unshared); |
| |
| if (pages_shared != old_pages_shared || |
| pages_sharing != old_pages_sharing || |
| pages_volatile != old_pages_volatile || |
| pages_unshared != old_pages_unshared) { |
| old_pages_shared = pages_shared; |
| old_pages_sharing = pages_sharing; |
| old_pages_volatile = pages_volatile; |
| old_pages_unshared = pages_unshared; |
| } else { |
| changing = 0; |
| } |
| } |
| |
| tst_resm(TINFO, "ksm daemon takes %ds to scan all mergeable pages", |
| count * 10); |
| } |
| |
| static void group_check(int run, int pages_shared, int pages_sharing, |
| int pages_volatile, int pages_unshared, |
| int sleep_millisecs, int pages_to_scan) |
| { |
| /* wait for ksm daemon to scan all mergeable pages. */ |
| wait_ksmd_done(); |
| |
| tst_resm(TINFO, "check!"); |
| check("run", run); |
| check("pages_shared", pages_shared); |
| check("pages_sharing", pages_sharing); |
| check("pages_volatile", pages_volatile); |
| check("pages_unshared", pages_unshared); |
| check("sleep_millisecs", sleep_millisecs); |
| check("pages_to_scan", pages_to_scan); |
| } |
| |
| static void verify(char **memory, char value, int proc, |
| int start, int end, int start2, int end2) |
| { |
| int i, j; |
| void *s = NULL; |
| |
| s = malloc((end - start) * (end2 - start2)); |
| if (s == NULL) |
| tst_brkm(TBROK | TERRNO, tst_exit, "malloc"); |
| |
| tst_resm(TINFO, "child %d verifies memory content.", proc); |
| memset(s, value, (end - start) * (end2 - start2)); |
| if (memcmp(memory[start], s, (end - start) * (end2 - start2)) |
| != 0) |
| for (j = start; j < end; j++) |
| for (i = start2; i < end2; i++) |
| if (memory[j][i] != value) |
| tst_resm(TFAIL, "child %d has %c at " |
| "%d,%d,%d.", |
| proc, memory[j][i], proc, |
| j, i); |
| free(s); |
| } |
| |
| void write_memcg(void) |
| { |
| SAFE_FILE_PRINTF(NULL, MEMCG_LIMIT, "%ld", TESTMEM); |
| |
| SAFE_FILE_PRINTF(NULL, MEMCG_PATH_NEW "/tasks", "%d", getpid()); |
| } |
| |
/*
 * One step of a KSM child's fill plan. Field order matters: the tables in
 * create_same_memory() use positional initializers.
 */
struct ksm_merge_data {
	/* character ksm_child_memset() writes into every byte */
	char data;
	/*
	 * Number of bytes expected to be mergeable. A value smaller than
	 * the child's total size makes ksm_child_memset() overwrite the
	 * last byte with 'e'.
	 */
	unsigned int mergeable_size;
};
| |
| static void ksm_child_memset(int child_num, int size, int total_unit, |
| struct ksm_merge_data ksm_merge_data, char **memory) |
| { |
| int i = 0, j; |
| int unit = size / total_unit; |
| |
| tst_resm(TINFO, "child %d continues...", child_num); |
| |
| if (ksm_merge_data.mergeable_size == size * MB) { |
| tst_resm(TINFO, "child %d allocates %d MB filled with '%c'", |
| child_num, size, ksm_merge_data.data); |
| |
| } else { |
| tst_resm(TINFO, "child %d allocates %d MB filled with '%c'" |
| " except one page with 'e'", |
| child_num, size, ksm_merge_data.data); |
| } |
| |
| for (j = 0; j < total_unit; j++) { |
| for (i = 0; (unsigned int)i < unit * MB; i++) |
| memory[j][i] = ksm_merge_data.data; |
| } |
| |
| /* if it contains unshared page, then set 'e' char |
| * at the end of the last page |
| */ |
| if (ksm_merge_data.mergeable_size < size * MB) |
| memory[j-1][i-1] = 'e'; |
| } |
| |
| static void create_ksm_child(int child_num, int size, int unit, |
| struct ksm_merge_data *ksm_merge_data) |
| { |
| int j, total_unit; |
| char **memory; |
| |
| /* The total units in all */ |
| total_unit = size / unit; |
| |
| /* Apply for the space for memory */ |
| memory = malloc(total_unit * sizeof(char *)); |
| for (j = 0; j < total_unit; j++) { |
| memory[j] = mmap(NULL, unit * MB, PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
| if (memory[j] == MAP_FAILED) |
| tst_brkm(TBROK|TERRNO, tst_exit, "mmap"); |
| #ifdef HAVE_MADV_MERGEABLE |
| if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1) |
| tst_brkm(TBROK|TERRNO, tst_exit, "madvise"); |
| #endif |
| } |
| |
| tst_resm(TINFO, "child %d stops.", child_num); |
| if (raise(SIGSTOP) == -1) |
| tst_brkm(TBROK|TERRNO, tst_exit, "kill"); |
| fflush(stdout); |
| |
| for (j = 0; j < 4; j++) { |
| |
| ksm_child_memset(child_num, size, total_unit, |
| ksm_merge_data[j], memory); |
| |
| fflush(stdout); |
| |
| tst_resm(TINFO, "child %d stops.", child_num); |
| if (raise(SIGSTOP) == -1) |
| tst_brkm(TBROK|TERRNO, tst_exit, "kill"); |
| |
| if (ksm_merge_data[j].mergeable_size < size * MB) { |
| verify(memory, 'e', child_num, total_unit - 1, |
| total_unit, unit * MB - 1, unit * MB); |
| verify(memory, ksm_merge_data[j].data, child_num, |
| 0, total_unit, 0, unit * MB - 1); |
| } else { |
| verify(memory, ksm_merge_data[j].data, child_num, |
| 0, total_unit, 0, unit * MB); |
| } |
| } |
| |
| tst_resm(TINFO, "child %d finished.", child_num); |
| } |
| |
| static void stop_ksm_children(int *child, int num) |
| { |
| int k, status; |
| |
| tst_resm(TINFO, "wait for all children to stop."); |
| for (k = 0; k < num; k++) { |
| if (waitpid(child[k], &status, WUNTRACED) == -1) |
| tst_brkm(TBROK|TERRNO, cleanup, "waitpid"); |
| if (!WIFSTOPPED(status)) |
| tst_brkm(TBROK, cleanup, "child %d was not stopped", k); |
| } |
| } |
| |
| static void resume_ksm_children(int *child, int num) |
| { |
| int k; |
| |
| tst_resm(TINFO, "resume all children."); |
| for (k = 0; k < num; k++) { |
| if (kill(child[k], SIGCONT) == -1) |
| tst_brkm(TBROK|TERRNO, cleanup, "kill child[%d]", k); |
| } |
| fflush(stdout); |
| } |
| |
| void create_same_memory(int size, int num, int unit) |
| { |
| int i, j, status, *child; |
| unsigned long ps, pages; |
| struct ksm_merge_data **ksm_data; |
| |
| struct ksm_merge_data ksm_data0[] = { |
| {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB}, |
| }; |
| struct ksm_merge_data ksm_data1[] = { |
| {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1}, |
| }; |
| struct ksm_merge_data ksm_data2[] = { |
| {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB}, |
| }; |
| |
| ps = sysconf(_SC_PAGE_SIZE); |
| pages = MB / ps; |
| |
| ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *)); |
| /* Since from third child, the data is same with the first child's */ |
| for (i = 0; i < num - 3; i++) { |
| ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data)); |
| for (j = 0; j < 4; j++) { |
| ksm_data[i][j].data = ksm_data0[j].data; |
| ksm_data[i][j].mergeable_size = |
| ksm_data0[j].mergeable_size; |
| } |
| } |
| |
| child = malloc(num * sizeof(int)); |
| if (child == NULL) |
| tst_brkm(TBROK | TERRNO, cleanup, "malloc"); |
| |
| for (i = 0; i < num; i++) { |
| fflush(stdout); |
| switch (child[i] = fork()) { |
| case -1: |
| tst_brkm(TBROK|TERRNO, cleanup, "fork"); |
| case 0: |
| if (i == 0) { |
| create_ksm_child(i, size, unit, ksm_data0); |
| exit(0); |
| } else if (i == 1) { |
| create_ksm_child(i, size, unit, ksm_data1); |
| exit(0); |
| } else if (i == 2) { |
| create_ksm_child(i, size, unit, ksm_data2); |
| exit(0); |
| } else { |
| create_ksm_child(i, size, unit, ksm_data[i-3]); |
| exit(0); |
| } |
| } |
| } |
| |
| stop_ksm_children(child, num); |
| |
| tst_resm(TINFO, "KSM merging..."); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1"); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld", |
| size * pages *num); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0"); |
| |
| resume_ksm_children(child, num); |
| group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num); |
| |
| stop_ksm_children(child, num); |
| resume_ksm_children(child, num); |
| group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num); |
| |
| stop_ksm_children(child, num); |
| resume_ksm_children(child, num); |
| group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num); |
| |
| stop_ksm_children(child, num); |
| resume_ksm_children(child, num); |
| group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num); |
| |
| stop_ksm_children(child, num); |
| |
| tst_resm(TINFO, "KSM unmerging..."); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); |
| |
| resume_ksm_children(child, num); |
| group_check(2, 0, 0, 0, 0, 0, size * pages * num); |
| |
| tst_resm(TINFO, "stop KSM."); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "0"); |
| group_check(0, 0, 0, 0, 0, 0, size * pages * num); |
| |
| while (waitpid(-1, &status, WUNTRACED | WCONTINUED) > 0) |
| if (WEXITSTATUS(status) != 0) |
| tst_resm(TFAIL, "child exit status is %d", |
| WEXITSTATUS(status)); |
| } |
| |
| void test_ksm_merge_across_nodes(unsigned long nr_pages) |
| { |
| char **memory; |
| int i, ret; |
| int num_nodes, *nodes; |
| unsigned long length; |
| unsigned long pagesize; |
| |
| #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \ |
| && HAVE_MPOL_CONSTANTS |
| unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 }; |
| #endif |
| |
| ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes); |
| if (ret != 0) |
| tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr"); |
| if (num_nodes < 2) { |
| tst_resm(TINFO, "need NUMA system support"); |
| free(nodes); |
| return; |
| } |
| |
| pagesize = sysconf(_SC_PAGE_SIZE); |
| length = nr_pages * pagesize; |
| |
| memory = malloc(num_nodes * sizeof(char *)); |
| for (i = 0; i < num_nodes; i++) { |
| memory[i] = mmap(NULL, length, PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
| if (memory[i] == MAP_FAILED) |
| tst_brkm(TBROK|TERRNO, tst_exit, "mmap"); |
| #ifdef HAVE_MADV_MERGEABLE |
| if (madvise(memory[i], length, MADV_MERGEABLE) == -1) |
| tst_brkm(TBROK|TERRNO, tst_exit, "madvise"); |
| #endif |
| |
| #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \ |
| && HAVE_MPOL_CONSTANTS |
| clean_node(nmask); |
| set_node(nmask, nodes[i]); |
| /* |
| * Use mbind() to make sure each node contains |
| * length size memory. |
| */ |
| ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0); |
| if (ret == -1) |
| tst_brkm(TBROK|TERRNO, tst_exit, "mbind"); |
| #endif |
| |
| memset(memory[i], 10, length); |
| } |
| |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0"); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld", |
| nr_pages * num_nodes); |
| /* |
| * merge_across_nodes setting can be changed only when there |
| * are no ksm shared pages in system, so set run 2 to unmerge |
| * pages first, then to 1 after changing merge_across_nodes, |
| * to remerge according to the new setting. |
| */ |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); |
| wait_ksmd_done(); |
| tst_resm(TINFO, "Start to test KSM with merge_across_nodes=1"); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "1"); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1"); |
| group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0, |
| nr_pages * num_nodes); |
| |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); |
| wait_ksmd_done(); |
| tst_resm(TINFO, "Start to test KSM with merge_across_nodes=0"); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "0"); |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1"); |
| group_check(1, num_nodes, nr_pages * num_nodes - num_nodes, |
| 0, 0, 0, nr_pages * num_nodes); |
| |
| SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2"); |
| wait_ksmd_done(); |
| } |
| |
| void check_ksm_options(int *size, int *num, int *unit) |
| { |
| if (opt_size) { |
| *size = atoi(opt_sizestr); |
| if (*size < 1) |
| tst_brkm(TBROK, cleanup, "size cannot be less than 1."); |
| } |
| if (opt_unit) { |
| *unit = atoi(opt_unitstr); |
| if (*unit > *size) |
| tst_brkm(TBROK, cleanup, |
| "unit cannot be greater than size."); |
| if (*size % *unit != 0) |
| tst_brkm(TBROK, cleanup, |
| "the remainder of division of size by unit is " |
| "not zero."); |
| } |
| if (opt_num) { |
| *num = atoi(opt_numstr); |
| if (*num < 3) |
| tst_brkm(TBROK, cleanup, |
| "process number cannot be less 3."); |
| } |
| } |
| |
/* Print the help lines for the KSM-specific options to stdout. */
void ksm_usage(void)
{
	static const char *const help[] = {
		" -n Number of processes\n",
		" -s Memory allocation size in MB\n",
		" -u Memory allocation unit in MB\n",
	};
	size_t k;

	for (k = 0; k < sizeof(help) / sizeof(help[0]); k++)
		fputs(help[k], stdout);
}
| |
| /* THP */ |
| |
| static int alloc_transparent_hugepages(int nr_thps, int hg_aligned) |
| { |
| unsigned long hugepagesize, size; |
| void *addr; |
| int ret; |
| |
| hugepagesize = read_meminfo("Hugepagesize:") * KB; |
| size = nr_thps * hugepagesize; |
| |
| if (hg_aligned) { |
| ret = posix_memalign(&addr, hugepagesize, size); |
| if (ret != 0) { |
| printf("posix_memalign failed\n"); |
| return -1; |
| } |
| } else { |
| addr = mmap(NULL, size, PROT_READ|PROT_WRITE, |
| MAP_PRIVATE|MAP_ANON, -1, 0); |
| if (addr == MAP_FAILED) { |
| perror("mmap"); |
| return -1; |
| } |
| } |
| |
| memset(addr, 10, size); |
| |
| tst_resm(TINFO, "child[%d] stop here", getpid()); |
| /* |
| * stop here, until the father finish to calculate |
| * all the transparent hugepages. |
| */ |
| if (raise(SIGSTOP) == -1) { |
| perror("kill"); |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
| static void khugepaged_scan_done(void) |
| { |
| int changing = 1, count = 0, interval; |
| long old_pages_collapsed = 0, old_max_ptes_none = 0, |
| old_pages_to_scan = 0; |
| long pages_collapsed = 0, max_ptes_none = 0, pages_to_scan = 0; |
| |
| /* |
| * as 'khugepaged' run 100% during testing, so 5s is an |
| * enough interval for us to recognize if 'khugepaged' |
| * finish scanning proceses' anonymous hugepages or not. |
| */ |
| interval = 5; |
| |
| while (changing) { |
| sleep(interval); |
| count++; |
| |
| SAFE_FILE_SCANF(cleanup, PATH_KHPD "pages_collapsed", |
| "%ld", &pages_collapsed); |
| SAFE_FILE_SCANF(cleanup, PATH_KHPD "max_ptes_none", |
| "%ld", &max_ptes_none); |
| SAFE_FILE_SCANF(cleanup, PATH_KHPD "pages_to_scan", |
| "%ld", &pages_to_scan); |
| |
| if (pages_collapsed != old_pages_collapsed || |
| max_ptes_none != old_max_ptes_none || |
| pages_to_scan != old_pages_to_scan) { |
| old_pages_collapsed = pages_collapsed; |
| old_max_ptes_none = max_ptes_none; |
| old_pages_to_scan = pages_to_scan; |
| } else { |
| changing = 0; |
| } |
| } |
| |
| tst_resm(TINFO, "khugepaged daemon takes %ds to scan all thp pages", |
| count * interval); |
| } |
| |
| static void verify_thp_size(int *children, int nr_children, int nr_thps) |
| { |
| FILE *fp; |
| char path[BUFSIZ], buf[BUFSIZ], line[BUFSIZ]; |
| int i, ret; |
| long expect_thps; /* the amount of per child's transparent hugepages */ |
| long val, actual_thps; |
| long hugepagesize; |
| |
| hugepagesize = read_meminfo("Hugepagesize:"); |
| expect_thps = nr_thps * hugepagesize; |
| |
| for (i = 0; i < nr_children; i++) { |
| actual_thps = 0; |
| |
| snprintf(path, BUFSIZ, "/proc/%d/smaps", children[i]); |
| fp = fopen(path, "r"); |
| while (fgets(line, BUFSIZ, fp) != NULL) { |
| ret = sscanf(line, "%64s %ld", buf, &val); |
| if (ret == 2 && val != 0) { |
| if (strcmp(buf, "AnonHugePages:") == 0) |
| actual_thps += val; |
| } |
| } |
| |
| if (actual_thps != expect_thps) |
| tst_resm(TFAIL, "child[%d] got %ldKB thps - expect %ld" |
| "KB thps", getpid(), actual_thps, expect_thps); |
| fclose(fp); |
| } |
| } |
| |
| void test_transparent_hugepage(int nr_children, int nr_thps, |
| int hg_aligned, int mempolicy) |
| { |
| unsigned long hugepagesize, memfree; |
| int i, *pids, ret, status; |
| |
| if (mempolicy) |
| set_global_mempolicy(mempolicy); |
| |
| memfree = read_meminfo("MemFree:"); |
| tst_resm(TINFO, "The current MemFree is %luMB", memfree / KB); |
| if (memfree < MB) |
| tst_resm(TCONF, "Not enough memory for testing"); |
| |
| hugepagesize = read_meminfo("Hugepagesize:"); |
| tst_resm(TINFO, "The current Hugepagesize is %luMB", hugepagesize / KB); |
| |
| pids = malloc(nr_children * sizeof(int)); |
| if (pids == NULL) |
| tst_brkm(TBROK | TERRNO, cleanup, "malloc"); |
| |
| for (i = 0; i < nr_children; i++) { |
| switch (pids[i] = fork()) { |
| case -1: |
| tst_brkm(TBROK | TERRNO, cleanup, "fork"); |
| |
| case 0: |
| ret = alloc_transparent_hugepages(nr_thps, hg_aligned); |
| exit(ret); |
| } |
| } |
| |
| tst_resm(TINFO, "Stop all children..."); |
| for (i = 0; i < nr_children; i++) { |
| if (waitpid(pids[i], &status, WUNTRACED) == -1) |
| tst_brkm(TBROK|TERRNO, cleanup, "waitpid"); |
| if (!WIFSTOPPED(status)) |
| tst_brkm(TBROK, cleanup, |
| "child[%d] was not stoppted", pids[i]); |
| } |
| |
| tst_resm(TINFO, "Start to scan all transparent hugepages..."); |
| khugepaged_scan_done(); |
| |
| tst_resm(TINFO, "Start to verify transparent hugepage size..."); |
| verify_thp_size(pids, nr_children, nr_thps); |
| |
| tst_resm(TINFO, "Wake up all children..."); |
| for (i = 0; i < nr_children; i++) { |
| if (kill(pids[i], SIGCONT) == -1) |
| tst_brkm(TBROK | TERRNO, cleanup, |
| "signal continue child[%d]", pids[i]); |
| } |
| |
| /* wait all children finish their task */ |
| for (i = 0; i < nr_children; i++) { |
| if (waitpid(pids[i], &status, 0) == -1) |
| tst_brkm(TBROK|TERRNO, cleanup, "waitpid %d", pids[i]); |
| |
| if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) |
| tst_resm(TFAIL, "the child[%d] unexpectedly failed:" |
| " %d", pids[i], status); |
| } |
| } |
| |
| void check_thp_options(int *nr_children, int *nr_thps) |
| { |
| if (opt_nr_children) |
| *nr_children = SAFE_STRTOL(NULL, opt_nr_children_str, |
| 0, LONG_MAX); |
| if (opt_nr_thps) |
| *nr_thps = SAFE_STRTOL(NULL, opt_nr_thps_str, 0, LONG_MAX); |
| } |
| |
/* Print the help lines for the THP-specific options to stdout. */
void thp_usage(void)
{
	static const char *const help[] = {
		" -n Number of processes\n",
		" -N Number of transparent hugepages\n",
	};
	size_t k;

	for (k = 0; k < sizeof(help) / sizeof(help[0]); k++)
		fputs(help[k], stdout);
}
| |
| /* cpuset/memcg */ |
| |
| static void gather_node_cpus(char *cpus, long nd) |
| { |
| int ncpus = 0; |
| int i; |
| long online; |
| char buf[BUFSIZ]; |
| char path[BUFSIZ], path1[BUFSIZ]; |
| |
| while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus)) |
| ncpus++; |
| |
| for (i = 0; i < ncpus; i++) { |
| snprintf(path, BUFSIZ, |
| PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i); |
| if (path_exist(path)) { |
| snprintf(path1, BUFSIZ, "%s/online", path); |
| /* |
| * if there is no online knob, then the cpu cannot |
| * be taken offline |
| */ |
| if (path_exist(path1)) { |
| SAFE_FILE_SCANF(cleanup, path1, "%ld", &online); |
| if (online == 0) |
| continue; |
| } |
| sprintf(buf, "%d,", i); |
| strcat(cpus, buf); |
| } |
| } |
| /* Remove the trailing comma. */ |
| cpus[strlen(cpus) - 1] = '\0'; |
| } |
| |
| void read_cpuset_files(char *prefix, char *filename, char *retbuf) |
| { |
| int fd; |
| char path[BUFSIZ]; |
| |
| /* |
| * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX' |
| * please see Documentation/cgroups/cpusets.txt from kernel src |
| * for details |
| */ |
| snprintf(path, BUFSIZ, "%s/%s", prefix, filename); |
| fd = open(path, O_RDONLY); |
| if (fd == -1) { |
| if (errno == ENOENT) { |
| snprintf(path, BUFSIZ, "%s/cpuset.%s", |
| prefix, filename); |
| fd = open(path, O_RDONLY); |
| if (fd == -1) |
| tst_brkm(TBROK | TERRNO, cleanup, |
| "open %s", path); |
| } else |
| tst_brkm(TBROK | TERRNO, cleanup, "open %s", path); |
| } |
| if (read(fd, retbuf, BUFSIZ) < 0) |
| tst_brkm(TBROK | TERRNO, cleanup, "read %s", path); |
| close(fd); |
| } |
| |
| void write_cpuset_files(char *prefix, char *filename, char *buf) |
| { |
| int fd; |
| char path[BUFSIZ]; |
| |
| /* |
| * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX' |
| * please see Documentation/cgroups/cpusets.txt from kernel src |
| * for details |
| */ |
| snprintf(path, BUFSIZ, "%s/%s", prefix, filename); |
| fd = open(path, O_WRONLY); |
| if (fd == -1) { |
| if (errno == ENOENT) { |
| snprintf(path, BUFSIZ, "%s/cpuset.%s", |
| prefix, filename); |
| fd = open(path, O_WRONLY); |
| if (fd == -1) |
| tst_brkm(TBROK | TERRNO, cleanup, |
| "open %s", path); |
| } else |
| tst_brkm(TBROK | TERRNO, cleanup, "open %s", path); |
| } |
| if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) |
| tst_brkm(TBROK | TERRNO, cleanup, "write %s", path); |
| close(fd); |
| } |
| |
| void write_cpusets(long nd) |
| { |
| char buf[BUFSIZ]; |
| char cpus[BUFSIZ] = ""; |
| |
| snprintf(buf, BUFSIZ, "%ld", nd); |
| write_cpuset_files(CPATH_NEW, "mems", buf); |
| |
| gather_node_cpus(cpus, nd); |
| /* |
| * If the 'nd' node doesn't contain any CPUs, |
| * the first ID of CPU '0' will be used as |
| * the value of cpuset.cpus. |
| */ |
| if (strlen(cpus) != 0) { |
| write_cpuset_files(CPATH_NEW, "cpus", cpus); |
| } else { |
| tst_resm(TINFO, "No CPUs in the node%ld; " |
| "using only CPU0", nd); |
| write_cpuset_files(CPATH_NEW, "cpus", "0"); |
| } |
| |
| SAFE_FILE_PRINTF(NULL, CPATH_NEW "/tasks", "%d", getpid()); |
| } |
| |
| void umount_mem(char *path, char *path_new) |
| { |
| FILE *fp; |
| int fd; |
| char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ]; |
| |
| /* Move all processes in task to its parent node. */ |
| sprintf(s, "%s/tasks", path); |
| fd = open(s, O_WRONLY); |
| if (fd == -1) |
| tst_resm(TWARN | TERRNO, "open %s", s); |
| |
| snprintf(s_new, BUFSIZ, "%s/tasks", path_new); |
| fp = fopen(s_new, "r"); |
| if (fp == NULL) |
| tst_resm(TWARN | TERRNO, "fopen %s", s_new); |
| if ((fd != -1) && (fp != NULL)) { |
| while (fgets(value, BUFSIZ, fp) != NULL) |
| if (write(fd, value, strlen(value) - 1) |
| != (ssize_t)strlen(value) - 1) |
| tst_resm(TWARN | TERRNO, "write %s", s); |
| } |
| if (fd != -1) |
| close(fd); |
| if (fp != NULL) |
| fclose(fp); |
| if (rmdir(path_new) == -1) |
| tst_resm(TWARN | TERRNO, "rmdir %s", path_new); |
| if (umount(path) == -1) |
| tst_resm(TWARN | TERRNO, "umount %s", path); |
| if (rmdir(path) == -1) |
| tst_resm(TWARN | TERRNO, "rmdir %s", path); |
| } |
| |
| void mount_mem(char *name, char *fs, char *options, char *path, char *path_new) |
| { |
| if (mkdir(path, 0777) == -1) |
| tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path); |
| if (mount(name, path, fs, 0, options) == -1) { |
| if (errno == ENODEV) { |
| if (rmdir(path) == -1) |
| tst_resm(TWARN | TERRNO, "rmdir %s failed", |
| path); |
| tst_brkm(TCONF, NULL, |
| "file system %s is not configured in kernel", |
| fs); |
| } |
| tst_brkm(TBROK | TERRNO, cleanup, "mount %s", path); |
| } |
| if (mkdir(path_new, 0777) == -1) |
| tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path_new); |
| } |
| |
| /* shared */ |
| |
| /* Warning: *DO NOT* use this function in child */ |
| unsigned int get_a_numa_node(void (*cleanup_fn) (void)) |
| { |
| unsigned int nd1, nd2; |
| int ret; |
| |
| ret = get_allowed_nodes(0, 2, &nd1, &nd2); |
| switch (ret) { |
| case 0: |
| break; |
| case -3: |
| tst_brkm(TCONF, cleanup_fn, "requires a NUMA system."); |
| default: |
| tst_brkm(TBROK | TERRNO, cleanup_fn, "1st get_allowed_nodes"); |
| } |
| |
| ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1); |
| switch (ret) { |
| case 0: |
| tst_resm(TINFO, "get node%u.", nd1); |
| return nd1; |
| case -3: |
| tst_brkm(TCONF, cleanup_fn, "requires a NUMA system that has " |
| "at least one node with both memory and CPU " |
| "available."); |
| default: |
| break; |
| } |
| tst_brkm(TBROK | TERRNO, cleanup_fn, "2nd get_allowed_nodes"); |
| } |
| |
/*
 * Test whether a path, built printf-style from @path and the following
 * arguments, exists.
 *
 * Returns 1 when access(..., F_OK) succeeds on the formatted path and 0
 * otherwise. A path that cannot be formatted, or that does not fit into
 * PATH_MAX bytes, is reported as not existing; it is never probed in
 * truncated form.
 */
int path_exist(const char *path, ...)
{
	va_list ap;
	char pathbuf[PATH_MAX];
	int len;

	va_start(ap, path);
	len = vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
	va_end(ap);

	/* vsnprintf() returns the untruncated length, or < 0 on error */
	if (len < 0 || (size_t)len >= sizeof(pathbuf))
		return 0;

	return access(pathbuf, F_OK) == 0;
}
| |
| long read_meminfo(char *item) |
| { |
| FILE *fp; |
| char line[BUFSIZ], buf[BUFSIZ]; |
| long val; |
| |
| fp = fopen(PATH_MEMINFO, "r"); |
| if (fp == NULL) |
| tst_brkm(TBROK | TERRNO, cleanup, "fopen %s", PATH_MEMINFO); |
| |
| while (fgets(line, BUFSIZ, fp) != NULL) { |
| if (sscanf(line, "%64s %ld", buf, &val) == 2) |
| if (strcmp(buf, item) == 0) { |
| fclose(fp); |
| return val; |
| } |
| continue; |
| } |
| fclose(fp); |
| |
| tst_brkm(TBROK, cleanup, "cannot find \"%s\" in %s", |
| item, PATH_MEMINFO); |
| } |
| |
| void set_sys_tune(char *sys_file, long tune, int check) |
| { |
| long val; |
| char path[BUFSIZ]; |
| |
| tst_resm(TINFO, "set %s to %ld", sys_file, tune); |
| |
| snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file); |
| SAFE_FILE_PRINTF(NULL, path, "%ld", tune); |
| |
| if (check) { |
| val = get_sys_tune(sys_file); |
| if (val != tune) |
| tst_brkm(TBROK, cleanup, "%s = %ld, but expect %ld", |
| sys_file, val, tune); |
| } |
| } |
| |
| long get_sys_tune(char *sys_file) |
| { |
| char path[BUFSIZ]; |
| long tune; |
| |
| snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file); |
| SAFE_FILE_SCANF(NULL, path, "%ld", &tune); |
| |
| return tune; |
| } |
| |
| void update_shm_size(size_t * shm_size) |
| { |
| size_t shmmax; |
| |
| SAFE_FILE_SCANF(cleanup, PATH_SHMMAX, "%ld", &shmmax); |
| if (*shm_size > shmmax) { |
| tst_resm(TINFO, "Set shm_size to shmmax: %ld", shmmax); |
| *shm_size = shmmax; |
| } |
| } |