Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 Linux Test Project, Inc. |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify |
| 5 | * it under the terms of the GNU General Public License as published by |
| 6 | * the Free Software Foundation; either version 2 of the License, or |
| 7 | * (at your option) any later version. |
| 8 | * |
| 9 | * This program is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
| 12 | * the GNU General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU General Public License |
| 15 | * along with this program; if not, write to the Free Software |
| 16 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | #include "config.h" |
| 20 | #include <errno.h> |
| 21 | #if HAVE_NUMA_H |
| 22 | #include <numa.h> |
| 23 | #endif |
| 24 | #if HAVE_NUMAIF_H |
| 25 | #include <numaif.h> |
| 26 | #endif |
| 27 | #include <stdarg.h> |
| 28 | #include <stdio.h> |
| 29 | #include <string.h> |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 30 | #include <stdlib.h> |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 31 | #include <unistd.h> |
| 32 | #include <errno.h> |
| 33 | |
| 34 | #include "test.h" |
| 35 | #include "usctest.h" |
| 36 | #include "safe_macros.h" |
| 37 | #include "numa_helper.h" |
| 38 | #include "linux_syscall_numbers.h" |
| 39 | |
/*
 * get_max_node - upper bound on node ids to consider
 *
 * RETURNS:
 *     0 when built without <numa.h>,
 *     max(NUMA_NUM_NODES, 1024) with libnuma API version < 2 (or unknown),
 *     numa_max_possible_node() + 1 otherwise
 */
unsigned long get_max_node(void)
{
#if !HAVE_NUMA_H
	return 0;
#elif !defined(LIBNUMA_API_VERSION) || LIBNUMA_API_VERSION < 2
	unsigned long limit = NUMA_NUM_NODES;

	/*
	 * NUMA_NUM_NODES is not reliable, libnuma >=2 is looking
	 * at /proc/self/status to figure out correct number.
	 * If buffer is not large enough get_mempolicy will fail with EINVAL,
	 * so never report fewer than 1024 nodes.
	 */
	return (limit < 1024) ? 1024 : limit;
#else
	return numa_max_possible_node() + 1;
#endif
}
Jan Stancek | 7bf7014 | 2012-07-12 16:09:47 +0200 | [diff] [blame] | 59 | |
Jan Stancek | 7312c44 | 2012-10-26 15:31:15 +0200 | [diff] [blame] | 60 | #if HAVE_NUMA_H |
Wanlong Gao | 354ebb4 | 2012-12-07 10:10:04 +0800 | [diff] [blame] | 61 | static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node) |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 62 | { |
Wanlong Gao | 354ebb4 | 2012-12-07 10:10:04 +0800 | [diff] [blame] | 63 | unsigned long nodemask_size = max_node / 8 + 1; |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 64 | int i; |
| 65 | char fn[64]; |
| 66 | struct stat st; |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 67 | |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 68 | memset(nodemask, 0, nodemask_size); |
| 69 | for (i = 0; i < max_node; i++) { |
| 70 | sprintf(fn, "/sys/devices/system/node/node%d", i); |
| 71 | if (stat(fn, &st) == 0) |
| 72 | nodemask_set(nodemask, i); |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 73 | } |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 74 | } |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 75 | |
Wanlong Gao | 354ebb4 | 2012-12-07 10:10:04 +0800 | [diff] [blame] | 76 | static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node) |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 77 | { |
Jan Stancek | 7bf7014 | 2012-07-12 16:09:47 +0200 | [diff] [blame] | 78 | #if MPOL_F_MEMS_ALLOWED |
Wanlong Gao | 354ebb4 | 2012-12-07 10:10:04 +0800 | [diff] [blame] | 79 | unsigned long nodemask_size = max_node / 8 + 1; |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 80 | memset(nodemask, 0, nodemask_size); |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 81 | /* |
| 82 | * avoid numa_get_mems_allowed(), because of bug in getpol() |
| 83 | * utility function in older versions: |
| 84 | * http://www.spinics.net/lists/linux-numa/msg00849.html |
| 85 | */ |
Jan Stancek | 359980f | 2013-02-15 10:16:05 +0100 | [diff] [blame] | 86 | if (ltp_syscall(__NR_get_mempolicy, NULL, nodemask->n, |
Wanlong Gao | 354ebb4 | 2012-12-07 10:10:04 +0800 | [diff] [blame] | 87 | max_node, 0, MPOL_F_MEMS_ALLOWED) < 0) |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 88 | return -2; |
Jan Stancek | 7bf7014 | 2012-07-12 16:09:47 +0200 | [diff] [blame] | 89 | #else |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 90 | int i; |
Jan Stancek | 7bf7014 | 2012-07-12 16:09:47 +0200 | [diff] [blame] | 91 | /* |
| 92 | * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume |
| 93 | * that we can use any node with memory > 0 |
| 94 | */ |
Jan Stancek | 7bf7014 | 2012-07-12 16:09:47 +0200 | [diff] [blame] | 95 | for (i = 0; i < max_node; i++) { |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 96 | if (!nodemask_isset(nodemask, i)) |
| 97 | continue; |
| 98 | if (numa_node_size64(i, NULL) <= 0) |
| 99 | nodemask_clr(nodemask, i); |
| 100 | } |
| 101 | #endif /* MPOL_F_MEMS_ALLOWED */ |
| 102 | return 0; |
| 103 | } |
| 104 | |
/*
 * cpumask_has_cpus - check whether a hex cpumask string has any bit set
 * @cpumask: text to scan
 * @len: maximum number of characters to look at
 *
 * Scanning stops at the first '\0' or after len characters, whichever
 * comes first. Only '1'-'9' and lowercase 'a'-'f' count as non-zero
 * digits; any other character is skipped.
 *
 * RETURNS:
 *     1 if a non-zero hex digit was found
 *     0 otherwise
 */
static int cpumask_has_cpus(char *cpumask, size_t len)
{
	size_t pos;

	for (pos = 0; pos < len && cpumask[pos] != '\0'; pos++) {
		char c = cpumask[pos];

		if ((c >= '1' && c <= '9') || (c >= 'a' && c <= 'f'))
			return 1;
	}

	return 0;
}
| 117 | |
Wanlong Gao | 354ebb4 | 2012-12-07 10:10:04 +0800 | [diff] [blame] | 118 | static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node) |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 119 | { |
| 120 | char *cpumask = NULL; |
| 121 | char fn[64]; |
| 122 | FILE *f; |
| 123 | size_t len; |
| 124 | int i, ret; |
| 125 | |
| 126 | for (i = 0; i < max_node; i++) { |
| 127 | if (!nodemask_isset(nodemask, i)) |
| 128 | continue; |
| 129 | sprintf(fn, "/sys/devices/system/node/node%d/cpumap", i); |
| 130 | f = fopen(fn, "r"); |
| 131 | if (f) { |
| 132 | ret = getdelim(&cpumask, &len, '\n', f); |
| 133 | if ((ret > 0) && (!cpumask_has_cpus(cpumask, len))) |
| 134 | nodemask_clr(nodemask, i); |
| 135 | fclose(f); |
Jan Stancek | cfdc4f7 | 2012-06-28 11:03:15 +0200 | [diff] [blame] | 136 | } |
| 137 | } |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 138 | free(cpumask); |
| 139 | } |
Jan Stancek | 7bf7014 | 2012-07-12 16:09:47 +0200 | [diff] [blame] | 140 | #endif /* HAVE_NUMA_H */ |
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 141 | |
/*
 * get_allowed_nodes_arr - get number and array of available nodes
 * @flag: filter to apply, any combination of NH_MEMS and NH_CPUS
 *        (0 means every node present in sysfs)
 * @num_nodes: pointer where number of available nodes will be stored
 * @nodes: array of available node ids, this is the filtered node
 *         bitmask compacted (without holes), so that each field
 *         contains node number. If NULL only num_nodes is
 *         returned, otherwise it contains newly allocated array,
 *         which caller is responsible to free. On failure *nodes
 *         is set to NULL and nothing has to be freed.
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *
 * When built without <numa.h> the function reports zero nodes and
 * returns 0.
 */
int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
{
	int ret = 0;

	*num_nodes = 0;
	if (nodes)
		*nodes = NULL;

#if HAVE_NUMA_H
	const unsigned long bits_per_long = 8 * sizeof(unsigned long);
	unsigned long max_node = get_max_node();
	/*
	 * Size the mask in whole unsigned longs (never less than the
	 * max_node / 8 + 1 bytes the helpers clear): get_mempolicy(2)
	 * rounds maxnode up to a multiple of the word size.
	 * NOTE(review): the nodemask_* helpers are assumed to access the
	 * mask one unsigned long at a time as well.
	 */
	unsigned long nodemask_size =
		(max_node / bits_per_long + 1) * sizeof(unsigned long);
	nodemask_t *nodemask = malloc(nodemask_size);
	unsigned long i;
	int saved_errno;

	if (nodes)
		*nodes = malloc(sizeof(**nodes) * max_node);

	if (nodemask == NULL || (nodes && *nodes == NULL)) {
		ret = -1;
		goto out;
	}

	/* allow all nodes at start, then filter based on flags */
	get_nodemask_allnodes(nodemask, max_node);
	if ((flag & NH_MEMS) == NH_MEMS) {
		ret = filter_nodemask_mem(nodemask, max_node);
		if (ret < 0)
			goto out;
	}
	if ((flag & NH_CPUS) == NH_CPUS)
		filter_nodemask_cpu(nodemask, max_node);

	/* compact the set bits into a dense list of node ids */
	for (i = 0; i < max_node; i++) {
		if (!nodemask_isset(nodemask, (int)i))
			continue;
		if (nodes)
			(*nodes)[*num_nodes] = (int)i;
		(*num_nodes)++;
	}

out:
	/* callers report errno (TERRNO), don't let free() clobber it */
	saved_errno = errno;
	if (ret < 0 && nodes) {
		/* never hand a half-initialized array back to the caller */
		free(*nodes);
		*nodes = NULL;
	}
	free(nodemask);
	errno = saved_errno;
#endif
	return ret;
}
| 202 | |
/*
 * get_allowed_nodes - convenience function to get fixed number of nodes
 * @flag: filter passed on to get_allowed_nodes_arr()
 * @count: how many nodes to get
 * @...: count int pointers, where node ids will be stored
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *    -3 on not enough allowed nodes (errno is set to EINVAL); pointers
 *       for which a node was available have already been filled in
 */
int get_allowed_nodes(int flag, int count, ...)
{
	int ret;
	int i, *nodep;
	int saved_errno;
	va_list ap;
	int num_nodes = 0;
	int *nodes = NULL;

	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
	if (ret < 0) {
		/*
		 * The array may have been allocated before the failure;
		 * release it without disturbing errno for the caller.
		 */
		saved_errno = errno;
		free(nodes);
		errno = saved_errno;
		return ret;
	}

	va_start(ap, count);
	for (i = 0; i < count; i++) {
		nodep = va_arg(ap, int *);
		if (i >= num_nodes) {
			ret = -3;
			break;
		}
		*nodep = nodes[i];
	}
	va_end(ap);
	free(nodes);

	/* set errno last so that cleanup above cannot overwrite it */
	if (ret == -3)
		errno = EINVAL;

	return ret;
}
Jan Stancek | 78de650 | 2012-08-09 14:15:37 +0800 | [diff] [blame] | 241 | |
/*
 * print_node_info - print the node ids matching flag on one stdout line
 * @flag: filter handed to get_allowed_nodes_arr()
 *
 * Prints "error(<ret>)" instead of the list when the lookup fails.
 */
static void print_node_info(int flag)
{
	int *node_ids = NULL;
	int count, idx;
	int rc = get_allowed_nodes_arr(flag, &count, &node_ids);

	printf("nodes (flag=%d): ", flag);
	if (rc != 0) {
		printf("error(%d)\n", rc);
	} else {
		for (idx = 0; idx < count; idx++)
			printf("%d ", node_ids[idx]);
		printf("\n");
	}
	free(node_ids);
}
| 257 | |
| 258 | /* |
| 259 | * nh_dump_nodes - dump info about nodes to stdout |
| 260 | */ |
| 261 | void nh_dump_nodes() |
| 262 | { |
| 263 | print_node_info(0); |
| 264 | print_node_info(NH_MEMS); |
| 265 | print_node_info(NH_CPUS); |
| 266 | print_node_info(NH_MEMS | NH_CPUS); |
| 267 | } |
Zhouping Liu | 798adcd | 2013-03-19 11:40:51 +0800 | [diff] [blame^] | 268 | |
| 269 | /* |
| 270 | * is_numa - judge a system is NUMA system or not |
| 271 | * NOTE: the function is designed to try to find more than |
| 272 | * 1 available node, at least each node contains memory. |
| 273 | * WARN: Don't use this func in child, as it calls tst_brkm() |
| 274 | * RETURNS: |
| 275 | * 0 - it's not a NUMA system |
| 276 | * 1 - it's a NUMA system |
| 277 | */ |
| 278 | int is_numa(void (*cleanup_fn)(void)) |
| 279 | { |
| 280 | int ret; |
| 281 | int numa_nodes = 0; |
| 282 | |
| 283 | ret = get_allowed_nodes_arr(NH_MEMS, &numa_nodes, NULL); |
| 284 | if (ret < 0) |
| 285 | tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr"); |
| 286 | |
| 287 | if (numa_nodes > 1) |
| 288 | return 1; |
| 289 | else |
| 290 | return 0; |
| 291 | } |