blob: 7cee7037b1f21bd8232d894bf59694dc934c34b3 [file] [log] [blame]
/*
 * Copyright (C) 2012 Linux Test Project, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#include "config.h"
20#include <errno.h>
21#if HAVE_NUMA_H
22#include <numa.h>
23#endif
24#if HAVE_NUMAIF_H
25#include <numaif.h>
26#endif
27#include <stdarg.h>
28#include <stdio.h>
29#include <string.h>
Jan Stancek78de6502012-08-09 14:15:37 +080030#include <stdlib.h>
Jan Stancekcfdc4f72012-06-28 11:03:15 +020031#include <unistd.h>
32#include <errno.h>
33
34#include "test.h"
Jan Stancekcfdc4f72012-06-28 11:03:15 +020035#include "safe_macros.h"
36#include "numa_helper.h"
37#include "linux_syscall_numbers.h"
38
/*
 * get_max_node - upper bound on the number of node ids to consider
 *
 * RETURNS:
 *  0 when built without <numa.h>;
 *  NUMA_NUM_NODES, raised to at least 1024, when LIBNUMA_API_VERSION is
 *  undefined or below 2;
 *  numa_max_possible_node() + 1 otherwise.
 */
unsigned long get_max_node(void)
{
#if !HAVE_NUMA_H
	return 0;
#elif !defined(LIBNUMA_API_VERSION) || LIBNUMA_API_VERSION < 2
	/*
	 * NUMA_NUM_NODES is not reliable, libnuma >=2 is looking
	 * at /proc/self/status to figure out correct number.
	 * If buffer is not large enough get_mempolicy will fail with EINVAL.
	 */
	unsigned long node_limit = NUMA_NUM_NODES;

	return (node_limit < 1024) ? 1024 : node_limit;
#else
	return numa_max_possible_node() + 1;
#endif
}
Jan Stancek7bf70142012-07-12 16:09:47 +020058
Jan Stancek7312c442012-10-26 15:31:15 +020059#if HAVE_NUMA_H
Wanlong Gao354ebb42012-12-07 10:10:04 +080060static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node)
Jan Stancek78de6502012-08-09 14:15:37 +080061{
Stanislav Kholmanskikh7037cea2013-08-27 16:49:52 +040062 unsigned long nodemask_size = max_node / 8;
Jan Stancek78de6502012-08-09 14:15:37 +080063 int i;
64 char fn[64];
65 struct stat st;
Jan Stancekcfdc4f72012-06-28 11:03:15 +020066
Jan Stancek78de6502012-08-09 14:15:37 +080067 memset(nodemask, 0, nodemask_size);
68 for (i = 0; i < max_node; i++) {
69 sprintf(fn, "/sys/devices/system/node/node%d", i);
70 if (stat(fn, &st) == 0)
71 nodemask_set(nodemask, i);
Jan Stancekcfdc4f72012-06-28 11:03:15 +020072 }
Jan Stancek78de6502012-08-09 14:15:37 +080073}
Jan Stancekcfdc4f72012-06-28 11:03:15 +020074
Wanlong Gao354ebb42012-12-07 10:10:04 +080075static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node)
Jan Stancek78de6502012-08-09 14:15:37 +080076{
Jan Stancek7bf70142012-07-12 16:09:47 +020077#if MPOL_F_MEMS_ALLOWED
Stanislav Kholmanskikh7037cea2013-08-27 16:49:52 +040078 unsigned long nodemask_size = max_node / 8;
Jan Stancek78de6502012-08-09 14:15:37 +080079 memset(nodemask, 0, nodemask_size);
Jan Stancekcfdc4f72012-06-28 11:03:15 +020080 /*
81 * avoid numa_get_mems_allowed(), because of bug in getpol()
82 * utility function in older versions:
83 * http://www.spinics.net/lists/linux-numa/msg00849.html
Jan Stancek75fd07a2016-02-23 11:00:31 +010084 *
85 * At the moment numa_available() implementation also uses
86 * get_mempolicy, but let's make explicit check for ENOSYS
87 * here as well in case it changes in future. Silent ignore
88 * of ENOSYS is OK, because without NUMA caller gets empty
89 * set of nodes anyway.
Jan Stancekcfdc4f72012-06-28 11:03:15 +020090 */
Jan Stancek75fd07a2016-02-23 11:00:31 +010091 if (syscall(__NR_get_mempolicy, NULL, nodemask->n,
92 max_node, 0, MPOL_F_MEMS_ALLOWED) < 0) {
93 if (errno == ENOSYS)
94 return 0;
Jan Stancekcfdc4f72012-06-28 11:03:15 +020095 return -2;
Jan Stancek75fd07a2016-02-23 11:00:31 +010096 }
Jan Stancek7bf70142012-07-12 16:09:47 +020097#else
Jan Stancek78de6502012-08-09 14:15:37 +080098 int i;
Jan Stancek7bf70142012-07-12 16:09:47 +020099 /*
100 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume
101 * that we can use any node with memory > 0
102 */
Jan Stancek7bf70142012-07-12 16:09:47 +0200103 for (i = 0; i < max_node; i++) {
Jan Stancek78de6502012-08-09 14:15:37 +0800104 if (!nodemask_isset(nodemask, i))
105 continue;
106 if (numa_node_size64(i, NULL) <= 0)
107 nodemask_clr(nodemask, i);
108 }
109#endif /* MPOL_F_MEMS_ALLOWED */
110 return 0;
111}
112
/*
 * cpumask_has_cpus - check whether a hex cpumask string has any bit set
 * @cpumask: mask text, e.g. "00000000,0000000f"; NULL is treated as empty
 * @len: maximum number of bytes of @cpumask to examine
 *
 * Scanning stops at the first NUL byte or after @len bytes, whichever
 * comes first. '0' digits and any non-hex characters (separators,
 * newline) are skipped.
 *
 * RETURNS:
 *  1 if a non-zero hex digit (1-9, a-f, A-F) was found
 *  0 otherwise
 */
static int cpumask_has_cpus(const char *cpumask, size_t len)
{
	size_t pos;

	if (cpumask == NULL)
		return 0;

	/* size_t index: avoids the signed/unsigned compare against len */
	for (pos = 0; pos < len && cpumask[pos] != '\0'; pos++) {
		char c = cpumask[pos];

		if ((c >= '1' && c <= '9') ||
		    (c >= 'a' && c <= 'f') ||
		    (c >= 'A' && c <= 'F'))
			return 1;
	}

	return 0;
}
125
Wanlong Gao354ebb42012-12-07 10:10:04 +0800126static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node)
Jan Stancek78de6502012-08-09 14:15:37 +0800127{
128 char *cpumask = NULL;
129 char fn[64];
130 FILE *f;
131 size_t len;
132 int i, ret;
133
134 for (i = 0; i < max_node; i++) {
135 if (!nodemask_isset(nodemask, i))
136 continue;
137 sprintf(fn, "/sys/devices/system/node/node%d/cpumap", i);
138 f = fopen(fn, "r");
139 if (f) {
140 ret = getdelim(&cpumask, &len, '\n', f);
141 if ((ret > 0) && (!cpumask_has_cpus(cpumask, len)))
142 nodemask_clr(nodemask, i);
143 fclose(f);
Jan Stancekcfdc4f72012-06-28 11:03:15 +0200144 }
145 }
Jan Stancek78de6502012-08-09 14:15:37 +0800146 free(cpumask);
147}
Jan Stancek7bf70142012-07-12 16:09:47 +0200148#endif /* HAVE_NUMA_H */
Jan Stancek78de6502012-08-09 14:15:37 +0800149
/*
 * get_allowed_nodes_arr - get number and array of available nodes
 * @flag: NH_MEMS and/or NH_CPUS, selects which filters are applied to
 *        the set of nodes found in sysfs
 * @num_nodes: pointer where number of available nodes will be stored
 * @nodes: array of available node ids, this is the filtered node
 *         bitmask compacted (without holes), so that each field
 *         contains node number. If NULL only num_nodes is
 *         returned, otherwise it contains a newly allocated array,
 *         which the caller is responsible to free. On failure *nodes
 *         is set to NULL and nothing needs to be freed.
 * RETURNS:
 *     0 on success (also when built without NUMA support or when
 *       numa_available() reports no NUMA; *num_nodes is 0 then)
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 */
int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
{
	int ret = 0;
#if HAVE_NUMA_H
	int i;
	int saved_errno;
	nodemask_t *nodemask = NULL;
	unsigned long max_node, nodemask_size;
#endif
	*num_nodes = 0;
	if (nodes)
		*nodes = NULL;

#if HAVE_NUMA_H
	if (numa_available() == -1)
		return 0;

	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
	nodemask_size = max_node / 8;

	nodemask = malloc(nodemask_size);
	if (nodes)
		*nodes = malloc(sizeof(int) * max_node);

	if (nodemask == NULL || (nodes && (*nodes == NULL))) {
		ret = -1;
		goto out;
	}

	/* allow all nodes at start, then filter based on flags */
	get_nodemask_allnodes(nodemask, max_node);
	if ((flag & NH_MEMS) == NH_MEMS) {
		ret = filter_nodemask_mem(nodemask, max_node);
		if (ret < 0)
			goto out;
	}
	if ((flag & NH_CPUS) == NH_CPUS)
		filter_nodemask_cpu(nodemask, max_node);

	for (i = 0; (unsigned long)i < max_node; i++) {
		if (nodemask_isset(nodemask, i)) {
			if (nodes)
				(*nodes)[*num_nodes] = i;
			(*num_nodes)++;
		}
	}

out:
	/* keep the errno of the failing call visible to the caller */
	saved_errno = errno;
	if (ret < 0 && nodes) {
		/* do not hand a half-initialized array back on failure */
		free(*nodes);
		*nodes = NULL;
	}
	free(nodemask);
	errno = saved_errno;
#endif
	return ret;
}
215
/*
 * get_allowed_nodes - convenience function to get fixed number of nodes
 * @flag: passed through to get_allowed_nodes_arr() (NH_MEMS and/or NH_CPUS)
 * @count: how many nodes to get
 * @...: int pointers, where node ids will be stored
 *
 * When fewer than @count nodes are available, the pointers that could be
 * served have already been written and errno is set to EINVAL.
 *
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *    -3 on not enough allowed nodes
 */
int get_allowed_nodes(int flag, int count, ...)
{
	int ret;
	int i, *nodep;
	va_list ap;
	int num_nodes = 0;
	int *nodes = NULL;
	int saved_errno;

	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
	if (ret < 0) {
		/* release whatever was allocated, keep errno for the caller */
		saved_errno = errno;
		free(nodes);
		errno = saved_errno;
		return ret;
	}

	va_start(ap, count);
	for (i = 0; i < count; i++) {
		nodep = va_arg(ap, int *);
		if (i >= num_nodes) {
			ret = -3;
			break;
		}
		*nodep = nodes[i];
	}
	va_end(ap);
	free(nodes);

	/* set errno last so cleanup cannot overwrite it */
	if (ret == -3)
		errno = EINVAL;

	return ret;
}
Jan Stancek78de6502012-08-09 14:15:37 +0800254
/*
 * print_node_info - print the node ids selected by @flag to stdout
 * @flag: filter flags handed to get_allowed_nodes_arr()
 *
 * Output is "nodes (flag=<flag>): " followed by either the
 * space-separated node ids or "error(<code>)".
 */
static void print_node_info(int flag)
{
	int *node_ids = NULL;
	int count, rc, idx;

	rc = get_allowed_nodes_arr(flag, &count, &node_ids);
	printf("nodes (flag=%d): ", flag);

	if (rc != 0) {
		printf("error(%d)\n", rc);
	} else {
		idx = 0;
		while (idx < count) {
			printf("%d ", node_ids[idx]);
			idx++;
		}
		printf("\n");
	}

	free(node_ids);
}
270
271/*
272 * nh_dump_nodes - dump info about nodes to stdout
273 */
Mike Frysingere948e1d2014-07-22 07:31:29 -0400274void nh_dump_nodes(void)
Jan Stancek78de6502012-08-09 14:15:37 +0800275{
276 print_node_info(0);
277 print_node_info(NH_MEMS);
278 print_node_info(NH_CPUS);
279 print_node_info(NH_MEMS | NH_CPUS);
280}
Zhouping Liu798adcd2013-03-19 11:40:51 +0800281
282/*
283 * is_numa - judge a system is NUMA system or not
Jan Stancekb8c69af2016-02-23 11:00:32 +0100284 * @flag: NH_MEMS and/or NH_CPUS
285 * @min_nodes: find at least 'min_nodes' nodes with memory
286 * NOTE: the function is designed to try to find at least 'min_nodes'
287 * available nodes, where each node contains memory.
Zhouping Liu798adcd2013-03-19 11:40:51 +0800288 * WARN: Don't use this func in child, as it calls tst_brkm()
289 * RETURNS:
290 * 0 - it's not a NUMA system
291 * 1 - it's a NUMA system
292 */
Jan Stancekb8c69af2016-02-23 11:00:32 +0100293int is_numa(void (*cleanup_fn)(void), int flag, int min_nodes)
Zhouping Liu798adcd2013-03-19 11:40:51 +0800294{
295 int ret;
296 int numa_nodes = 0;
297
Jan Stancekb8c69af2016-02-23 11:00:32 +0100298 ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL);
Zhouping Liu798adcd2013-03-19 11:40:51 +0800299 if (ret < 0)
300 tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr");
301
Jan Stancekb8c69af2016-02-23 11:00:32 +0100302 if (numa_nodes >= min_nodes)
Zhouping Liu798adcd2013-03-19 11:40:51 +0800303 return 1;
304 else
305 return 0;
306}