/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

extern int __kmp_affinity_compact; /* Affinity 'compact' value */

/** An address in the machine topology: labels[] holds the path of hardware
    IDs from the outermost level down to a leaf (e.g. package, core, hw
    thread), and childNums[] holds each node's rank among its siblings at the
    corresponding level. */
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
    void print() const {
        unsigned i;
        printf("Depth: %u --- ", depth);
        for (i = 0; i < depth; i++) {
            printf("%u ", labels[i]);
        }
    }
};
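
// Illustrative sketch (hypothetical values, not from any real machine): on a
// topology with 2 packages x 2 cores x 2 hw threads, the second hw thread on
// core 1 of package 0 could be encoded as
//     Address addr(3);
//     addr.labels[0] = 0;  // package
//     addr.labels[1] = 1;  // core within package
//     addr.labels[2] = 1;  // hw thread within core
// addr.isClose(b, 1) then asks whether addr and b agree on every level above
// the innermost one, i.e. whether they share a core.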

/** Pairs a topology Address with the corresponding OS processor id; tables of
    these pairs (the adr2os parameters below) describe the machine. */
class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
    void print() const {
        printf("first = "); first.print();
        printf(" --- second = %u", second);
    }
    bool operator==(const AddrUnsPair &b) const {
        if (first != b.first) return false;
        if (second != b.second) return false;
        return true;
    }
    bool operator!=(const AddrUnsPair &b) const {
        return !operator==(b);
    }
};

// qsort() comparator: orders AddrUnsPair entries lexicographically by topology
// labels, outermost level first.
static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}

// qsort() comparator driven by __kmp_affinity_compact: the innermost
// __kmp_affinity_compact levels are compared first (innermost level having the
// highest precedence), then the remaining outer levels, outermost first.
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
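
// Worked example (hypothetical numbers): with depth == 3 (package, core, hw
// thread) and __kmp_affinity_compact == 1, the sort keys are childNums[2]
// (hw thread slot) first, then childNums[0] (package), then childNums[1]
// (core), so entries are grouped by hw-thread slot before package and core.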

/** A structure for holding machine-specific hierarchy info to be computed once
    at init. This structure represents a mapping of threads to the actual
    machine hierarchy, or to our best guess at what the hierarchy might be, for
    the purpose of performing an efficient barrier. In the worst case, when
    there is no machine hierarchy information, it produces a tree suitable for
    a barrier, similar to the tree used in the hyper barrier. */
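// Illustrative sketch (hypothetical machine): for 2 packages x 2 cores x
// 2 hw threads, init() below derives numPerLevel = {2, 2, 2, 1, ...} and
// skipPerLevel = {1, 2, 4, 8, ...}, an 8-leaf tree whose subtrees at level i
// each cover skipPerLevel[i] leaves.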
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no
        affinity information. Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves = 4;
    static const kmp_uint32 minBranch = 4;
    /** Number of levels in the hierarchy. Typical levels are threads/core,
        cores/package or socket, packages/node, nodes/machine, etc. We don't
        want to get specific with nomenclature. When the machine is
        oversubscribed we add levels to duplicate the hierarchy, doubling the
        thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy,
        in terms of the number of levels along the longest path from root to
        any leaf. It corresponds to the number of entries in numPerLevel if we
        exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
        the parent of a node at level i has. For example, if we have a machine
        with 4 packages, 4 cores/package and 2 HT per core, then
        numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;
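
    // Hedged reading of the two arrays, using the hypothetical
    // {2, 4, 4, 1, 1} machine from the comment above:
    //     skipPerLevel[0] = 1
    //     skipPerLevel[1] = numPerLevel[0] * skipPerLevel[0] = 2   // leaves per core
    //     skipPerLevel[2] = numPerLevel[1] * skipPerLevel[1] = 8   // leaves per package
    //     skipPerLevel[3] = numPerLevel[2] * skipPerLevel[2] = 32  // leaves per machine
    // i.e. skipPerLevel[i] is the number of leaves spanned by one subtree
    // rooted at level i (see init() below for the actual computation).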

    // Derive numPerLevel from a sorted adr2os table. Note the direction flip:
    // Address stores levels outermost-first, numPerLevel leaf-first, and the
    // fan-out at each level is the largest child index observed, plus one.
    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }
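
    // Worked example (hypothetical table): for four sorted addresses whose
    // childNums are {0,0}, {0,1}, {1,0}, {1,1} (package, core), the outer loop
    // visits the core level first (max index 1, so numPerLevel[0] = 2), then
    // the package level (max index 1, so numPerLevel[1] = 2): 2 cores per
    // package, 2 packages.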

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs/maxLeaves;
            if (num_addrs % maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer
    }
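
    // Hedged numeric example (continuing the hypothetical {2, 4, 4, 1, 1}
    // machine): init() leaves depth == 4 and skipPerLevel == {1, 2, 8, 32},
    // and the oversubscription fill extends this to {1, 2, 8, 32, 64, 128,
    // 256} with maxLevels == 7, so up to 256 threads fit in the tree before
    // resize() must grow the arrays.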

    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
            skipPerLevel[i] = 2*skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer
    }
};
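
// Minimal usage sketch (hypothetical driver code, not part of this header;
// the instance name machine_hierarchy is assumed):
//
//     hierarchy_info machine_hierarchy;
//     machine_hierarchy.init(adr2os, num_addrs);  // first caller builds tree,
//                                                 // others spin until ready
//     if (nproc > machine_hierarchy.base_num_threads)
//         machine_hierarchy.resize(nproc);        // grow under oversubscription
//     machine_hierarchy.fini();                   // release numPerLevel storage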

#endif // KMP_AFFINITY_H