blob: 0385307bccae2ff327c4c1f2195df751d4183252 [file] [log] [blame]
/*
 * kmp_affinity.h -- header for affinity management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

extern int __kmp_affinity_compact; /* Affinity 'compact' value */
19
20class Address {
21public:
22 static const unsigned maxDepth = 32;
23 unsigned labels[maxDepth];
24 unsigned childNums[maxDepth];
25 unsigned depth;
26 unsigned leader;
27 Address(unsigned _depth)
28 : depth(_depth), leader(FALSE) {
29 }
30 Address &operator=(const Address &b) {
31 depth = b.depth;
32 for (unsigned i = 0; i < depth; i++) {
33 labels[i] = b.labels[i];
34 childNums[i] = b.childNums[i];
35 }
36 leader = FALSE;
37 return *this;
38 }
39 bool operator==(const Address &b) const {
40 if (depth != b.depth)
41 return false;
42 for (unsigned i = 0; i < depth; i++)
43 if(labels[i] != b.labels[i])
44 return false;
45 return true;
46 }
47 bool isClose(const Address &b, int level) const {
48 if (depth != b.depth)
49 return false;
50 if ((unsigned)level >= depth)
51 return true;
52 for (unsigned i = 0; i < (depth - level); i++)
53 if(labels[i] != b.labels[i])
54 return false;
55 return true;
56 }
57 bool operator!=(const Address &b) const {
58 return !operator==(b);
59 }
60};
61
62class AddrUnsPair {
63public:
64 Address first;
65 unsigned second;
66 AddrUnsPair(Address _first, unsigned _second)
67 : first(_first), second(_second) {
68 }
69 AddrUnsPair &operator=(const AddrUnsPair &b)
70 {
71 first = b.first;
72 second = b.second;
73 return *this;
74 }
75};
76
77
78static int
79__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
80{
81 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
82 ->first);
83 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
84 ->first);
85 unsigned depth = aa->depth;
86 unsigned i;
87 KMP_DEBUG_ASSERT(depth == bb->depth);
88 for (i = 0; i < depth; i++) {
89 if (aa->labels[i] < bb->labels[i]) return -1;
90 if (aa->labels[i] > bb->labels[i]) return 1;
91 }
92 return 0;
93}
94
95
96static int
97__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
98{
99 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
100 ->first);
101 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
102 ->first);
103 unsigned depth = aa->depth;
104 unsigned i;
105 KMP_DEBUG_ASSERT(depth == bb->depth);
106 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
107 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
108 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
109 int j = depth - i - 1;
110 if (aa->childNums[j] < bb->childNums[j]) return -1;
111 if (aa->childNums[j] > bb->childNums[j]) return 1;
112 }
113 for (; i < depth; i++) {
114 int j = i - __kmp_affinity_compact;
115 if (aa->childNums[j] < bb->childNums[j]) return -1;
116 if (aa->childNums[j] > bb->childNums[j]) return 1;
117 }
118 return 0;
119}
120
121
/** A structure for holding machine-specific hierarchy info to be computed once at init.
    This structure represents a mapping of threads to the actual machine hierarchy, or to
    our best guess at what the hierarchy might be, for the purpose of performing an
    efficient barrier. In the worst case, when there is no machine hierarchy information,
    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
Jonathan Peyton17078362015-09-10 19:22:07 +0000127class hierarchy_info {
128public:
129 /** Good default values for number of leaves and branching factor, given no affinity information.
130 Behaves a bit like hyper barrier. */
131 static const kmp_uint32 maxLeaves=4;
132 static const kmp_uint32 minBranch=4;
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000133 /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
134 or socket, packages/node, nodes/machine, etc. We don't want to get specific with
135 nomenclature. When the machine is oversubscribed we add levels to duplicate the
136 hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
Jonathan Peyton17078362015-09-10 19:22:07 +0000137 kmp_uint32 maxLevels;
138
139 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
140 number of levels along the longest path from root to any leaf. It corresponds to the
141 number of entries in numPerLevel if we exclude all but one trailing 1. */
142 kmp_uint32 depth;
143 kmp_uint32 base_num_threads;
144 enum init_status { initialized=0, not_initialized=1, initializing=2 };
145 volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
146 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
147
148 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
149 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
150 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
151 kmp_uint32 *numPerLevel;
152 kmp_uint32 *skipPerLevel;
153
154 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
155 int hier_depth = adr2os[0].first.depth;
156 int level = 0;
157 for (int i=hier_depth-1; i>=0; --i) {
158 int max = -1;
159 for (int j=0; j<num_addrs; ++j) {
160 int next = adr2os[j].first.childNums[i];
161 if (next > max) max = next;
162 }
163 numPerLevel[level] = max+1;
164 ++level;
165 }
166 }
167
168 hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
169
170 void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
171
172 void init(AddrUnsPair *adr2os, int num_addrs)
173 {
174 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
175 if (bool_result == 0) { // Wait for initialization
176 while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
177 return;
178 }
179 KMP_DEBUG_ASSERT(bool_result==1);
180
181 /* Added explicit initialization of the data fields here to prevent usage of dirty value
182 observed when static library is re-initialized multiple times (e.g. when
183 non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
184 depth = 1;
185 resizing = 0;
186 maxLevels = 7;
187 numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
188 skipPerLevel = &(numPerLevel[maxLevels]);
189 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
190 numPerLevel[i] = 1;
191 skipPerLevel[i] = 1;
192 }
193
194 // Sort table by physical ID
195 if (adr2os) {
196 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
197 deriveLevels(adr2os, num_addrs);
198 }
199 else {
200 numPerLevel[0] = maxLeaves;
201 numPerLevel[1] = num_addrs/maxLeaves;
202 if (num_addrs%maxLeaves) numPerLevel[1]++;
203 }
204
205 base_num_threads = num_addrs;
206 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
207 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
208 depth++;
209
210 kmp_uint32 branch = minBranch;
211 if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
212 if (branch<minBranch) branch=minBranch;
213 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
214 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
215 if (numPerLevel[d] & 1) numPerLevel[d]++;
216 numPerLevel[d] = numPerLevel[d] >> 1;
217 if (numPerLevel[d+1] == 1) depth++;
218 numPerLevel[d+1] = numPerLevel[d+1] << 1;
219 }
220 if(numPerLevel[0] == 1) {
221 branch = branch >> 1;
222 if (branch<4) branch = minBranch;
223 }
224 }
225
226 for (kmp_uint32 i=1; i<depth; ++i)
227 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
228 // Fill in hierarchy in the case of oversubscription
229 for (kmp_uint32 i=depth; i<maxLevels; ++i)
230 skipPerLevel[i] = 2*skipPerLevel[i-1];
231
232 uninitialized = initialized; // One writer
233
234 }
235
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000236 // Resize the hierarchy if nproc changes to something larger than before
Jonathan Peyton17078362015-09-10 19:22:07 +0000237 void resize(kmp_uint32 nproc)
238 {
239 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
240 if (bool_result == 0) { // Someone else is resizing
241 while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
242 return;
243 }
244 KMP_DEBUG_ASSERT(bool_result!=0);
245 KMP_DEBUG_ASSERT(nproc > base_num_threads);
246
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000247 // Calculate new maxLevels
Jonathan Peyton17078362015-09-10 19:22:07 +0000248 kmp_uint32 old_sz = skipPerLevel[depth-1];
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000249 kmp_uint32 incs = 0, old_maxLevels = maxLevels;
250 // First see if old maxLevels is enough to contain new size
251 for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
252 skipPerLevel[i] = 2*skipPerLevel[i-1];
253 old_sz *= 2;
254 depth++;
255 }
256 if (nproc <= old_sz) // enough space already
257 return;
258 // Not enough space, need to expand hierarchy
Jonathan Peyton17078362015-09-10 19:22:07 +0000259 while (nproc > old_sz) {
260 old_sz *=2;
261 incs++;
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000262 depth++;
263 }
Jonathan Peyton17078362015-09-10 19:22:07 +0000264 maxLevels += incs;
265
266 // Resize arrays
267 kmp_uint32 *old_numPerLevel = numPerLevel;
268 kmp_uint32 *old_skipPerLevel = skipPerLevel;
269 numPerLevel = skipPerLevel = NULL;
270 numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
271 skipPerLevel = &(numPerLevel[maxLevels]);
272
273 // Copy old elements from old arrays
274 for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
275 numPerLevel[i] = old_numPerLevel[i];
276 skipPerLevel[i] = old_skipPerLevel[i];
277 }
278
279 // Init new elements in arrays to 1
280 for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
281 numPerLevel[i] = 1;
282 skipPerLevel[i] = 1;
283 }
284
285 // Free old arrays
286 __kmp_free(old_numPerLevel);
287
288 // Fill in oversubscription levels of hierarchy
289 for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
290 skipPerLevel[i] = 2*skipPerLevel[i-1];
291
292 base_num_threads = nproc;
293 resizing = 0; // One writer
294
295 }
296};
#endif // KMP_AFFINITY_H