/*
 * kmp_affinity.h -- header for affinity management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

extern int __kmp_affinity_compact; /* Affinity 'compact' value */

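/** An Address records the position of one hardware thread in the machine
    topology. labels[i] is the physical ID at hierarchy level i, outermost
    level first; childNums[i] is the same position expressed as the node's
    index among its siblings at that level (this is what the 'compact'
    comparator below sorts on). In the affinity code that builds these
    objects, leader marks the first thread of a group at some level. */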
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

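/** Pairs an Address with an unsigned value. In the affinity code that builds
    the table, second holds the OS processor ID the address was derived from. */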
class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};


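// qsort() comparator: orders AddrUnsPair records by topology labels, outermost
// hierarchy level first, so physically adjacent threads end up adjacent in the
// sorted table.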
static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}


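// qsort() comparator implementing the 'compact' thread ordering: the
// __kmp_affinity_compact innermost levels are compared first, from the leaf
// level upward, then the remaining levels from the outside in. Larger compact
// values therefore pack consecutive threads closer together in the hierarchy.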
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}


/** A structure for holding machine-specific hierarchy info to be computed once
    at init. This structure represents a mapping of threads to the actual
    machine hierarchy, or to our best guess at what the hierarchy might be, for
    the purpose of performing an efficient barrier. In the worst case, when
    there is no machine hierarchy information, it produces a tree suitable for
    a barrier, similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
    /** Good default values for number of leaves and branching factor, given no
        affinity information. Behaves a bit like hyper barrier. */
    static const kmp_uint32 maxLeaves = 4;
    static const kmp_uint32 minBranch = 4;
    /** Number of levels in the hierarchy. Typical levels are threads/core,
        cores/package or socket, packages/node, nodes/machine, etc. We don't
        want to get specific with nomenclature. When the machine is
        oversubscribed we add levels to duplicate the hierarchy, doubling the
        thread capacity of the hierarchy each time we add a level. */
    kmp_uint32 maxLevels;

    /** This is specifically the depth of the machine configuration hierarchy,
        in terms of the number of levels along the longest path from root to
        any leaf. It corresponds to the number of entries in numPerLevel if we
        exclude all but one trailing 1. */
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
        the parent of a node at level i has. For example, on a machine with 4
        packages, 4 cores/package and 2 HT per core, numPerLevel = {2, 4, 4, 1,
        1}. All empty levels are set to 1. */
    kmp_uint32 *numPerLevel;
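    /** skipPerLevel[i] is the stride in leaf (thread id) space between
        consecutive subtrees rooted at level i, i.e. the product
        numPerLevel[0] * ... * numPerLevel[i-1]; skipPerLevel[0] is 1. */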
    kmp_uint32 *skipPerLevel;

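    /** Fill in numPerLevel from a table of thread addresses: for each level,
        working from the leaves upward, the fan-out is one more than the
        largest child index observed at that level. */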
    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }

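    /** One-time initialization, guarded by 'uninitialized' so that a single
        thread builds the hierarchy while late arrivals spin until it is ready.
        With address information the fan-outs are derived from the sorted
        table; without it, a two-level tree of maxLeaves-wide groups is
        assumed. Over-wide levels are then narrowed toward the branching factor
        by repeated halving, and the skipPerLevel strides are precomputed, with
        levels past 'depth' pre-doubled to absorb oversubscription. */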
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Someone else is initializing; wait for them to finish
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        /* Explicitly initialize the data fields here to prevent use of dirty
           values observed when the static library is re-initialized multiple
           times (e.g., when a non-OpenMP thread repeatedly launches/joins a
           thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs/maxLeaves;
            if (num_addrs%maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // level 0 may hold at most maxLeaves
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < minBranch) branch = minBranch;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer marks init complete
    }

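    /* Worked example for init(): on a machine with 4 packages, 4 cores per
       package, and 2 threads per core (32 OS threads), deriveLevels() yields
       numPerLevel = {2, 4, 4, 1, 1, 1, 1} and depth becomes 4. No level
       exceeds the branching limits, so the widths are kept as-is and
       skipPerLevel = {1, 2, 8, 32, 64, 128, 256}, where the entries past
       depth-1 are the pre-doubled oversubscription levels. */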
    // Resize the hierarchy if nproc changes to something larger than before
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
            skipPerLevel[i] = 2*skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer marks the resize complete
    }
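    /* Worked example for resize(): continuing the machine above (32 base
       threads, depth 4, skipPerLevel[3] = 32), resize(100) occupies two of
       the pre-doubled levels (strides 64 and 128), leaving depth = 6 and
       base_num_threads = 100. Since 100 <= 128 fits within the existing
       maxLevels, no reallocation is needed. */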
};
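/* Minimal usage sketch (hypothetical; in the runtime these calls are made by
   the barrier setup code, not by users):

     hierarchy_info machine_hierarchy;
     machine_hierarchy.init(NULL, nproc);  // no topology table available
     machine_hierarchy.resize(new_nproc);  // thread count grew past nproc
     machine_hierarchy.fini();             // release the level arrays
*/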
#endif // KMP_AFFINITY_H