blob: 0fa1f3d06c9b89ec9a6f394528cd1b351bf34ae6 [file] [log] [blame]
Jonathan Peyton17078362015-09-10 19:22:07 +00001/*
2 * kmp_affinity.h -- header for affinity management
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef KMP_AFFINITY_H
16#define KMP_AFFINITY_H
17
Jonathan Peyton17078362015-09-10 19:22:07 +000018class Address {
19public:
20 static const unsigned maxDepth = 32;
21 unsigned labels[maxDepth];
22 unsigned childNums[maxDepth];
23 unsigned depth;
24 unsigned leader;
25 Address(unsigned _depth)
26 : depth(_depth), leader(FALSE) {
27 }
28 Address &operator=(const Address &b) {
29 depth = b.depth;
30 for (unsigned i = 0; i < depth; i++) {
31 labels[i] = b.labels[i];
32 childNums[i] = b.childNums[i];
33 }
34 leader = FALSE;
35 return *this;
36 }
37 bool operator==(const Address &b) const {
38 if (depth != b.depth)
39 return false;
40 for (unsigned i = 0; i < depth; i++)
41 if(labels[i] != b.labels[i])
42 return false;
43 return true;
44 }
45 bool isClose(const Address &b, int level) const {
46 if (depth != b.depth)
47 return false;
48 if ((unsigned)level >= depth)
49 return true;
50 for (unsigned i = 0; i < (depth - level); i++)
51 if(labels[i] != b.labels[i])
52 return false;
53 return true;
54 }
55 bool operator!=(const Address &b) const {
56 return !operator==(b);
57 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +000058 void print() const {
59 unsigned i;
60 printf("Depth: %u --- ", depth);
61 for(i=0;i<depth;i++) {
62 printf("%u ", labels[i]);
63 }
64 }
Jonathan Peyton17078362015-09-10 19:22:07 +000065};
66
67class AddrUnsPair {
68public:
69 Address first;
70 unsigned second;
71 AddrUnsPair(Address _first, unsigned _second)
72 : first(_first), second(_second) {
73 }
74 AddrUnsPair &operator=(const AddrUnsPair &b)
75 {
76 first = b.first;
77 second = b.second;
78 return *this;
79 }
Jonathan Peyton01dcf362015-11-30 20:02:59 +000080 void print() const {
81 printf("first = "); first.print();
82 printf(" --- second = %u", second);
83 }
84 bool operator==(const AddrUnsPair &b) const {
85 if(first != b.first) return false;
86 if(second != b.second) return false;
87 return true;
88 }
89 bool operator!=(const AddrUnsPair &b) const {
90 return !operator==(b);
91 }
Jonathan Peyton17078362015-09-10 19:22:07 +000092};
93
94
95static int
96__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
97{
98 const Address *aa = (const Address *)&(((AddrUnsPair *)a)
99 ->first);
100 const Address *bb = (const Address *)&(((AddrUnsPair *)b)
101 ->first);
102 unsigned depth = aa->depth;
103 unsigned i;
104 KMP_DEBUG_ASSERT(depth == bb->depth);
105 for (i = 0; i < depth; i++) {
106 if (aa->labels[i] < bb->labels[i]) return -1;
107 if (aa->labels[i] > bb->labels[i]) return 1;
108 }
109 return 0;
110}
111
112
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000113/** A structure for holding machine-specific hierarchy info to be computed once at init.
114 This structure represents a mapping of threads to the actual machine hierarchy, or to
115 our best guess at what the hierarchy might be, for the purpose of performing an
116 efficient barrier. In the worst case, when there is no machine hierarchy information,
117 it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
Jonathan Peyton17078362015-09-10 19:22:07 +0000118class hierarchy_info {
119public:
120 /** Good default values for number of leaves and branching factor, given no affinity information.
121 Behaves a bit like hyper barrier. */
122 static const kmp_uint32 maxLeaves=4;
123 static const kmp_uint32 minBranch=4;
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000124 /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
125 or socket, packages/node, nodes/machine, etc. We don't want to get specific with
126 nomenclature. When the machine is oversubscribed we add levels to duplicate the
127 hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
Jonathan Peyton17078362015-09-10 19:22:07 +0000128 kmp_uint32 maxLevels;
129
130 /** This is specifically the depth of the machine configuration hierarchy, in terms of the
131 number of levels along the longest path from root to any leaf. It corresponds to the
132 number of entries in numPerLevel if we exclude all but one trailing 1. */
133 kmp_uint32 depth;
134 kmp_uint32 base_num_threads;
135 enum init_status { initialized=0, not_initialized=1, initializing=2 };
136 volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
137 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
138
139 /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
140 node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
141 and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
142 kmp_uint32 *numPerLevel;
143 kmp_uint32 *skipPerLevel;
144
145 void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
146 int hier_depth = adr2os[0].first.depth;
147 int level = 0;
148 for (int i=hier_depth-1; i>=0; --i) {
149 int max = -1;
150 for (int j=0; j<num_addrs; ++j) {
151 int next = adr2os[j].first.childNums[i];
152 if (next > max) max = next;
153 }
154 numPerLevel[level] = max+1;
155 ++level;
156 }
157 }
158
159 hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
160
161 void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
162
163 void init(AddrUnsPair *adr2os, int num_addrs)
164 {
165 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
166 if (bool_result == 0) { // Wait for initialization
167 while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
168 return;
169 }
170 KMP_DEBUG_ASSERT(bool_result==1);
171
172 /* Added explicit initialization of the data fields here to prevent usage of dirty value
173 observed when static library is re-initialized multiple times (e.g. when
174 non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
175 depth = 1;
176 resizing = 0;
177 maxLevels = 7;
178 numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
179 skipPerLevel = &(numPerLevel[maxLevels]);
180 for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
181 numPerLevel[i] = 1;
182 skipPerLevel[i] = 1;
183 }
184
185 // Sort table by physical ID
186 if (adr2os) {
187 qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
188 deriveLevels(adr2os, num_addrs);
189 }
190 else {
191 numPerLevel[0] = maxLeaves;
192 numPerLevel[1] = num_addrs/maxLeaves;
193 if (num_addrs%maxLeaves) numPerLevel[1]++;
194 }
195
196 base_num_threads = num_addrs;
197 for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
198 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
199 depth++;
200
201 kmp_uint32 branch = minBranch;
202 if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
203 if (branch<minBranch) branch=minBranch;
204 for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
205 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
206 if (numPerLevel[d] & 1) numPerLevel[d]++;
207 numPerLevel[d] = numPerLevel[d] >> 1;
208 if (numPerLevel[d+1] == 1) depth++;
209 numPerLevel[d+1] = numPerLevel[d+1] << 1;
210 }
211 if(numPerLevel[0] == 1) {
212 branch = branch >> 1;
213 if (branch<4) branch = minBranch;
214 }
215 }
216
217 for (kmp_uint32 i=1; i<depth; ++i)
218 skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
219 // Fill in hierarchy in the case of oversubscription
220 for (kmp_uint32 i=depth; i<maxLevels; ++i)
221 skipPerLevel[i] = 2*skipPerLevel[i-1];
222
223 uninitialized = initialized; // One writer
224
225 }
226
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000227 // Resize the hierarchy if nproc changes to something larger than before
Jonathan Peyton17078362015-09-10 19:22:07 +0000228 void resize(kmp_uint32 nproc)
229 {
230 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000231 while (bool_result == 0) { // someone else is trying to resize
232 KMP_CPU_PAUSE();
233 if (nproc <= base_num_threads) // happy with other thread's resize
234 return;
235 else // try to resize
236 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
Jonathan Peyton17078362015-09-10 19:22:07 +0000237 }
238 KMP_DEBUG_ASSERT(bool_result!=0);
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000239 if (nproc <= base_num_threads) return; // happy with other thread's resize
Jonathan Peyton17078362015-09-10 19:22:07 +0000240
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000241 // Calculate new maxLevels
Jonathan Peyton17078362015-09-10 19:22:07 +0000242 kmp_uint32 old_sz = skipPerLevel[depth-1];
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000243 kmp_uint32 incs = 0, old_maxLevels = maxLevels;
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000244 // First see if old maxLevels is enough to contain new size
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000245 for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
246 skipPerLevel[i] = 2*skipPerLevel[i-1];
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000247 numPerLevel[i-1] *= 2;
Jonathan Peytondf4d3dd2015-09-10 20:34:32 +0000248 old_sz *= 2;
249 depth++;
250 }
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000251 if (nproc > old_sz) { // Not enough space, need to expand hierarchy
252 while (nproc > old_sz) {
253 old_sz *=2;
254 incs++;
255 depth++;
256 }
257 maxLevels += incs;
Jonathan Peyton17078362015-09-10 19:22:07 +0000258
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000259 // Resize arrays
260 kmp_uint32 *old_numPerLevel = numPerLevel;
261 kmp_uint32 *old_skipPerLevel = skipPerLevel;
262 numPerLevel = skipPerLevel = NULL;
263 numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
264 skipPerLevel = &(numPerLevel[maxLevels]);
Jonathan Peyton17078362015-09-10 19:22:07 +0000265
Jonathan Peyton7dee82e2015-11-09 16:24:53 +0000266 // Copy old elements from old arrays
267 for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
268 numPerLevel[i] = old_numPerLevel[i];
269 skipPerLevel[i] = old_skipPerLevel[i];
270 }
271
272 // Init new elements in arrays to 1
273 for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
274 numPerLevel[i] = 1;
275 skipPerLevel[i] = 1;
276 }
277
278 // Free old arrays
279 __kmp_free(old_numPerLevel);
Jonathan Peyton17078362015-09-10 19:22:07 +0000280 }
281
Jonathan Peyton17078362015-09-10 19:22:07 +0000282 // Fill in oversubscription levels of hierarchy
283 for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
284 skipPerLevel[i] = 2*skipPerLevel[i-1];
285
286 base_num_threads = nproc;
287 resizing = 0; // One writer
288
289 }
290};
291#endif // KMP_AFFINITY_H