1/*
2 * kmp_atomic.c -- ATOMIC implementation routines
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp_atomic.h"
17#include "kmp.h" // TRUE, asm routines prototypes
18
19typedef unsigned char uchar;
20typedef unsigned short ushort;
21
22/*!
23@defgroup ATOMIC_OPS Atomic Operations
24These functions are used for implementing the many different varieties of atomic operations.
25
26The compiler is at liberty to inline atomic operations that are naturally supported
27by the target architecture. For instance, on the IA-32 architecture an atomic operation like this can be inlined
28@code
29static int s = 0;
30#pragma omp atomic
31 s++;
32@endcode
33using the single instruction: `lock; incl s`
34
35However the runtime does provide entrypoints for these operations to support compilers that choose
36not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the
37increment above.)
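For illustration only (the `loc` and `gtid` arguments shown are placeholders supplied by the compiler and runtime), the non-inlined form of the increment above would be a call such as
@code
__kmpc_atomic_fixed4_add( &loc, gtid, &s, 1 );
@endcode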
38
39The names of the functions are encoded by using the data type name and the operation name, as in these tables.
40
41Data Type | Data type encoding
42-----------|---------------
43int8_t | `fixed1`
44uint8_t | `fixed1u`
45int16_t | `fixed2`
46uint16_t | `fixed2u`
47int32_t | `fixed4`
48uint32_t | `fixed4u`
49int64_t | `fixed8`
50uint64_t | `fixed8u`
51float | `float4`
52double | `float8`
53long double (80-bit 8087 extended float) | `float10`
54complex<float> | `cmplx4`
55complex<double> | `cmplx8`
56complex<float10> | `cmplx10`
57<br>
58
59Operation | Operation encoding
60----------|-------------------
61+ | add
62- | sub
63\* | mul
64/ | div
65& | andb
66<< | shl
67\>\> | shr
68\| | orb
69^ | xor
70&& | andl
71\|\| | orl
72maximum | max
73minimum | min
74.eqv. | eqv
75.neqv. | neqv
76
77<br>
78For non-commutative operations, `_rev` can also be added for the reversed operation.
79For the functions that capture the result, the suffix `_cpt` is added.
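As an illustration of the encoding (semantics shown as plain, non-atomic C for clarity; all of these names appear in the full list below):
@code
__kmpc_atomic_fixed4_add       // *lhs += rhs          on int32_t
__kmpc_atomic_float8_div_rev   // *lhs  = rhs / *lhs   on double (reversed form)
__kmpc_atomic_fixed8u_shr_cpt  // *lhs >>= rhs         on uint64_t, returning the old or new value
@endcode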
80
81Update Functions
82================
83The general form of an atomic function that just performs an update (without a `capture`) is
84@code
85void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
86@endcode
87@param id_ref a pointer to the source location
88@param gtid the global thread id
89@param lhs a pointer to the left operand
90@param rhs the right operand
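For example (a hedged sketch; `loc` and `gtid` stand for the compiler-supplied source location and thread id), an update such as `#pragma omp atomic x *= 2.5;` on a `double x` could be lowered to
@code
__kmpc_atomic_float8_mul( &loc, gtid, &x, 2.5 );
@endcode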
91
92`capture` functions
93===================
94The capture functions perform an atomic update and return a result, which is either the value
95before the update or the value after it. They take an additional argument that selects which result is returned.
96Their general form is therefore
97@code
98TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag );
99@endcode
100@param id_ref a pointer to the source location
101@param gtid the global thread id
102@param lhs a pointer to the left operand
103@param rhs the right operand
104@param flag one if the result is to be captured *after* the operation, zero if captured *before*.
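In non-atomic terms the two settings of `flag` behave roughly as follows (sketch for `__kmpc_atomic_fixed4_add_cpt`):
@code
// flag == 0: capture the value *before* the update
//     old = *lhs; *lhs = *lhs + rhs; return old;
// flag == 1: capture the value *after* the update
//     *lhs = *lhs + rhs; return *lhs;
@endcode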
105
106The one exception to this is the `complex<float>` type, where the value is not returned;
107instead, a pointer to the result is passed as an extra argument.
108
109They look like
110@code
111void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
112@endcode
113
114Read and Write Operations
115=========================
116The OpenMP<sup>*</sup> standard now supports atomic operations that simply ensure that the
117value is read or written atomically, with no modification
118performed. In many cases on IA-32 architecture these operations can be inlined since
119the architecture guarantees that no tearing occurs on aligned objects
120accessed with a single memory operation of up to 64 bits in size.
121
122The general form of the read operations is
123@code
124TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
125@endcode
126
127For the write operations the form is
128@code
129void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
130@endcode
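As a hedged example (again with placeholder `loc` and `gtid`), an atomic read of a `double x` and an atomic write to an `int32_t i` could be lowered to
@code
double v = __kmpc_atomic_float8_rd( &loc, gtid, &x );
__kmpc_atomic_fixed4_wr( &loc, gtid, &i, 42 );
@endcode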
131
132Full list of functions
133======================
134This leads to the generation of 376 atomic functions, as follows.
135
136Functions for integers
137---------------------
138There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed and unsigned (where that matters).
139@code
140 __kmpc_atomic_fixed1_add
141 __kmpc_atomic_fixed1_add_cpt
142 __kmpc_atomic_fixed1_add_fp
143 __kmpc_atomic_fixed1_andb
144 __kmpc_atomic_fixed1_andb_cpt
145 __kmpc_atomic_fixed1_andl
146 __kmpc_atomic_fixed1_andl_cpt
147 __kmpc_atomic_fixed1_div
148 __kmpc_atomic_fixed1_div_cpt
149 __kmpc_atomic_fixed1_div_cpt_rev
150 __kmpc_atomic_fixed1_div_float8
151 __kmpc_atomic_fixed1_div_fp
152 __kmpc_atomic_fixed1_div_rev
153 __kmpc_atomic_fixed1_eqv
154 __kmpc_atomic_fixed1_eqv_cpt
155 __kmpc_atomic_fixed1_max
156 __kmpc_atomic_fixed1_max_cpt
157 __kmpc_atomic_fixed1_min
158 __kmpc_atomic_fixed1_min_cpt
159 __kmpc_atomic_fixed1_mul
160 __kmpc_atomic_fixed1_mul_cpt
161 __kmpc_atomic_fixed1_mul_float8
162 __kmpc_atomic_fixed1_mul_fp
163 __kmpc_atomic_fixed1_neqv
164 __kmpc_atomic_fixed1_neqv_cpt
165 __kmpc_atomic_fixed1_orb
166 __kmpc_atomic_fixed1_orb_cpt
167 __kmpc_atomic_fixed1_orl
168 __kmpc_atomic_fixed1_orl_cpt
169 __kmpc_atomic_fixed1_rd
170 __kmpc_atomic_fixed1_shl
171 __kmpc_atomic_fixed1_shl_cpt
172 __kmpc_atomic_fixed1_shl_cpt_rev
173 __kmpc_atomic_fixed1_shl_rev
174 __kmpc_atomic_fixed1_shr
175 __kmpc_atomic_fixed1_shr_cpt
176 __kmpc_atomic_fixed1_shr_cpt_rev
177 __kmpc_atomic_fixed1_shr_rev
178 __kmpc_atomic_fixed1_sub
179 __kmpc_atomic_fixed1_sub_cpt
180 __kmpc_atomic_fixed1_sub_cpt_rev
181 __kmpc_atomic_fixed1_sub_fp
182 __kmpc_atomic_fixed1_sub_rev
183 __kmpc_atomic_fixed1_swp
184 __kmpc_atomic_fixed1_wr
185 __kmpc_atomic_fixed1_xor
186 __kmpc_atomic_fixed1_xor_cpt
187 __kmpc_atomic_fixed1u_div
188 __kmpc_atomic_fixed1u_div_cpt
189 __kmpc_atomic_fixed1u_div_cpt_rev
190 __kmpc_atomic_fixed1u_div_fp
191 __kmpc_atomic_fixed1u_div_rev
192 __kmpc_atomic_fixed1u_shr
193 __kmpc_atomic_fixed1u_shr_cpt
194 __kmpc_atomic_fixed1u_shr_cpt_rev
195 __kmpc_atomic_fixed1u_shr_rev
196 __kmpc_atomic_fixed2_add
197 __kmpc_atomic_fixed2_add_cpt
198 __kmpc_atomic_fixed2_add_fp
199 __kmpc_atomic_fixed2_andb
200 __kmpc_atomic_fixed2_andb_cpt
201 __kmpc_atomic_fixed2_andl
202 __kmpc_atomic_fixed2_andl_cpt
203 __kmpc_atomic_fixed2_div
204 __kmpc_atomic_fixed2_div_cpt
205 __kmpc_atomic_fixed2_div_cpt_rev
206 __kmpc_atomic_fixed2_div_float8
207 __kmpc_atomic_fixed2_div_fp
208 __kmpc_atomic_fixed2_div_rev
209 __kmpc_atomic_fixed2_eqv
210 __kmpc_atomic_fixed2_eqv_cpt
211 __kmpc_atomic_fixed2_max
212 __kmpc_atomic_fixed2_max_cpt
213 __kmpc_atomic_fixed2_min
214 __kmpc_atomic_fixed2_min_cpt
215 __kmpc_atomic_fixed2_mul
216 __kmpc_atomic_fixed2_mul_cpt
217 __kmpc_atomic_fixed2_mul_float8
218 __kmpc_atomic_fixed2_mul_fp
219 __kmpc_atomic_fixed2_neqv
220 __kmpc_atomic_fixed2_neqv_cpt
221 __kmpc_atomic_fixed2_orb
222 __kmpc_atomic_fixed2_orb_cpt
223 __kmpc_atomic_fixed2_orl
224 __kmpc_atomic_fixed2_orl_cpt
225 __kmpc_atomic_fixed2_rd
226 __kmpc_atomic_fixed2_shl
227 __kmpc_atomic_fixed2_shl_cpt
228 __kmpc_atomic_fixed2_shl_cpt_rev
229 __kmpc_atomic_fixed2_shl_rev
230 __kmpc_atomic_fixed2_shr
231 __kmpc_atomic_fixed2_shr_cpt
232 __kmpc_atomic_fixed2_shr_cpt_rev
233 __kmpc_atomic_fixed2_shr_rev
234 __kmpc_atomic_fixed2_sub
235 __kmpc_atomic_fixed2_sub_cpt
236 __kmpc_atomic_fixed2_sub_cpt_rev
237 __kmpc_atomic_fixed2_sub_fp
238 __kmpc_atomic_fixed2_sub_rev
239 __kmpc_atomic_fixed2_swp
240 __kmpc_atomic_fixed2_wr
241 __kmpc_atomic_fixed2_xor
242 __kmpc_atomic_fixed2_xor_cpt
243 __kmpc_atomic_fixed2u_div
244 __kmpc_atomic_fixed2u_div_cpt
245 __kmpc_atomic_fixed2u_div_cpt_rev
246 __kmpc_atomic_fixed2u_div_fp
247 __kmpc_atomic_fixed2u_div_rev
248 __kmpc_atomic_fixed2u_shr
249 __kmpc_atomic_fixed2u_shr_cpt
250 __kmpc_atomic_fixed2u_shr_cpt_rev
251 __kmpc_atomic_fixed2u_shr_rev
252 __kmpc_atomic_fixed4_add
253 __kmpc_atomic_fixed4_add_cpt
254 __kmpc_atomic_fixed4_add_fp
255 __kmpc_atomic_fixed4_andb
256 __kmpc_atomic_fixed4_andb_cpt
257 __kmpc_atomic_fixed4_andl
258 __kmpc_atomic_fixed4_andl_cpt
259 __kmpc_atomic_fixed4_div
260 __kmpc_atomic_fixed4_div_cpt
261 __kmpc_atomic_fixed4_div_cpt_rev
262 __kmpc_atomic_fixed4_div_float8
263 __kmpc_atomic_fixed4_div_fp
264 __kmpc_atomic_fixed4_div_rev
265 __kmpc_atomic_fixed4_eqv
266 __kmpc_atomic_fixed4_eqv_cpt
267 __kmpc_atomic_fixed4_max
268 __kmpc_atomic_fixed4_max_cpt
269 __kmpc_atomic_fixed4_min
270 __kmpc_atomic_fixed4_min_cpt
271 __kmpc_atomic_fixed4_mul
272 __kmpc_atomic_fixed4_mul_cpt
273 __kmpc_atomic_fixed4_mul_float8
274 __kmpc_atomic_fixed4_mul_fp
275 __kmpc_atomic_fixed4_neqv
276 __kmpc_atomic_fixed4_neqv_cpt
277 __kmpc_atomic_fixed4_orb
278 __kmpc_atomic_fixed4_orb_cpt
279 __kmpc_atomic_fixed4_orl
280 __kmpc_atomic_fixed4_orl_cpt
281 __kmpc_atomic_fixed4_rd
282 __kmpc_atomic_fixed4_shl
283 __kmpc_atomic_fixed4_shl_cpt
284 __kmpc_atomic_fixed4_shl_cpt_rev
285 __kmpc_atomic_fixed4_shl_rev
286 __kmpc_atomic_fixed4_shr
287 __kmpc_atomic_fixed4_shr_cpt
288 __kmpc_atomic_fixed4_shr_cpt_rev
289 __kmpc_atomic_fixed4_shr_rev
290 __kmpc_atomic_fixed4_sub
291 __kmpc_atomic_fixed4_sub_cpt
292 __kmpc_atomic_fixed4_sub_cpt_rev
293 __kmpc_atomic_fixed4_sub_fp
294 __kmpc_atomic_fixed4_sub_rev
295 __kmpc_atomic_fixed4_swp
296 __kmpc_atomic_fixed4_wr
297 __kmpc_atomic_fixed4_xor
298 __kmpc_atomic_fixed4_xor_cpt
299 __kmpc_atomic_fixed4u_div
300 __kmpc_atomic_fixed4u_div_cpt
301 __kmpc_atomic_fixed4u_div_cpt_rev
302 __kmpc_atomic_fixed4u_div_fp
303 __kmpc_atomic_fixed4u_div_rev
304 __kmpc_atomic_fixed4u_shr
305 __kmpc_atomic_fixed4u_shr_cpt
306 __kmpc_atomic_fixed4u_shr_cpt_rev
307 __kmpc_atomic_fixed4u_shr_rev
308 __kmpc_atomic_fixed8_add
309 __kmpc_atomic_fixed8_add_cpt
310 __kmpc_atomic_fixed8_add_fp
311 __kmpc_atomic_fixed8_andb
312 __kmpc_atomic_fixed8_andb_cpt
313 __kmpc_atomic_fixed8_andl
314 __kmpc_atomic_fixed8_andl_cpt
315 __kmpc_atomic_fixed8_div
316 __kmpc_atomic_fixed8_div_cpt
317 __kmpc_atomic_fixed8_div_cpt_rev
318 __kmpc_atomic_fixed8_div_float8
319 __kmpc_atomic_fixed8_div_fp
320 __kmpc_atomic_fixed8_div_rev
321 __kmpc_atomic_fixed8_eqv
322 __kmpc_atomic_fixed8_eqv_cpt
323 __kmpc_atomic_fixed8_max
324 __kmpc_atomic_fixed8_max_cpt
325 __kmpc_atomic_fixed8_min
326 __kmpc_atomic_fixed8_min_cpt
327 __kmpc_atomic_fixed8_mul
328 __kmpc_atomic_fixed8_mul_cpt
329 __kmpc_atomic_fixed8_mul_float8
330 __kmpc_atomic_fixed8_mul_fp
331 __kmpc_atomic_fixed8_neqv
332 __kmpc_atomic_fixed8_neqv_cpt
333 __kmpc_atomic_fixed8_orb
334 __kmpc_atomic_fixed8_orb_cpt
335 __kmpc_atomic_fixed8_orl
336 __kmpc_atomic_fixed8_orl_cpt
337 __kmpc_atomic_fixed8_rd
338 __kmpc_atomic_fixed8_shl
339 __kmpc_atomic_fixed8_shl_cpt
340 __kmpc_atomic_fixed8_shl_cpt_rev
341 __kmpc_atomic_fixed8_shl_rev
342 __kmpc_atomic_fixed8_shr
343 __kmpc_atomic_fixed8_shr_cpt
344 __kmpc_atomic_fixed8_shr_cpt_rev
345 __kmpc_atomic_fixed8_shr_rev
346 __kmpc_atomic_fixed8_sub
347 __kmpc_atomic_fixed8_sub_cpt
348 __kmpc_atomic_fixed8_sub_cpt_rev
349 __kmpc_atomic_fixed8_sub_fp
350 __kmpc_atomic_fixed8_sub_rev
351 __kmpc_atomic_fixed8_swp
352 __kmpc_atomic_fixed8_wr
353 __kmpc_atomic_fixed8_xor
354 __kmpc_atomic_fixed8_xor_cpt
355 __kmpc_atomic_fixed8u_div
356 __kmpc_atomic_fixed8u_div_cpt
357 __kmpc_atomic_fixed8u_div_cpt_rev
358 __kmpc_atomic_fixed8u_div_fp
359 __kmpc_atomic_fixed8u_div_rev
360 __kmpc_atomic_fixed8u_shr
361 __kmpc_atomic_fixed8u_shr_cpt
362 __kmpc_atomic_fixed8u_shr_cpt_rev
363 __kmpc_atomic_fixed8u_shr_rev
364@endcode
365
366Functions for floating point
367----------------------------
368There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes.
369(Ten-byte floats are the x87 extended-precision format, now rarely used.)
370@code
371 __kmpc_atomic_float4_add
372 __kmpc_atomic_float4_add_cpt
373 __kmpc_atomic_float4_add_float8
374 __kmpc_atomic_float4_add_fp
375 __kmpc_atomic_float4_div
376 __kmpc_atomic_float4_div_cpt
377 __kmpc_atomic_float4_div_cpt_rev
378 __kmpc_atomic_float4_div_float8
379 __kmpc_atomic_float4_div_fp
380 __kmpc_atomic_float4_div_rev
381 __kmpc_atomic_float4_max
382 __kmpc_atomic_float4_max_cpt
383 __kmpc_atomic_float4_min
384 __kmpc_atomic_float4_min_cpt
385 __kmpc_atomic_float4_mul
386 __kmpc_atomic_float4_mul_cpt
387 __kmpc_atomic_float4_mul_float8
388 __kmpc_atomic_float4_mul_fp
389 __kmpc_atomic_float4_rd
390 __kmpc_atomic_float4_sub
391 __kmpc_atomic_float4_sub_cpt
392 __kmpc_atomic_float4_sub_cpt_rev
393 __kmpc_atomic_float4_sub_float8
394 __kmpc_atomic_float4_sub_fp
395 __kmpc_atomic_float4_sub_rev
396 __kmpc_atomic_float4_swp
397 __kmpc_atomic_float4_wr
398 __kmpc_atomic_float8_add
399 __kmpc_atomic_float8_add_cpt
400 __kmpc_atomic_float8_add_fp
401 __kmpc_atomic_float8_div
402 __kmpc_atomic_float8_div_cpt
403 __kmpc_atomic_float8_div_cpt_rev
404 __kmpc_atomic_float8_div_fp
405 __kmpc_atomic_float8_div_rev
406 __kmpc_atomic_float8_max
407 __kmpc_atomic_float8_max_cpt
408 __kmpc_atomic_float8_min
409 __kmpc_atomic_float8_min_cpt
410 __kmpc_atomic_float8_mul
411 __kmpc_atomic_float8_mul_cpt
412 __kmpc_atomic_float8_mul_fp
413 __kmpc_atomic_float8_rd
414 __kmpc_atomic_float8_sub
415 __kmpc_atomic_float8_sub_cpt
416 __kmpc_atomic_float8_sub_cpt_rev
417 __kmpc_atomic_float8_sub_fp
418 __kmpc_atomic_float8_sub_rev
419 __kmpc_atomic_float8_swp
420 __kmpc_atomic_float8_wr
421 __kmpc_atomic_float10_add
422 __kmpc_atomic_float10_add_cpt
423 __kmpc_atomic_float10_add_fp
424 __kmpc_atomic_float10_div
425 __kmpc_atomic_float10_div_cpt
426 __kmpc_atomic_float10_div_cpt_rev
427 __kmpc_atomic_float10_div_fp
428 __kmpc_atomic_float10_div_rev
429 __kmpc_atomic_float10_mul
430 __kmpc_atomic_float10_mul_cpt
431 __kmpc_atomic_float10_mul_fp
432 __kmpc_atomic_float10_rd
433 __kmpc_atomic_float10_sub
434 __kmpc_atomic_float10_sub_cpt
435 __kmpc_atomic_float10_sub_cpt_rev
436 __kmpc_atomic_float10_sub_fp
437 __kmpc_atomic_float10_sub_rev
438 __kmpc_atomic_float10_swp
439 __kmpc_atomic_float10_wr
440 __kmpc_atomic_float16_add
441 __kmpc_atomic_float16_add_cpt
442 __kmpc_atomic_float16_div
443 __kmpc_atomic_float16_div_cpt
444 __kmpc_atomic_float16_div_cpt_rev
445 __kmpc_atomic_float16_div_rev
446 __kmpc_atomic_float16_max
447 __kmpc_atomic_float16_max_cpt
448 __kmpc_atomic_float16_min
449 __kmpc_atomic_float16_min_cpt
450 __kmpc_atomic_float16_mul
451 __kmpc_atomic_float16_mul_cpt
452 __kmpc_atomic_float16_rd
453 __kmpc_atomic_float16_sub
454 __kmpc_atomic_float16_sub_cpt
455 __kmpc_atomic_float16_sub_cpt_rev
456 __kmpc_atomic_float16_sub_rev
457 __kmpc_atomic_float16_swp
458 __kmpc_atomic_float16_wr
459@endcode
460
461Functions for Complex types
462---------------------------
463Functions for complex types whose component floating point variables are of size 4, 8, 10 or 16 bytes.
464The names here are based on the size of the component float, *not* the size of the complex type. So
465`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* a `complex<float>`.
466
467@code
468 __kmpc_atomic_cmplx4_add
469 __kmpc_atomic_cmplx4_add_cmplx8
470 __kmpc_atomic_cmplx4_add_cpt
471 __kmpc_atomic_cmplx4_div
472 __kmpc_atomic_cmplx4_div_cmplx8
473 __kmpc_atomic_cmplx4_div_cpt
474 __kmpc_atomic_cmplx4_div_cpt_rev
475 __kmpc_atomic_cmplx4_div_rev
476 __kmpc_atomic_cmplx4_mul
477 __kmpc_atomic_cmplx4_mul_cmplx8
478 __kmpc_atomic_cmplx4_mul_cpt
479 __kmpc_atomic_cmplx4_rd
480 __kmpc_atomic_cmplx4_sub
481 __kmpc_atomic_cmplx4_sub_cmplx8
482 __kmpc_atomic_cmplx4_sub_cpt
483 __kmpc_atomic_cmplx4_sub_cpt_rev
484 __kmpc_atomic_cmplx4_sub_rev
485 __kmpc_atomic_cmplx4_swp
486 __kmpc_atomic_cmplx4_wr
487 __kmpc_atomic_cmplx8_add
488 __kmpc_atomic_cmplx8_add_cpt
489 __kmpc_atomic_cmplx8_div
490 __kmpc_atomic_cmplx8_div_cpt
491 __kmpc_atomic_cmplx8_div_cpt_rev
492 __kmpc_atomic_cmplx8_div_rev
493 __kmpc_atomic_cmplx8_mul
494 __kmpc_atomic_cmplx8_mul_cpt
495 __kmpc_atomic_cmplx8_rd
496 __kmpc_atomic_cmplx8_sub
497 __kmpc_atomic_cmplx8_sub_cpt
498 __kmpc_atomic_cmplx8_sub_cpt_rev
499 __kmpc_atomic_cmplx8_sub_rev
500 __kmpc_atomic_cmplx8_swp
501 __kmpc_atomic_cmplx8_wr
502 __kmpc_atomic_cmplx10_add
503 __kmpc_atomic_cmplx10_add_cpt
504 __kmpc_atomic_cmplx10_div
505 __kmpc_atomic_cmplx10_div_cpt
506 __kmpc_atomic_cmplx10_div_cpt_rev
507 __kmpc_atomic_cmplx10_div_rev
508 __kmpc_atomic_cmplx10_mul
509 __kmpc_atomic_cmplx10_mul_cpt
510 __kmpc_atomic_cmplx10_rd
511 __kmpc_atomic_cmplx10_sub
512 __kmpc_atomic_cmplx10_sub_cpt
513 __kmpc_atomic_cmplx10_sub_cpt_rev
514 __kmpc_atomic_cmplx10_sub_rev
515 __kmpc_atomic_cmplx10_swp
516 __kmpc_atomic_cmplx10_wr
517 __kmpc_atomic_cmplx16_add
518 __kmpc_atomic_cmplx16_add_cpt
519 __kmpc_atomic_cmplx16_div
520 __kmpc_atomic_cmplx16_div_cpt
521 __kmpc_atomic_cmplx16_div_cpt_rev
522 __kmpc_atomic_cmplx16_div_rev
523 __kmpc_atomic_cmplx16_mul
524 __kmpc_atomic_cmplx16_mul_cpt
525 __kmpc_atomic_cmplx16_rd
526 __kmpc_atomic_cmplx16_sub
527 __kmpc_atomic_cmplx16_sub_cpt
528 __kmpc_atomic_cmplx16_sub_cpt_rev
529 __kmpc_atomic_cmplx16_swp
530 __kmpc_atomic_cmplx16_wr
531@endcode
532*/
533
534/*!
535@ingroup ATOMIC_OPS
536@{
537*/
538
539/*
540 * Global vars
541 */
542
543#ifndef KMP_GOMP_COMPAT
544int __kmp_atomic_mode = 1; // Intel perf
545#else
546int __kmp_atomic_mode = 2; // GOMP compatibility
547#endif /* KMP_GOMP_COMPAT */
548
549KMP_ALIGN(128)
550
551kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
552kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
553kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
554kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
555kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
556kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
557kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
558kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */
559kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
560kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
561kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
562kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
563kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
564
565
566/*
567 2007-03-02:
568 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
569 bug on *_32 and *_32e. This is just a temporary workaround for the problem.
570 It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
571 routines in assembler language.
572*/
573#define KMP_ATOMIC_VOLATILE volatile
574
575#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
576
577 static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
578 static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
579 static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
580 static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
581 static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
582 static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
583
584 static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
585 static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
586 static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
587 static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
588 static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
589 static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
590
591 static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
592 static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
593 static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
594 static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
595
596 static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
597 static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
598 static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
599 static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
600
601#endif
602
603/* ------------------------------------------------------------------------ */
604/* ATOMIC implementation routines */
605/* one routine for each operation and operand type */
606/* ------------------------------------------------------------------------ */
607
608// All routine declarations look like
609// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
610// ------------------------------------------------------------------------
611
612#define KMP_CHECK_GTID \
613 if ( gtid == KMP_GTID_UNKNOWN ) { \
614 gtid = __kmp_entry_gtid(); \
615 } // check and get gtid when needed
616
617// Beginning of a definition (provides name, parameters, debug trace)
618// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
619// OP_ID - operation identifier (add, sub, mul, ...)
620// TYPE - operands' type
621#define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
622RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
623{ \
624 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
625 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
626
627// ------------------------------------------------------------------------
628// Lock variables used for critical sections for various size operands
629#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
630#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
631#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
632#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
633#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
634#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
635#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
636#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
637#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
638#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
639#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
640#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
641#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
642
643// ------------------------------------------------------------------------
644// Operation on *lhs, rhs bound by critical section
645// OP - operator (it's supposed to contain an assignment)
646// LCK_ID - lock identifier
647// Note: don't check gtid as it should always be valid
648// 1, 2-byte - expect valid parameter, other - check before this macro
649#define OP_CRITICAL(OP,LCK_ID) \
650 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
651 \
652 (*lhs) OP (rhs); \
653 \
654 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
655
656// ------------------------------------------------------------------------
657// For GNU compatibility, we may need to use a critical section,
658// even though it is not required by the ISA.
659//
660// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
661// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
662// critical section. On Intel(R) 64, all atomic operations are done with fetch
663// and add or compare and exchange. Therefore, the FLAG parameter to this
664// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
665// require a critical section, where we predict that they will be implemented
666// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
667//
668// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
669// the FLAG parameter should always be 1. If we know that we will be using
670// a critical section, then we want to make certain that we use the generic
671// lock __kmp_atomic_lock to protect the atomic update, and not one of the
672// locks that are specialized based upon the size or type of the data.
673//
674// If FLAG is 0, then we are relying on dead code elimination by the build
675// compiler to get rid of the useless block of code, and save a needless
676// branch at runtime.
677//
678
679#ifdef KMP_GOMP_COMPAT
680# define OP_GOMP_CRITICAL(OP,FLAG) \
681 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
682 KMP_CHECK_GTID; \
683 OP_CRITICAL( OP, 0 ); \
684 return; \
685 }
686# else
687# define OP_GOMP_CRITICAL(OP,FLAG)
688#endif /* KMP_GOMP_COMPAT */
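// Sketch of the GOMP-compat path (illustration only, not compiled): with KMP_GOMP_COMPAT
// built in and __kmp_atomic_mode == 2, an entry such as __kmpc_atomic_float10_add
// effectively reduces to
//
//     __kmp_acquire_atomic_lock( &__kmp_atomic_lock, gtid );  // the single generic lock
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock( &__kmp_atomic_lock, gtid );
//     return;
//
// so every such update is serialized on the one generic lock, matching the
// GOMP_atomic_start()/GOMP_atomic_end() protocol described above.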
689
690#if KMP_MIC
691# define KMP_DO_PAUSE _mm_delay_32( 1 )
692#else
693# define KMP_DO_PAUSE KMP_CPU_PAUSE()
694#endif /* KMP_MIC */
695
696// ------------------------------------------------------------------------
697// Operation on *lhs, rhs using "compare_and_store" routine
698// TYPE - operands' type
699// BITS - size in bits, used to distinguish low level calls
700// OP - operator
701#define OP_CMPXCHG(TYPE,BITS,OP) \
702 { \
703        TYPE old_value, new_value; \
704        old_value = *(TYPE volatile *)lhs; \
705        new_value = old_value OP rhs; \
706 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
707 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
708 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
709 { \
710 KMP_DO_PAUSE; \
711 \
712            old_value = *(TYPE volatile *)lhs; \
713            new_value = old_value OP rhs; \
714 } \
715 }
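// Illustration (not compiled): for ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, ... )
// the OP_CMPXCHG(kmp_real64, 64, *) body behaves roughly like:
//
//     kmp_real64 old_value, new_value;
//     old_value = *(kmp_real64 volatile *)lhs;
//     new_value = old_value * rhs;
//     while ( ! KMP_COMPARE_AND_STORE_ACQ64( (kmp_int64 *) lhs,
//                  *VOLATILE_CAST(kmp_int64 *) &old_value,
//                  *VOLATILE_CAST(kmp_int64 *) &new_value ) ) {
//         KMP_DO_PAUSE;                            // back off, then re-read and retry
//         old_value = *(kmp_real64 volatile *)lhs;
//         new_value = old_value * rhs;
//     }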
716
717#if USE_CMPXCHG_FIX
718// 2007-06-25:
719// workaround for C78287 (complex(kind=4) data type)
720// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
721// Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
722// This is a problem of the compiler.
723// Related tracker is C76005, targeted to 11.0.
724// I verified the asm of the workaround.
725#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
726 { \
727        struct _sss { \
728 TYPE cmp; \
729 kmp_int##BITS *vvv; \
730 }; \
731 struct _sss old_value, new_value; \
732 old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
733 new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
734 *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
735 new_value.cmp = old_value.cmp OP rhs; \
736 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
737 *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
738 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
739 { \
740 KMP_DO_PAUSE; \
741 \
742 *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
743 new_value.cmp = old_value.cmp OP rhs; \
744 } \
745 }
746// end of the first part of the workaround for C78287
747#endif // USE_CMPXCHG_FIX
748
749#if KMP_ARCH_X86 || KMP_ARCH_X86_64
750
751// ------------------------------------------------------------------------
752// X86 or X86_64: no alignment problems ====================================
753#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
754ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
755 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
756 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
757 KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
758}
759// -------------------------------------------------------------------------
760#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
761ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
762 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
763 OP_CMPXCHG(TYPE,BITS,OP) \
764}
765#if USE_CMPXCHG_FIX
766// -------------------------------------------------------------------------
767// workaround for C78287 (complex(kind=4) data type)
768#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
769ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
770 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
771 OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
772}
773// end of the second part of the workaround for C78287
774#endif
775
776#else
777// -------------------------------------------------------------------------
778// Code for other architectures that don't handle unaligned accesses.
779#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
780ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
781 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
782 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
783 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
784 KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
785 } else { \
786 KMP_CHECK_GTID; \
787 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
788 } \
789}
790// -------------------------------------------------------------------------
791#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
792ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
793 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
794 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
795 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
796 } else { \
797 KMP_CHECK_GTID; \
798 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
799 } \
800}
801#if USE_CMPXCHG_FIX
802// -------------------------------------------------------------------------
803// workaround for C78287 (complex(kind=4) data type)
804#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
805ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
806 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
807 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
808 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
809 } else { \
810 KMP_CHECK_GTID; \
811 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
812 } \
813}
814// end of the second part of the workaround for C78287
815#endif // USE_CMPXCHG_FIX
816#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
817
818// Routines for ATOMIC 4-byte operands addition and subtraction
819ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
820ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
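// For reference (approximate expansion, ignoring the GOMP-compat branch),
// ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) generates:
//
//     void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs )
//     {
//         KMP_DEBUG_ASSERT( __kmp_init_serial );
//         KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//         KMP_TEST_THEN_ADD32( lhs, + rhs );   // single locked add on X86/X86_64
//     }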
821
822ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
823ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
824
825// Routines for ATOMIC 8-byte operands addition and subtraction
826ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
827ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
828
829ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
830ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
831
832// ------------------------------------------------------------------------
833// Entries definition for integer operands
834// TYPE_ID - operands type and size (fixed4, float4)
835// OP_ID - operation identifier (add, sub, mul, ...)
836// TYPE - operand type
837// BITS - size in bits, used to distinguish low level calls
838// OP - operator (used in critical section)
839// LCK_ID - lock identifier, used to possibly distinguish lock variable
840// MASK - used for alignment check
841
842// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
843// ------------------------------------------------------------------------
844// Routines for ATOMIC integer operands, other operators
845// ------------------------------------------------------------------------
846// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
847ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
848ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
849ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
850ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
851ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
852ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
853ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
854ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
855ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
856ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
857ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
858ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
859ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
860ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
861ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
862ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
863ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
864ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
865ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
866ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
867ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
868ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
869ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
870ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
871ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
872ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
873ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
874ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
875ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
876ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
877ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
878ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
879ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
880ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
881ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
882ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
883ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
884ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
885ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
886ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
887ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
888ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
889ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
890ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
891// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
892
893
894/* ------------------------------------------------------------------------ */
895/* Routines for C/C++ Reduction operators && and || */
896/* ------------------------------------------------------------------------ */
897
898// ------------------------------------------------------------------------
899// Need separate macros for &&, || because there is no combined assignment
900// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
901#define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
902ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
903 OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
904 OP_CRITICAL( = *lhs OP, LCK_ID ) \
905}
906
907#if KMP_ARCH_X86 || KMP_ARCH_X86_64
908
909// ------------------------------------------------------------------------
910// X86 or X86_64: no alignment problems ===================================
911#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
912ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
913 OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
914 OP_CMPXCHG(TYPE,BITS,OP) \
915}
916
917#else
918// ------------------------------------------------------------------------
919// Code for other architectures that don't handle unaligned accesses.
920#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
921ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
922 OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
923 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
924 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
925 } else { \
926 KMP_CHECK_GTID; \
927 OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
928 } \
929}
930#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
931
932ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
933ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
934ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
935ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
936ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
937ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
938ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
939ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
940
941
942/* ------------------------------------------------------------------------- */
943/* Routines for Fortran operators that matched no one in C: */
944/* MAX, MIN, .EQV., .NEQV. */
945/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
946/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
947/* ------------------------------------------------------------------------- */
948
949// -------------------------------------------------------------------------
950// MIN and MAX need separate macros
951// OP - operator used to check whether any action is needed
952#define MIN_MAX_CRITSECT(OP,LCK_ID) \
953 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
954 \
955 if ( *lhs OP rhs ) { /* still need actions? */ \
956 *lhs = rhs; \
957 } \
958 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
959
960// -------------------------------------------------------------------------
961#ifdef KMP_GOMP_COMPAT
962#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
963 if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
964 KMP_CHECK_GTID; \
965 MIN_MAX_CRITSECT( OP, 0 ); \
966 return; \
967 }
968#else
969#define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
970#endif /* KMP_GOMP_COMPAT */
971
972// -------------------------------------------------------------------------
973#define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
974 { \
975 TYPE KMP_ATOMIC_VOLATILE temp_val; \
976 TYPE old_value; \
977 temp_val = *lhs; \
978 old_value = temp_val; \
979 while ( old_value OP rhs && /* still need actions? */ \
980 ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
981 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
982 *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
983 { \
984 KMP_CPU_PAUSE(); \
985 temp_val = *lhs; \
986 old_value = temp_val; \
987 } \
988 }
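// Sketch of the semantics: MIN_MAX_CMPXCHG(kmp_real64, 64, <), as used by
// __kmpc_atomic_float8_max, keeps re-reading *lhs and retries the 64-bit
// compare-and-store only while (old_value < rhs) still holds; if another thread
// installs a value >= rhs first, the loop simply exits without writing.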
989
990// -------------------------------------------------------------------------
991// 1-byte, 2-byte operands - use critical section
992#define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
993ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
994 if ( *lhs OP rhs ) { /* need actions? */ \
995 GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
996 MIN_MAX_CRITSECT(OP,LCK_ID) \
997 } \
998}
999
1000#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1001
1002// -------------------------------------------------------------------------
1003// X86 or X86_64: no alignment problems ====================================
1004#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1005ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1006 if ( *lhs OP rhs ) { \
1007 GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1008 MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1009 } \
1010}
1011
1012#else
1013// -------------------------------------------------------------------------
1014// Code for other architectures that don't handle unaligned accesses.
1015#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1016ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1017 if ( *lhs OP rhs ) { \
1018 GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1019 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1020 MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1021 } else { \
1022 KMP_CHECK_GTID; \
1023 MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1024 } \
1025 } \
1026}
1027#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1028
1029MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1030MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1031MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1032MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1033MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1034MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1035MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1036MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1037MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1038MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1039MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1040MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1041#if KMP_HAVE_QUAD
1042MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1043MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1044#if ( KMP_ARCH_X86 )
1045 MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1046 MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1047#endif
1048#endif
1049// ------------------------------------------------------------------------
1050// Need separate macros for .EQV. because of the need of complement (~)
1051// OP ignored for critical sections, ^=~ used instead
1052#define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1053ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1054 OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1055 OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1056}
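// Recall that A .EQV. B is the bitwise complement of exclusive-or: ~(A ^ B) == A ^ ~B.
// That is why the critical-section form sends the combined assignment "^=~"
// (i.e. *lhs ^= ~rhs) and the cmpxchg form below uses the operator "^~".
// Example (8-bit): A = 0xF0, B = 0xCC  ->  A .EQV. B = ~(0xF0 ^ 0xCC) = ~0x3C = 0xC3.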
1057
1058// ------------------------------------------------------------------------
1059#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1060// ------------------------------------------------------------------------
1061// X86 or X86_64: no alignment problems ===================================
1062#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1063ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1064 OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1065 OP_CMPXCHG(TYPE,BITS,OP) \
1066}
1067// ------------------------------------------------------------------------
1068#else
1069// ------------------------------------------------------------------------
1070// Code for other architectures that don't handle unaligned accesses.
1071#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1072ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1073 OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1074 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1075 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1076 } else { \
1077 KMP_CHECK_GTID; \
1078 OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1079 } \
1080}
1081#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1082
1083ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1084ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1085ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1086ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1087ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1088ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1089ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1090ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1091
1092
1093// ------------------------------------------------------------------------
1094// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1095// TYPE_ID, OP_ID, TYPE - detailed above
1096// OP - operator
1097// LCK_ID - lock identifier, used to possibly distinguish lock variable
1098#define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1099ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1100 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1101 OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1102}
1103
1104/* ------------------------------------------------------------------------- */
1105// routines for long double type
1106ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1107ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1108ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1109ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1110#if KMP_HAVE_QUAD
1111// routines for _Quad type
1112ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1113ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1114ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1115ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1116#if ( KMP_ARCH_X86 )
1117 ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1118 ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1119 ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1120 ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1121#endif
1122#endif
1123// routines for complex types
1124
1125#if USE_CMPXCHG_FIX
1126// workaround for C78287 (complex(kind=4) data type)
1127ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1128ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1129ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1130ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1131// end of the workaround for C78287
1132#else
1133ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1134ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1135ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1136ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1137#endif // USE_CMPXCHG_FIX
1138
1139ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1140ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1141ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1142ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1143ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1144ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1145ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1146ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1147#if KMP_HAVE_QUAD
1148ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1149ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1150ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1151ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1152#if ( KMP_ARCH_X86 )
1153 ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1154 ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1155 ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1156 ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1157#endif
1158#endif
1159
1160#if OMP_40_ENABLED
1161
1162// OpenMP 4.0: x = expr binop x for non-commutative operations.
1163// Supported only on IA-32 architecture and Intel(R) 64
1164#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1165
1166// ------------------------------------------------------------------------
1167// Operation on *lhs, rhs bound by critical section
1168// OP - operator (it's supposed to contain an assignment)
1169// LCK_ID - lock identifier
1170// Note: don't check gtid as it should always be valid
1171// 1, 2-byte - expect valid parameter, other - check before this macro
1172#define OP_CRITICAL_REV(OP,LCK_ID) \
1173 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1174 \
1175 (*lhs) = (rhs) OP (*lhs); \
1176 \
1177 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1178
1179#ifdef KMP_GOMP_COMPAT
1180#define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1181 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1182 KMP_CHECK_GTID; \
1183 OP_CRITICAL_REV( OP, 0 ); \
1184 return; \
1185 }
1186#else
1187#define OP_GOMP_CRITICAL_REV(OP,FLAG)
1188#endif /* KMP_GOMP_COMPAT */
1189
1190
1191// Beginning of a definition (provides name, parameters, debug trace)
1192// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1193// OP_ID - operation identifier (add, sub, mul, ...)
1194// TYPE - operands' type
1195#define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1196RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1197{ \
1198 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1199 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1200
1201// ------------------------------------------------------------------------
1202// Operation on *lhs, rhs using "compare_and_store" routine
1203// TYPE - operands' type
1204// BITS - size in bits, used to distinguish low level calls
1205// OP - operator
1206// Note: temp_val introduced in order to force the compiler to read
1207// *lhs only once (w/o it the compiler reads *lhs twice)
1208#define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1209 { \
1210 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1211 TYPE old_value, new_value; \
1212 temp_val = *lhs; \
1213 old_value = temp_val; \
1214 new_value = rhs OP old_value; \
1215 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1216 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1217 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1218 { \
1219 KMP_DO_PAUSE; \
1220 \
1221 temp_val = *lhs; \
1222 old_value = temp_val; \
1223 new_value = rhs OP old_value; \
1224 } \
1225 }
1226
1227// -------------------------------------------------------------------------
1228#define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1229ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1230 OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1231 OP_CMPXCHG_REV(TYPE,BITS,OP) \
1232}
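// The _rev entries implement "x = expr <op> x", i.e. the operands are reversed.
// For example, __kmpc_atomic_float8_div_rev (generated below) atomically performs
//
//     *lhs = rhs / *lhs;
//
// using the same compare-and-store retry loop, just with the operand order swapped.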
1233
1234// ------------------------------------------------------------------------
1235// Entries definition for integer operands
1236// TYPE_ID - operands type and size (fixed4, float4)
1237// OP_ID - operation identifier (add, sub, mul, ...)
1238// TYPE - operand type
1239// BITS - size in bits, used to distinguish low level calls
1240// OP - operator (used in critical section)
1241// LCK_ID - lock identifier, used to possibly distinguish lock variable
1242
1243// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1244// ------------------------------------------------------------------------
1245// Routines for ATOMIC integer operands, other operators
1246// ------------------------------------------------------------------------
1247// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1248ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1249ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1250ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1251ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1252ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1253ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1254
1255ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1256ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1257ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1258ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1259ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1260ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1261
1262ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1263ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1264ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1265ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1266ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1267ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1268
1269ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1270ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1271ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1272ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1273ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1274ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1275
1276ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1277ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1278
1279ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1280ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1281// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
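/*
    Illustration only: a source-level form that a compiler may lower to one of the
    *_rev entry points above (the exact lowering is compiler dependent, and the
    compiler is free to inline the operation instead).
    @code
    int x = 5;

    void demo( void )
    {
        // x appears on the right of the non-commutative operator,
        // so a reversed routine (e.g. __kmpc_atomic_fixed4_div_rev) applies
        #pragma omp atomic
        x = 10 / x;
    }
    @endcode
*/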
1282
1283// ------------------------------------------------------------------------
1284// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1285// TYPE_ID, OP_ID, TYPE - detailed above
1286// OP - operator
1287// LCK_ID - lock identifier, used to possibly distinguish lock variable
1288#define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1289ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1290 OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1291 OP_CRITICAL_REV(OP,LCK_ID) \
1292}
1293
1294/* ------------------------------------------------------------------------- */
1295// routines for long double type
1296ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1297ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1298#if KMP_HAVE_QUAD
1299// routines for _Quad type
1300ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1301ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1302#if ( KMP_ARCH_X86 )
1303 ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1304 ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1305#endif
1306#endif
1307
1308// routines for complex types
1309ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1310ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1311ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1312ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1313ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1314ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1315#if KMP_HAVE_QUAD
1316ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 )  // __kmpc_atomic_cmplx16_sub_rev
1317ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1318#if ( KMP_ARCH_X86 )
1319 ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1320 ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1321#endif
1322#endif
1323
1324
1325#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1326// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1327
1328#endif //OMP_40_ENABLED
1329
1330
1331/* ------------------------------------------------------------------------ */
1332/* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1333/* Note: in order to reduce the total number of type combinations */
1334/* it is assumed that the compiler converts RHS to the longest floating */
1335/* type, that is _Quad, before calling any of these routines */
1336/* Conversion to _Quad is done by the compiler during the calculation, */
1337/* and conversion back to TYPE happens before the assignment, like: */
1338/* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1339/* A performance penalty is expected because of software emulation */
1340/* ------------------------------------------------------------------------ */
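/*
    Illustration only: the value computation described above, with atomicity omitted.
    'double' stands in here for the widest floating type because _Quad availability is
    compiler specific; the demo_* name is illustrative only.
    @code
    // *lhs = (TYPE)( (WIDEST)(*lhs) OP rhs ), i.e. widen, operate, narrow back
    static void demo_float4_mul_float8( float *lhs, double rhs )
    {
        *lhs = (float)( (double)(*lhs) * rhs );
    }
    @endcode
*/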
1341
1342#define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1343void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1344{ \
1345 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1346 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1347
1348// -------------------------------------------------------------------------
1349#define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1350ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1351 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1352 OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1353}
1354
1355// -------------------------------------------------------------------------
1356#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1357// -------------------------------------------------------------------------
1358// X86 or X86_64: no alignment problems ====================================
1359#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1360ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1361 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1362 OP_CMPXCHG(TYPE,BITS,OP) \
1363}
1364// -------------------------------------------------------------------------
1365#else
1366// ------------------------------------------------------------------------
1367// Code for other architectures that don't handle unaligned accesses.
1368#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1369ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1370 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1371 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1372 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1373 } else { \
1374 KMP_CHECK_GTID; \
1375 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1376 } \
1377}
1378#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
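/*
    Illustration only: the alignment test used by the non-x86 path above.  MASK is
    the required alignment minus one (e.g. 3 for a 4-byte operand); a zero result
    selects the lock-free cmpxchg path, anything else falls back to the critical
    section.  The demo_* name is illustrative only.
    @code
    #include <stdint.h>

    static int demo_is_aligned_4( const void *p )
    {
        return ( (uintptr_t)p & 0x3 ) == 0;
    }
    @endcode
*/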
1379
1380// RHS=float8
1381ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1382ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1383ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1384ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1385ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1386ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1387ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1388ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1389ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1390ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1391ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1392ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1393
1394// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1395#if KMP_HAVE_QUAD
1396ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1397ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1398ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1399ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1400ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1401
1402ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1403ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1404ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1405ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1406ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1407
1408ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1409ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1410ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1411ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1412ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1413
1414ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1415ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1416ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1417ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1418ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1419
1420ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1421ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1422ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1423ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1424
1425ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1426ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1427ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1428ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1429
1430ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1431ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1432ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1433ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1434#endif
1435
1436#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1437// ------------------------------------------------------------------------
1438// X86 or X86_64: no alignment problems ====================================
1439#if USE_CMPXCHG_FIX
1440// workaround for C78287 (complex(kind=4) data type)
1441#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1442ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1443 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1444 OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1445}
1446// end of the second part of the workaround for C78287
1447#else
1448#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1449ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1450 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1451 OP_CMPXCHG(TYPE,BITS,OP) \
1452}
1453#endif // USE_CMPXCHG_FIX
1454#else
1455// ------------------------------------------------------------------------
1456// Code for other architectures that don't handle unaligned accesses.
1457#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1458ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1459 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1460 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1461 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1462 } else { \
1463 KMP_CHECK_GTID; \
1464 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1465 } \
1466}
1467#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1468
1469ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1470ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1471ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1472ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1473
1474// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1475#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1476
1477//////////////////////////////////////////////////////////////////////////////////////////////////////
1478// ------------------------------------------------------------------------
1479// Atomic READ routines
1480// ------------------------------------------------------------------------
1481
1482// ------------------------------------------------------------------------
1483// Beginning of a definition (provides name, parameters, debug trace)
1484// TYPE_ID - operands type and size (fixed* for signed, fixed*u for unsigned fixed-size integers)
1485// OP_ID - operation identifier (add, sub, mul, ...)
1486// TYPE - operands' type
1487#define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1488RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1489{ \
1490 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1491 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1492
1493// ------------------------------------------------------------------------
1494// Operation on *lhs, rhs using "compare_and_store_ret" routine
1495// TYPE - operands' type
1496// BITS - size in bits, used to distinguish low level calls
1497// OP - operator
1498// Note: temp_val introduced in order to force the compiler to read
1499// *lhs only once (w/o it the compiler reads *lhs twice)
1500// TODO: check if it is still necessary
1501// Return the old value regardless of the result of the "compare & swap" operation
1502
1503#define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1504 { \
1505 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1506 union f_i_union { \
1507 TYPE f_val; \
1508 kmp_int##BITS i_val; \
1509 }; \
1510 union f_i_union old_value; \
1511 temp_val = *loc; \
1512 old_value.f_val = temp_val; \
1513 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1514 *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1515 *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1516 new_value = old_value.f_val; \
1517 return new_value; \
1518 }
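/*
    Illustration only: the same read-via-compare-and-store idea using the GCC/Clang
    __atomic builtins (an assumption; the runtime uses KMP_COMPARE_AND_STORE_RET32).
    Whether the exchange succeeds or fails, old_value ends up holding the current
    contents of *loc, which is exactly what an atomic read needs.  The demo_* name
    is illustrative only.
    @code
    #include <stdint.h>

    static float demo_float4_rd( float *loc )
    {
        union { float f_val; int32_t i_val; } old_value;
        old_value.f_val = *loc;
        // on failure the builtin stores the current contents into old_value.i_val
        __atomic_compare_exchange_n( (int32_t *)loc, &old_value.i_val, old_value.i_val,
                                     0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE );
        return old_value.f_val;
    }
    @endcode
*/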
1519
1520// -------------------------------------------------------------------------
1521// Operation on *lhs, rhs bound by critical section
1522// OP - operator (it's supposed to contain an assignment)
1523// LCK_ID - lock identifier
1524// Note: don't check gtid as it should always be valid
1525// 1, 2-byte - expect valid parameter, other - check before this macro
1526#define OP_CRITICAL_READ(OP,LCK_ID) \
1527 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1528 \
1529 new_value = (*loc); \
1530 \
1531 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1532
1533// -------------------------------------------------------------------------
1534#ifdef KMP_GOMP_COMPAT
1535#define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1536 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1537 KMP_CHECK_GTID; \
1538 OP_CRITICAL_READ( OP, 0 ); \
1539 return new_value; \
1540 }
1541#else
1542#define OP_GOMP_CRITICAL_READ(OP,FLAG)
1543#endif /* KMP_GOMP_COMPAT */
1544
1545// -------------------------------------------------------------------------
1546#define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1547ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1548 TYPE new_value; \
1549 OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1550 new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1551 return new_value; \
1552}
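/*
    Illustration only: reading a location by atomically adding zero, which is what the
    fetch-and-add based macro above amounts to; the add changes nothing, but the
    primitive returns the value held at the instant of the operation.  Written with a
    GCC/Clang builtin as a stand-in; the demo_* name is illustrative only.
    @code
    #include <stdint.h>

    static int64_t demo_fixed8_rd( int64_t *loc )
    {
        return __atomic_fetch_add( loc, 0, __ATOMIC_ACQUIRE );
    }
    @endcode
*/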
1553// -------------------------------------------------------------------------
1554#define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1555ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1556 TYPE new_value; \
1557 OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1558 OP_CMPXCHG_READ(TYPE,BITS,OP) \
1559}
1560// ------------------------------------------------------------------------
1561// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1562// TYPE_ID, OP_ID, TYPE - detailed above
1563// OP - operator
1564// LCK_ID - lock identifier, used to possibly distinguish lock variable
1565#define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1566ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1567 TYPE new_value; \
1568 OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1569 OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1570 return new_value; \
1571}
1572
1573// ------------------------------------------------------------------------
1574// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1575// Let's return the read value through the additional parameter.
1576
1577#if ( KMP_OS_WINDOWS )
1578
1579#define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1580 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1581 \
1582 (*out) = (*loc); \
1583 \
1584 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1585// ------------------------------------------------------------------------
1586#ifdef KMP_GOMP_COMPAT
1587#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1588 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1589 KMP_CHECK_GTID; \
1590 OP_CRITICAL_READ_WRK( OP, 0 ); \
1591 }
1592#else
1593#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1594#endif /* KMP_GOMP_COMPAT */
1595// ------------------------------------------------------------------------
1596#define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1597void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1598{ \
1599 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1600 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1601
1602// ------------------------------------------------------------------------
1603#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1604ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1605 OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1606 OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1607}
1608
1609#endif // KMP_OS_WINDOWS
1610
1611// ------------------------------------------------------------------------
1612// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1613ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1614ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1615ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1616ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1617
1618// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1619ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1620ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1621
1622ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1623#if KMP_HAVE_QUAD
1624ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1625#endif // KMP_HAVE_QUAD
1626
1627// Fix for CQ220361 on Windows* OS
1628#if ( KMP_OS_WINDOWS )
1629 ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1630#else
1631 ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1632#endif
1633ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1634ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1635#if KMP_HAVE_QUAD
1636ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1637#if ( KMP_ARCH_X86 )
1638 ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1639 ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1640#endif
1641#endif
1642
1643
1644// ------------------------------------------------------------------------
1645// Atomic WRITE routines
1646// ------------------------------------------------------------------------
1647
1648#define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1649ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1650 OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1651 KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1652}
1653// ------------------------------------------------------------------------
1654#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1655ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1656 OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1657 KMP_XCHG_REAL##BITS( lhs, rhs ); \
1658}
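/*
    Illustration only: a source-level atomic write that a compiler may lower to one of
    the *_wr entry points above, or may simply inline as an exchange (the lowering is
    compiler dependent).
    @code
    double x;

    void demo_write( double v )
    {
        #pragma omp atomic write
        x = v;
    }
    @endcode
*/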
1659
1660
1661// ------------------------------------------------------------------------
1662// Operation on *lhs, rhs using "compare_and_store" routine
1663// TYPE - operands' type
1664// BITS - size in bits, used to distinguish low level calls
1665// OP - operator
1666// Note: temp_val introduced in order to force the compiler to read
1667// *lhs only once (w/o it the compiler reads *lhs twice)
1668#define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1669 { \
1670 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1671 TYPE old_value, new_value; \
1672 temp_val = *lhs; \
1673 old_value = temp_val; \
1674 new_value = rhs; \
1675 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1676 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1677 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1678 { \
1679 KMP_CPU_PAUSE(); \
1680 \
1681 temp_val = *lhs; \
1682 old_value = temp_val; \
1683 new_value = rhs; \
1684 } \
1685 }
1686
1687// -------------------------------------------------------------------------
1688#define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1689ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1690 OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1691 OP_CMPXCHG_WR(TYPE,BITS,OP) \
1692}
1693
1694// ------------------------------------------------------------------------
1695// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1696// TYPE_ID, OP_ID, TYPE - detailed above
1697// OP - operator
1698// LCK_ID - lock identifier, used to possibly distinguish lock variable
1699#define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1700ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1701 OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1702 OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1703}
1704// -------------------------------------------------------------------------
1705
1706ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1707ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1708ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1709#if ( KMP_ARCH_X86 )
1710 ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1711#else
1712 ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1713#endif
1714
1715ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1716#if ( KMP_ARCH_X86 )
1717 ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1718#else
1719 ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1720#endif
1721
1722ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1723#if KMP_HAVE_QUAD
1724ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1725#endif
1726ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1727ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1728ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1729#if KMP_HAVE_QUAD
1730ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1731#if ( KMP_ARCH_X86 )
1732 ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1733 ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1734#endif
1735#endif
1736
1737
1738// ------------------------------------------------------------------------
1739// Atomic CAPTURE routines
1740// ------------------------------------------------------------------------
1741
1742// Beginning of a definition (provides name, parameters, debug trace)
1743// TYPE_ID - operands type and size (fixed* for signed, fixed*u for unsigned fixed-size integers)
1744// OP_ID - operation identifier (add, sub, mul, ...)
1745// TYPE - operands' type
1746#define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1747RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1748{ \
1749 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1750 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1751
1752// -------------------------------------------------------------------------
1753// Operation on *lhs, rhs bound by critical section
1754// OP - operator (it's supposed to contain an assignment)
1755// LCK_ID - lock identifier
1756// Note: don't check gtid as it should always be valid
1757// 1, 2-byte - expect valid parameter, other - check before this macro
1758#define OP_CRITICAL_CPT(OP,LCK_ID) \
1759 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1760 \
1761 if( flag ) { \
1762 (*lhs) OP rhs; \
1763 new_value = (*lhs); \
1764 } else { \
1765 new_value = (*lhs); \
1766 (*lhs) OP rhs; \
1767 } \
1768 \
1769 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1770 return new_value;
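/*
    Illustration only: the two OpenMP capture forms that the flag parameter
    distinguishes.  flag == 1 corresponds to capturing the value after the update,
    flag == 0 to capturing the value before it (compare the two branches of
    OP_CRITICAL_CPT above).
    @code
    int x = 0;

    void demo_capture( int e )
    {
        int v_after, v_before;

        #pragma omp atomic capture
        { x += e; v_after = x; }    // capture after the update  (flag == 1)

        #pragma omp atomic capture
        { v_before = x; x += e; }   // capture before the update (flag == 0)

        (void)v_after; (void)v_before;
    }
    @endcode
*/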
1771
1772// ------------------------------------------------------------------------
1773#ifdef KMP_GOMP_COMPAT
1774#define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1775 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1776 KMP_CHECK_GTID; \
1777 OP_CRITICAL_CPT( OP##=, 0 ); \
1778 }
1779#else
1780#define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1781#endif /* KMP_GOMP_COMPAT */
1782
1783// ------------------------------------------------------------------------
1784// Operation on *lhs, rhs using "compare_and_store" routine
1785// TYPE - operands' type
1786// BITS - size in bits, used to distinguish low level calls
1787// OP - operator
1788// Note: temp_val introduced in order to force the compiler to read
1789// *lhs only once (w/o it the compiler reads *lhs twice)
1790#define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1791 { \
1792 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1793 TYPE old_value, new_value; \
1794 temp_val = *lhs; \
1795 old_value = temp_val; \
1796 new_value = old_value OP rhs; \
1797 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1798 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1799 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1800 { \
1801 KMP_CPU_PAUSE(); \
1802 \
1803 temp_val = *lhs; \
1804 old_value = temp_val; \
1805 new_value = old_value OP rhs; \
1806 } \
1807 if( flag ) { \
1808 return new_value; \
1809 } else \
1810 return old_value; \
1811 }
1812
1813// -------------------------------------------------------------------------
1814#define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1815ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1816 TYPE new_value; \
1817 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1818 OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1819}
1820
1821// -------------------------------------------------------------------------
1822#define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1823ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1824 TYPE old_value, new_value; \
1825 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1826 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1827 old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1828 if( flag ) { \
1829 return old_value OP rhs; \
1830 } else \
1831 return old_value; \
1832}
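/*
    Illustration only: the fetch-and-add based capture above, rewritten with a
    GCC/Clang builtin as a stand-in for KMP_TEST_THEN_ADD*.  The primitive returns
    the old value; the "after" result is reconstructed arithmetically instead of
    being re-read.  The demo_* name is illustrative only.
    @code
    #include <stdint.h>

    static int32_t demo_fixed4_add_cpt( int32_t *lhs, int32_t rhs, int flag )
    {
        int32_t old_value = __atomic_fetch_add( lhs, rhs, __ATOMIC_ACQ_REL );
        return flag ? old_value + rhs : old_value;
    }
    @endcode
*/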
1833// -------------------------------------------------------------------------
1834
1835ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1836ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1837ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1838ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1839
1840ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1841ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1842ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1843ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1844
1845// ------------------------------------------------------------------------
1846// Entries definition for integer operands
1847// TYPE_ID - operands type and size (fixed4, float4)
1848// OP_ID - operation identifier (add, sub, mul, ...)
1849// TYPE - operand type
1850// BITS - size in bits, used to distinguish low level calls
1851// OP - operator (used in critical section)
1852// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1853// ------------------------------------------------------------------------
1854// Routines for ATOMIC integer operands, other operators
1855// ------------------------------------------------------------------------
1856// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1857ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1858ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1859ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1860ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1861ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1862ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1863ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1864ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1865ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1866ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1867ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1868ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1869ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1870ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1871ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1872ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1873ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1874ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1875ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1876ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1877ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1878ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1879ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1880ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1881ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1882ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1883ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1884ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1885ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1886ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1887ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1888ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1889ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1890ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1891ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1892ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1893ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1894ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1895ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1896ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1897ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1898ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1899ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1900ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1901// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1902
1903// ------------------------------------------------------------------------
1904// Routines for C/C++ Reduction operators && and ||
1905// ------------------------------------------------------------------------
1906
1907// -------------------------------------------------------------------------
1908// Operation on *lhs, rhs bound by critical section
1909// OP - operator (it's supposed to contain an assignment)
1910// LCK_ID - lock identifier
1911// Note: don't check gtid as it should always be valid
1912// 1, 2-byte - expect valid parameter, other - check before this macro
1913#define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1914 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1915 \
1916 if( flag ) { \
1917 new_value OP rhs; \
1918 } else \
1919 new_value = (*lhs); \
1920 \
1921 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1922
1923// ------------------------------------------------------------------------
1924#ifdef KMP_GOMP_COMPAT
1925#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
1926 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1927 KMP_CHECK_GTID; \
1928 OP_CRITICAL_L_CPT( OP, 0 ); \
1929 return new_value; \
1930 }
1931#else
1932#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1933#endif /* KMP_GOMP_COMPAT */
1934
1935// ------------------------------------------------------------------------
1936// Need separate macros for &&, || because there is no combined assignment
1937#define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1938ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1939 TYPE new_value; \
1940 OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
1941 OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1942}
1943
1944ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1945ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
1946ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
1947ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
1948ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
1949ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
1950ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
1951ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
1952
1953
1954// -------------------------------------------------------------------------
1955// Routines for Fortran operators that have no C counterpart:
1956// MAX, MIN, .EQV., .NEQV.
1957// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
1958// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
1959// -------------------------------------------------------------------------
1960
1961// -------------------------------------------------------------------------
1962// MIN and MAX need separate macros
1963// OP - operator used to check whether any action is needed
1964#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
1965 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1966 \
1967 if ( *lhs OP rhs ) { /* still need actions? */ \
1968 old_value = *lhs; \
1969 *lhs = rhs; \
1970 if ( flag ) \
1971 new_value = rhs; \
1972 else \
1973 new_value = old_value; \
1974 } \
1975 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1976 return new_value; \
1977
1978// -------------------------------------------------------------------------
1979#ifdef KMP_GOMP_COMPAT
1980#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
1981 if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
1982 KMP_CHECK_GTID; \
1983 MIN_MAX_CRITSECT_CPT( OP, 0 ); \
1984 }
1985#else
1986#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
1987#endif /* KMP_GOMP_COMPAT */
1988
1989// -------------------------------------------------------------------------
1990#define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
1991 { \
1992 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1993 /*TYPE old_value; */ \
1994 temp_val = *lhs; \
1995 old_value = temp_val; \
1996 while ( old_value OP rhs && /* still need actions? */ \
1997 ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1998 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1999 *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2000 { \
2001 KMP_CPU_PAUSE(); \
2002 temp_val = *lhs; \
2003 old_value = temp_val; \
2004 } \
2005 if( flag ) \
2006 return rhs; \
2007 else \
2008 return old_value; \
2009 }
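/*
    Illustration only: an atomic max with capture in the same spirit as
    MIN_MAX_CMPXCHG_CPT, using the GCC/Clang builtins as stand-ins.  The loop retries
    only while the stored value still needs replacing; once *lhs is already >= rhs
    there is nothing to do and the current value is returned.  The demo_* name is
    illustrative only.
    @code
    #include <stdint.h>

    static int32_t demo_fixed4_max_cpt( int32_t *lhs, int32_t rhs, int flag )
    {
        int32_t old_value = __atomic_load_n( lhs, __ATOMIC_RELAXED );
        while ( old_value < rhs &&
                ! __atomic_compare_exchange_n( lhs, &old_value, rhs,
                                               0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED ) )
        {
            // old_value was refreshed by the failed exchange; re-test it
        }
        // old_value < rhs here means the exchange succeeded and rhs was stored
        return ( flag && old_value < rhs ) ? rhs : old_value;
    }
    @endcode
*/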
2010
2011// -------------------------------------------------------------------------
2012// 1-byte, 2-byte operands - use critical section
2013#define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2014ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2015 TYPE new_value, old_value; \
2016 if ( *lhs OP rhs ) { /* need actions? */ \
2017 GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2018 MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2019 } \
2020 return *lhs; \
2021}
2022
2023#define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2024ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2025 TYPE new_value, old_value; \
2026 if ( *lhs OP rhs ) { \
2027 GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2028 MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2029 } \
2030 return *lhs; \
2031}
2032
2033
2034MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2035MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2036MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2037MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2038MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2039MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2040MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2041MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2042MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2043MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2044MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2045MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2046#if KMP_HAVE_QUAD
2047MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2048MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2049#if ( KMP_ARCH_X86 )
2050 MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2051 MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2052#endif
2053#endif
2054
2055// ------------------------------------------------------------------------
2056#ifdef KMP_GOMP_COMPAT
2057#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2058 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2059 KMP_CHECK_GTID; \
2060 OP_CRITICAL_CPT( OP, 0 ); \
2061 }
2062#else
2063#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2064#endif /* KMP_GOMP_COMPAT */
2065// ------------------------------------------------------------------------
2066#define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2067ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2068 TYPE new_value; \
2069 OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2070 OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2071}
2072
2073// ------------------------------------------------------------------------
2074
2075ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2076ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2077ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2078ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2079ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2080ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2081ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2082ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
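/*
    Illustration only: the bit-level identity behind the mappings above.  .NEQV. is
    plain exclusive-or, and .EQV. (implemented as "x ^= ~rhs") is the complement of
    the exclusive-or, so a EQV b == ~(a NEQV b) for integer bit patterns.  The demo_*
    name is illustrative only.
    @code
    #include <assert.h>

    static void demo_eqv_neqv( void )
    {
        unsigned char a = 0x35, b = 0x0f;
        unsigned char neqv = a ^ b;                    // .NEQV.
        unsigned char eqv  = a ^ (unsigned char)~b;    // .EQV.
        assert( eqv == (unsigned char)~neqv );
    }
    @endcode
*/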
2083
2084// ------------------------------------------------------------------------
2085// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2086// TYPE_ID, OP_ID, TYPE - detailed above
2087// OP - operator
2088// LCK_ID - lock identifier, used to possibly distinguish lock variable
2089#define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2090ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2091 TYPE new_value; \
2092 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2093 OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2094}
2095
2096// ------------------------------------------------------------------------
2097
2098// Workaround for cmplx4. Regular routines with return value don't work
2099// on Win_32e. Let's return captured values through the additional parameter.
2100#define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2101 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2102 \
2103 if( flag ) { \
2104 (*lhs) OP rhs; \
2105 (*out) = (*lhs); \
2106 } else { \
2107 (*out) = (*lhs); \
2108 (*lhs) OP rhs; \
2109 } \
2110 \
2111 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2112 return;
2113// ------------------------------------------------------------------------
2114
2115#ifdef KMP_GOMP_COMPAT
2116#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2117 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2118 KMP_CHECK_GTID; \
2119 OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2120 }
2121#else
2122#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2123#endif /* KMP_GOMP_COMPAT */
2124// ------------------------------------------------------------------------
2125
2126#define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2127void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2128{ \
2129 KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2130 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2131// ------------------------------------------------------------------------
2132
2133#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2134ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2135 OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2136 OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2137}
2138// The end of workaround for cmplx4
2139
2140/* ------------------------------------------------------------------------- */
2141// routines for long double type
2142ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2143ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2144ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2145ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2146#if KMP_HAVE_QUAD
2147// routines for _Quad type
2148ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2149ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2150ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2151ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2152#if ( KMP_ARCH_X86 )
2153 ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2154 ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2155 ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2156 ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2157#endif
2158#endif
2159
2160// routines for complex types
2161
2162// cmplx4 routines to return void
2163ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2164ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2165ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2166ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2167
2168ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2169ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2170ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2171ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2172ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2173ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2174ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2175ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2176#if KMP_HAVE_QUAD
2177ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2178ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2179ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2180ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2181#if ( KMP_ARCH_X86 )
2182 ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2183 ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2184 ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2185 ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2186#endif
2187#endif
2188
2189#if OMP_40_ENABLED
2190
2191// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations.
2192// Supported only on IA-32 architecture and Intel(R) 64
2193
2194// -------------------------------------------------------------------------
2195// Operation on *lhs, rhs bound by critical section
2196// OP - operator (it's supposed to contain an assignment)
2197// LCK_ID - lock identifier
2198// Note: don't check gtid as it should always be valid
2199// 1, 2-byte - expect valid parameter, other - check before this macro
2200#define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2201 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2202 \
2203 if( flag ) { \
2204 /*temp_val = (*lhs);*/\
2205 (*lhs) = (rhs) OP (*lhs); \
2206 new_value = (*lhs); \
2207 } else { \
2208 new_value = (*lhs);\
2209 (*lhs) = (rhs) OP (*lhs); \
2210 } \
2211 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2212 return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
        KMP_CHECK_GTID; \
        OP_CRITICAL_CPT_REV( OP, 0 ); \
    }
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE - operands' type
//     BITS - size in bits, used to distinguish low level calls
//     OP   - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
    { \
        TYPE KMP_ATOMIC_VOLATILE temp_val; \
        TYPE old_value, new_value; \
        temp_val = *lhs; \
        old_value = temp_val; \
        new_value = rhs OP old_value; \
        while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
                      *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
                      *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
        { \
            KMP_CPU_PAUSE(); \
 \
            temp_val = *lhs; \
            old_value = temp_val; \
            new_value = rhs OP old_value; \
        } \
        if( flag ) { \
            return new_value; \
        } else \
            return old_value; \
    }
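// A minimal sketch of what OP_CMPXCHG_CPT_REV amounts to for an 8-bit subtraction
// (helper casts omitted; the real expansion uses KMP_COMPARE_AND_STORE_ACQ8 together
// with the VOLATILE_CAST helper exactly as written above):
//
//     kmp_int8 old_value, new_value;
//     do {
//         old_value = *lhs;                  // re-read the target on every retry
//         new_value = rhs - old_value;       // reversed operand order
//     } while ( ! KMP_COMPARE_AND_STORE_ACQ8( lhs, old_value, new_value ) );
//     return flag ? new_value : old_value;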

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
    TYPE new_value; \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
    OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
}


ATOMIC_CMPXCHG_CPT_REV( fixed1,  div_cpt_rev, kmp_int8,   8,  /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8,  8,  /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed1,  shl_cpt_rev, kmp_int8,   8,  <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed1,  shr_cpt_rev, kmp_int8,   8,  >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8,  8,  >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed1,  sub_cpt_rev, kmp_int8,   8,  -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed2,  div_cpt_rev, kmp_int16,  16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed2,  shl_cpt_rev, kmp_int16,  16, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed2,  shr_cpt_rev, kmp_int16,  16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed2,  sub_cpt_rev, kmp_int16,  16, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed4,  div_cpt_rev, kmp_int32,  32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed4,  shl_cpt_rev, kmp_int32,  32, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed4,  shr_cpt_rev, kmp_int32,  32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed4,  sub_cpt_rev, kmp_int32,  32, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed8,  div_cpt_rev, kmp_int64,  64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed8,  shl_cpt_rev, kmp_int64,  64, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed8,  shr_cpt_rev, kmp_int64,  64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( fixed8,  sub_cpt_rev, kmp_int64,  64, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( float4,  div_cpt_rev, kmp_real32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( float4,  sub_cpt_rev, kmp_real32, 32, -,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( float8,  div_cpt_rev, kmp_real64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV( float8,  sub_cpt_rev, kmp_real64, 64, -,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_cpt_rev
//             TYPE_ID, OP_ID,   TYPE,      BITS, OP, GOMP_FLAG
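// Example of how a compiler might use these entry points (a hedged sketch of
// compiler-generated code, not something defined in this file): a capture of a
// reversed subtraction on a 32-bit int,
//
//     #pragma omp atomic capture
//     { v = x; x = expr - x; }
//
// can be lowered to a call such as
//
//     v = __kmpc_atomic_fixed4_sub_cpt_rev( &loc, gtid, &x, expr, 0 );
//
// where the trailing flag selects whether the value before (0) or after (1) the
// reversed update is returned.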


// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP     - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
    TYPE new_value; \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
    OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
    OP_CRITICAL_CPT_REV(OP,LCK_ID) \
}


/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 )            // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 )            // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 )            // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 )            // __kmpc_atomic_float16_div_cpt_rev
#if ( KMP_ARCH_X86 )
    ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 )     // __kmpc_atomic_float16_sub_a16_cpt_rev
    ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 )     // __kmpc_atomic_float16_div_a16_cpt_rev
#endif
#endif

// routines for complex types

// ------------------------------------------------------------------------

// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
 \
    if( flag ) { \
        (*lhs) = (rhs) OP (*lhs); \
        (*out) = (*lhs); \
    } else { \
        (*out) = (*lhs); \
        (*lhs) = (rhs) OP (*lhs); \
    } \
 \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
    return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
        KMP_CHECK_GTID; \
        OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
    }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
    OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
    OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
}
// The end of workaround for cmplx4


// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 )          // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 )          // __kmpc_atomic_cmplx4_div_cpt_rev
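// Because of the workaround these two entry points return void and hand the captured
// value back through the extra 'out' parameter. A sketch of the resulting signature,
// assuming the ATOMIC_BEGIN_WRK prologue (defined earlier in this file) declares the
// parameters the macro body uses (lhs, rhs, out, flag):
//
//     void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid,
//                                            kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                            kmp_cmplx32 *out, int flag );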

ATOMIC_CRITICAL_CPT_REV( cmplx8,  sub_cpt_rev, kmp_cmplx64, -, 16c, 1 )            // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx8,  div_cpt_rev, kmp_cmplx64, /, 16c, 1 )            // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 )            // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 )            // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 )            // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 )            // __kmpc_atomic_cmplx16_div_cpt_rev
#if ( KMP_ARCH_X86 )
    ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 )   // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
    ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif
#endif

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
{ \
    KMP_DEBUG_ASSERT( __kmp_init_serial ); \
    KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));

#define CRITICAL_SWP(LCK_ID) \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
 \
    old_value = (*lhs); \
    (*lhs) = rhs; \
 \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
    return old_value;
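// Usage sketch (hypothetical compiler output, shown only for illustration): an
// OpenMP 4.0 capture-write on a 32-bit int,
//
//     #pragma omp atomic capture
//     { v = x; x = expr; }
//
// can be lowered to a call of one of the swap entry points generated below:
//
//     v = __kmpc_atomic_fixed4_swp( &loc, gtid, &x, expr );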

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
        KMP_CHECK_GTID; \
        CRITICAL_SWP( 0 ); \
    }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */


#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
    TYPE old_value; \
    GOMP_CRITICAL_SWP(GOMP_FLAG) \
    old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
    return old_value; \
}
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
    TYPE old_value; \
    GOMP_CRITICAL_SWP(GOMP_FLAG) \
    old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
    return old_value; \
}

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE,BITS) \
    { \
        TYPE KMP_ATOMIC_VOLATILE temp_val; \
        TYPE old_value, new_value; \
        temp_val = *lhs; \
        old_value = temp_val; \
        new_value = rhs; \
        while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
                      *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
                      *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
        { \
            KMP_CPU_PAUSE(); \
 \
            temp_val = *lhs; \
            old_value = temp_val; \
            new_value = rhs; \
        } \
        return old_value; \
    }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
    TYPE old_value; \
    GOMP_CRITICAL_SWP(GOMP_FLAG) \
    CMPXCHG_SWP(TYPE,BITS) \
}

ATOMIC_XCHG_SWP( fixed1, kmp_int8,  8,  KMP_ARCH_X86 )   // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 )   // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 )   // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 )   // __kmpc_atomic_float4_swp

#if ( KMP_ARCH_X86 )
    ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64,  64, KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_swp
    ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 )      // __kmpc_atomic_float8_swp
#else
    ATOMIC_XCHG_SWP(       fixed8, kmp_int64,  64, KMP_ARCH_X86 )   // __kmpc_atomic_fixed8_swp
    ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 )   // __kmpc_atomic_float8_swp
#endif
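// Design note: IA-32 has no single-instruction 64-bit exchange, so on KMP_ARCH_X86 the
// 8-byte swaps above fall back to the compare-and-store loop (CMPXCHG_SWP); on other
// targets a native KMP_XCHG_FIXED64 / KMP_XCHG_REAL64 exchange is used instead.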

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
    TYPE old_value; \
    GOMP_CRITICAL_SWP(GOMP_FLAG) \
    CRITICAL_SWP(LCK_ID) \
}

// ------------------------------------------------------------------------

// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
{ \
    KMP_DEBUG_ASSERT( __kmp_init_serial ); \
    KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));


#define CRITICAL_SWP_WRK(LCK_ID) \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
 \
    tmp = (*lhs); \
    (*lhs) = (rhs); \
    (*out) = tmp; \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
    return;

// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
    if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
        KMP_CHECK_GTID; \
        CRITICAL_SWP_WRK( 0 ); \
    }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
    TYPE tmp; \
    GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
    CRITICAL_SWP_WRK(LCK_ID) \
}
// The end of workaround for cmplx4


ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 )              // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 )              // __kmpc_atomic_float16_swp
#endif
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 )            // __kmpc_atomic_cmplx4_swp

//ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 )              // __kmpc_atomic_cmplx4_swp


ATOMIC_CRITICAL_SWP( cmplx8,  kmp_cmplx64, 16c, 1 )              // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 )              // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 )              // __kmpc_atomic_cmplx16_swp
#if ( KMP_ARCH_X86 )
    ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t,         16r, 1 )   // __kmpc_atomic_float16_a16_swp
    ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 )   // __kmpc_atomic_cmplx16_a16_swp
#endif
#endif


// End of OpenMP 4.0 Capture

#endif //OMP_40_ENABLED

#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64


#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines                                                   */
/* ------------------------------------------------------------------------ */

void
__kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
        FALSE                                   /* must use lock */
#else
        TRUE
#endif
        )
    {
        kmp_int8 old_value, new_value;

        old_value = *(kmp_int8 *) lhs;
        (*f)( &new_value, &old_value, rhs );

        /* TODO: Should this be acquire or release? */
        while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
                    *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
        {
            KMP_CPU_PAUSE();

            old_value = *(kmp_int8 *) lhs;
            (*f)( &new_value, &old_value, rhs );
        }

        return;
    }
    else {
        //
        // All 1-byte data is of integer data type.
        //

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );

        (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
    }
}
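// Usage sketch for the generic routines (the 'loc', 'x', 'incr' and 'add_char' names
// below are hypothetical, not part of this file): a compiler can handle a type with no
// specialized entry point by passing a combining callback of the form f(result, a, b):
//
//     static void add_char( void *result, void *a, void *b )
//     {
//         *(char *)result = *(char *)a + *(char *)b;
//     }
//     ...
//     __kmpc_atomic_1( &loc, gtid, &x, &incr, add_char );   // atomically: x += incr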

void
__kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
        FALSE                                   /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
        TRUE                                    /* no alignment problems */
#else
        ! ( (kmp_uintptr_t) lhs & 0x1)          /* make sure address is 2-byte aligned */
#endif
        )
    {
        kmp_int16 old_value, new_value;

        old_value = *(kmp_int16 *) lhs;
        (*f)( &new_value, &old_value, rhs );

        /* TODO: Should this be acquire or release? */
        while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
                    *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
        {
            KMP_CPU_PAUSE();

            old_value = *(kmp_int16 *) lhs;
            (*f)( &new_value, &old_value, rhs );
        }

        return;
    }
    else {
        //
        // All 2-byte data is of integer data type.
        //

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );

        (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
    }
}

void
__kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if (
        //
        // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
        // Gomp compatibility is broken if this routine is called for floats.
        //
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        TRUE                                    /* no alignment problems */
#else
        ! ( (kmp_uintptr_t) lhs & 0x3)          /* make sure address is 4-byte aligned */
#endif
        )
    {
        kmp_int32 old_value, new_value;

        old_value = *(kmp_int32 *) lhs;
        (*f)( &new_value, &old_value, rhs );

        /* TODO: Should this be acquire or release? */
        while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
                    *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
        {
            KMP_CPU_PAUSE();

            old_value = *(kmp_int32 *) lhs;
            (*f)( &new_value, &old_value, rhs );
        }

        return;
    }
    else {
        //
        // Use __kmp_atomic_lock_4i for all 4-byte data,
        // even if it isn't of integer data type.
        //

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );

        (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
    }
}

void
__kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
        FALSE                                   /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
        TRUE                                    /* no alignment problems */
#else
        ! ( (kmp_uintptr_t) lhs & 0x7)          /* make sure address is 8-byte aligned */
#endif
        )
    {
        kmp_int64 old_value, new_value;

        old_value = *(kmp_int64 *) lhs;
        (*f)( &new_value, &old_value, rhs );
        /* TODO: Should this be acquire or release? */
        while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
                    *(kmp_int64 *) &old_value,
                    *(kmp_int64 *) &new_value ) )
        {
            KMP_CPU_PAUSE();

            old_value = *(kmp_int64 *) lhs;
            (*f)( &new_value, &old_value, rhs );
        }

        return;
    } else {
        //
        // Use __kmp_atomic_lock_8i for all 8-byte data,
        // even if it isn't of integer data type.
        //

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );

        (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
        if ( __kmp_atomic_mode == 2 ) {
            __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
        }
        else
#endif /* KMP_GOMP_COMPAT */
        __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
    }
}

void
__kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );

    (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
}

void
__kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );

    (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
}

void
__kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );

    (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
}

void
__kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );

    (*f)( lhs, lhs, rhs );

#ifdef KMP_GOMP_COMPAT
    if ( __kmp_atomic_mode == 2 ) {
        __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
    }
    else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
}

// AC: the same two routines as GOMP_atomic_start/end, but to be called by our compiler;
//     duplicated so that pure Intel code does not depend on third-party names.
// TODO: consider adding a GTID parameter after consultation with Ernesto/Xinmin.
void
__kmpc_atomic_start(void)
{
    int gtid = __kmp_entry_gtid();
    KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}


void
__kmpc_atomic_end(void)
{
    int gtid = __kmp_get_gtid();
    KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
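// Usage sketch (hypothetical compiler output, shown only for illustration): when no
// specialized or generic entry point fits, the update can be bracketed with the
// global-lock routines above:
//
//     __kmpc_atomic_start();
//     x = x op expr;                       // protected by __kmp_atomic_lock
//     __kmpc_atomic_end();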

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/*!
@}
*/

// end of file