// z_Linux_asm.S:  - microtasking routines specifically
//                   written for Intel platforms running Linux* OS

//
////===----------------------------------------------------------------------===//
////
////                     The LLVM Compiler Infrastructure
////
//// This file is dual licensed under the MIT and the University of Illinois Open
//// Source Licenses. See LICENSE.txt for details.
////
////===----------------------------------------------------------------------===//
//

// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------

#include "kmp_config.h"

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
//.macro pause_op
//        mov    $100, %rax
//        delay  %rax
//.endm
# else
# define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC

# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
# define KMP_LABEL(x) L_##x             // form the name of label
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
.macro ALIGN
        .align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
.macro PROC
        ALIGN 4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
# if KMP_MIC
# define KMP_LABEL(x) L_##x          // local label
# else
# define KMP_LABEL(x) .L_##x         // local label hidden from backtraces
# endif // KMP_MIC
.macro ALIGN size
        .align 1<<(\size)
.endm
.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        .align 16
        .type  \proc,@function
        .size  \proc,.-\proc
.endm
.macro PROC proc
        ALIGN 4
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
.macro KMP_CFI_DEF_OFFSET sz
        .cfi_def_cfa_offset \sz
.endm
.macro KMP_CFI_OFFSET reg, sz
        .cfi_offset \reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
        .cfi_def_cfa_register \reg
.endm
.macro KMP_CFI_DEF reg, sz
        .cfi_def_cfa \reg,\sz
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
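// Usage sketch of the macros above (illustrative, Linux*/ELF branch): writing
//      PROC  foo
// expands to roughly
//      ALIGN 4
//      .globl foo
// foo:
//      .cfi_startproc
// and the matching
//      DEBUG_INFO foo
// closes the routine with .cfi_endproc and emits .type/.size for it, so every
// function below is bracketed by this PROC/DEBUG_INFO pair.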

#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64

# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
# define KMP_LABEL(x) L_##x             // form the name of label

.macro ALIGN
        .align $0
.endmacro

.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

.macro PROC
        ALIGN 4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

.macro ALIGN size
        .align 1<<(\size)
.endm

.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        ALIGN 2
        .type  \proc,@function
        .size  \proc,.-\proc
.endm

.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
# endif // KMP_OS_DARWIN

#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64

// -----------------------------------------------------------------------
// data
// -----------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT

// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.

# if KMP_ARCH_X86
# if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
# else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
# endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
# if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
# else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
# endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */


#if KMP_ARCH_X86 && !KMP_ARCH_PPC64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------

        .ident "Intel Corporation"
        .data
        ALIGN 4
// void
// __kmp_x86_pause( void );

        .text
        PROC  __kmp_x86_pause

        pause_op
        ret

        DEBUG_INFO __kmp_x86_pause

// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );

        PROC  __kmp_x86_cpuid

        pushl %ebp
        movl  %esp,%ebp
        pushl %edi
        pushl %ebx
        pushl %ecx
        pushl %edx

        movl  8(%ebp), %eax
        movl  12(%ebp), %ecx
        cpuid                   // Query the CPUID for the current processor

        movl  16(%ebp), %edi
        movl  %eax, 0(%edi)
        movl  %ebx, 4(%edi)
        movl  %ecx, 8(%edi)
        movl  %edx, 12(%edi)

        popl  %edx
        popl  %ecx
        popl  %ebx
        popl  %edi
        movl  %ebp, %esp
        popl  %ebp
        ret

        DEBUG_INFO __kmp_x86_cpuid

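// Illustrative C-side use of the routine above (a sketch only; the actual
// declaration and the cpuid buffer layout live in the runtime headers, and
// the struct/field names below are assumptions for illustration):
//
//   struct kmp_cpuid { kmp_uint32 eax, ebx, ecx, edx; } buf;
//   __kmp_x86_cpuid(0, 0, &buf);   // leaf 0: max supported leaf, vendor string
//   __kmp_x86_cpuid(1, 0, &buf);   // leaf 1: family/model/stepping, feature bits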

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );

        PROC  __kmp_test_then_add32

        movl      4(%esp), %ecx
        movl      8(%esp), %eax
        lock
        xaddl     %eax,(%ecx)
        ret

        DEBUG_INFO __kmp_test_then_add32

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
//      p:      4(%esp)
//      d:      8(%esp)
//
// return:      %al
        PROC  __kmp_xchg_fixed8

        movl      4(%esp), %ecx    // "p"
        movb      8(%esp), %al     // "d"

        lock
        xchgb     %al,(%ecx)
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
//      p:      4(%esp)
//      d:      8(%esp)
// return:      %ax
        PROC  __kmp_xchg_fixed16

        movl      4(%esp), %ecx    // "p"
        movw      8(%esp), %ax     // "d"

        lock
        xchgw     %ax,(%ecx)
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
//      p:      4(%esp)
//      d:      8(%esp)
//
// return:      %eax
        PROC  __kmp_xchg_fixed32

        movl      4(%esp), %ecx    // "p"
        movl      8(%esp), %eax    // "d"

        lock
        xchgl     %eax,(%ecx)
        ret

        DEBUG_INFO __kmp_xchg_fixed32


// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
        PROC  __kmp_compare_and_store8

        movl      4(%esp), %ecx
        movb      8(%esp), %al
        movb      12(%esp), %dl
        lock
        cmpxchgb  %dl,(%ecx)
        sete      %al           // if %al == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // sign extend previous instruction
        ret

        DEBUG_INFO __kmp_compare_and_store8

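// For reference, the __kmp_compare_and_store{8,16,32,64} family below all
// implement the same C-level contract; a minimal sketch using a GCC/Clang
// builtin (illustration only, not how the runtime builds these):
//
//   kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv,
//                                     kmp_int8 sv) {
//       return __sync_bool_compare_and_swap(p, cv, sv) ? 1 : 0;
//   }
//
// The _ret variants further down instead return the old value of *p, i.e.
// the __sync_val_compare_and_swap() flavor of the operation.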
// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
        PROC  __kmp_compare_and_store16

        movl      4(%esp), %ecx
        movw      8(%esp), %ax
        movw      12(%esp), %dx
        lock
        cmpxchgw  %dx,(%ecx)
        sete      %al           // if %ax == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // sign extend previous instruction
        ret

        DEBUG_INFO __kmp_compare_and_store16

// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
        PROC  __kmp_compare_and_store32

        movl      4(%esp), %ecx
        movl      8(%esp), %eax
        movl      12(%esp), %edx
        lock
        cmpxchgl  %edx,(%ecx)
        sete      %al           // if %eax == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // sign extend previous instruction
        ret

        DEBUG_INFO __kmp_compare_and_store32

// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
        PROC  __kmp_compare_and_store64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi
        movl      12(%ebp), %eax    // "cv" low order word
        movl      16(%ebp), %edx    // "cv" high order word
        movl      20(%ebp), %ebx    // "sv" low order word
        movl      24(%ebp), %ecx    // "sv" high order word
        lock
        cmpxchg8b (%edi)
        sete      %al           // if %edx:eax == (%edi) set %al = 1 else set %al = 0
        and       $1, %eax      // sign extend previous instruction
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64

// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
        PROC  __kmp_compare_and_store_ret8

        movl      4(%esp), %ecx
        movb      8(%esp), %al
        movb      12(%esp), %dl
        lock
        cmpxchgb  %dl,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8

// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
//                               kmp_int16 sv);
        PROC  __kmp_compare_and_store_ret16

        movl      4(%esp), %ecx
        movw      8(%esp), %ax
        movw      12(%esp), %dx
        lock
        cmpxchgw  %dx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16

// kmp_int32
// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
//                               kmp_int32 sv);
        PROC  __kmp_compare_and_store_ret32

        movl      4(%esp), %ecx
        movl      8(%esp), %eax
        movl      12(%esp), %edx
        lock
        cmpxchgl  %edx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32

// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
//                               kmp_int64 sv);
        PROC  __kmp_compare_and_store_ret64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi
        movl      12(%ebp), %eax    // "cv" low order word
        movl      16(%ebp), %edx    // "cv" high order word
        movl      20(%ebp), %ebx    // "sv" low order word
        movl      24(%ebp), %ecx    // "sv" high order word
        lock
        cmpxchg8b (%edi)
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
//      addr:   4(%esp)
//      data:   8(%esp)
//
// return:      %eax
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp
        pushl   %esi

        movl    4(%ebp), %esi
        flds    (%esi)
                        // load <addr>
        fsts    -4(%ebp)
                        // store old value

        movl    8(%ebp), %eax

        lock
        xchgl   %eax, (%esi)

        flds    -4(%ebp)
                        // return old value

        popl    %esi
        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32

# endif /* !KMP_ASM_INTRINS */


//------------------------------------------------------------------------
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
//      p:      4(%esp)
        PROC  __kmp_load_x87_fpu_control_word

        movl  4(%esp), %eax
        fldcw (%eax)
        ret

        DEBUG_INFO __kmp_load_x87_fpu_control_word


//------------------------------------------------------------------------
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
//      p:      4(%esp)
        PROC  __kmp_store_x87_fpu_control_word

        movl  4(%esp), %eax
        fstcw (%eax)
        ret

        DEBUG_INFO __kmp_store_x87_fpu_control_word


//------------------------------------------------------------------------
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
        PROC  __kmp_clear_x87_fpu_status_word

        fnclex
        ret

        DEBUG_INFO __kmp_clear_x87_fpu_status_word


//------------------------------------------------------------------------
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//     (*pkfn)( & gtid, & tid, argv[0], ... );
//     return 1;
// }

// -- Begin __kmp_invoke_microtask
// mark_begin;
        PROC  __kmp_invoke_microtask

        pushl %ebp
        KMP_CFI_DEF_OFFSET 8
        KMP_CFI_OFFSET ebp,-8
        movl %esp,%ebp          // establish the base pointer for this routine.
        KMP_CFI_REGISTER ebp
        subl $8,%esp            // allocate space for two local variables.
                                // These variables are:
                                //      argv: -4(%ebp)
                                //      temp: -8(%ebp)
                                //
        pushl %ebx              // save %ebx to use during this routine
                                //
#if OMPT_SUPPORT
        movl 28(%ebp),%ebx      // get exit_frame address
        movl %ebp,(%ebx)        // save exit_frame
#endif

        movl 20(%ebp),%ebx      // Stack alignment - # args
        addl $2,%ebx            // #args +2  Always pass at least 2 args (gtid and tid)
        shll $2,%ebx            // Number of bytes used on stack: (#args+2)*4
        movl %esp,%eax          //
        subl %ebx,%eax          // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
        movl %eax,%ebx          // Save to %ebx
        andl $0xFFFFFF80,%eax   // mask off 7 bits
        subl %eax,%ebx          // Amount to subtract from %esp
        subl %ebx,%esp          // Prepare the stack ptr --
                                //    now it will be aligned on 128-byte boundary at the call

        movl 24(%ebp),%eax      // copy from p_argv[]
        movl %eax,-4(%ebp)      // into the local variable *argv.

        movl 20(%ebp),%ebx      // argc is 20(%ebp)
        shll $2,%ebx

KMP_LABEL(invoke_2):
        cmpl $0,%ebx
        jg   KMP_LABEL(invoke_4)
        jmp  KMP_LABEL(invoke_3)
        ALIGN 2
KMP_LABEL(invoke_4):
        movl -4(%ebp),%eax
        subl $4,%ebx            // decrement argc.
        addl %ebx,%eax          // index into argv.
        movl (%eax),%edx
        pushl %edx

        jmp  KMP_LABEL(invoke_2)
        ALIGN 2
KMP_LABEL(invoke_3):
        leal 16(%ebp),%eax      // push & tid
        pushl %eax

        leal 12(%ebp),%eax      // push & gtid
        pushl %eax

        movl 8(%ebp),%ebx
        call *%ebx              // call (*pkfn)();

        movl $1,%eax            // return 1;

        movl -12(%ebp),%ebx     // restore %ebx
        leave
        KMP_CFI_DEF esp,4
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

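// Roughly equivalent C-level logic for the routine above (sketch only; the
// per-argument marshalling and the 128-byte stack alignment done in assembly
// cannot be expressed portably in C):
//
//   int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid,
//                              int argc, void *p_argv[]) {
//       // e.g. for argc == 2:  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
//       return 1;
//   }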

// kmp_uint64
// __kmp_hardware_timestamp(void)
        PROC  __kmp_hardware_timestamp
        rdtsc
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

#endif /* KMP_ARCH_X86 */


#if KMP_ARCH_X86_64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
        .ident "Intel Corporation"
// --	.file "z_Linux_asm.S"
        .data
        ALIGN 4

// To prevent getting our code into .data section .text added to every routine
// definition for x86_64.
//------------------------------------------------------------------------
// FUNCTION __kmp_x86_cpuid
//
// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
//
// parameters:
//      mode:           %edi
//      mode2:          %esi
//      cpuid_buffer:   %rdx
        .text
        PROC  __kmp_x86_cpuid

        pushq  %rbp
        movq   %rsp,%rbp
        pushq  %rbx             // callee-save register

        movl   %esi, %ecx       // "mode2"
        movl   %edi, %eax       // "mode"
        movq   %rdx, %rsi       // cpuid_buffer
        cpuid                   // Query the CPUID for the current processor

        movl   %eax, 0(%rsi)    // store results into buffer
        movl   %ebx, 4(%rsi)
        movl   %ecx, 8(%rsi)
        movl   %edx, 12(%rsi)

        popq   %rbx             // callee-save register
        movq   %rbp, %rsp
        popq   %rbp
        ret

        DEBUG_INFO __kmp_x86_cpuid



# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
//      p:      %rdi
//      d:      %esi
//
// return:      %eax
        .text
        PROC  __kmp_test_then_add32

        movl      %esi, %eax    // "d"
        lock
        xaddl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_test_then_add32

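// Equivalent C-level contract for __kmp_test_then_add32/64, expressed with a
// GCC/Clang builtin (illustration only; the runtime otherwise uses the
// assembly here or the KMP_ASM_INTRINS path):
//
//   kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 d) {
//       return __sync_fetch_and_add(p, d);  // returns the value *p held before the add
//   }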

//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
//      p:      %rdi
//      d:      %rsi
// return:      %rax
        .text
        PROC  __kmp_test_then_add64

        movq      %rsi, %rax    // "d"
        lock
        xaddq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_test_then_add64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
//      p:      %rdi
//      d:      %sil
//
// return:      %al
        .text
        PROC  __kmp_xchg_fixed8

        movb      %sil, %al     // "d"

        lock
        xchgb     %al,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
//      p:      %rdi
//      d:      %si
// return:      %ax
        .text
        PROC  __kmp_xchg_fixed16

        movw      %si, %ax      // "d"

        lock
        xchgw     %ax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
//      p:      %rdi
//      d:      %esi
//
// return:      %eax
        .text
        PROC  __kmp_xchg_fixed32

        movl      %esi, %eax    // "d"

        lock
        xchgl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
//      p:      %rdi
//      d:      %rsi
// return:      %rax
        .text
        PROC  __kmp_xchg_fixed64

        movq      %rsi, %rax    // "d"

        lock
        xchgq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed64


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store8

        movb      %sil, %al     // "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        sete      %al           // if %al == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // sign extend previous instruction for return value
        ret

        DEBUG_INFO __kmp_compare_and_store8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %si
//      sv:     %dx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store16

        movw      %si, %ax      // "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        sete      %al           // if %ax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // sign extend previous instruction for return value
        ret

        DEBUG_INFO __kmp_compare_and_store16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store32

        movl      %esi, %eax    // "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        sete      %al           // if %eax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // sign extend previous instruction for return value
        ret

        DEBUG_INFO __kmp_compare_and_store32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %rsi
//      sv:     %rdx
// return:      %eax
        .text
        PROC  __kmp_compare_and_store64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        sete      %al           // if %rax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // sign extend previous instruction for return value
        ret

        DEBUG_INFO __kmp_compare_and_store64

//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store_ret8

        movb      %sil, %al     // "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %si
//      sv:     %dx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store_ret16

        movw      %si, %ax      // "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl      %esi, %eax    // "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %rsi
//      sv:     %rdx
// return:      %eax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64

# endif /* !KMP_ASM_INTRINS */


# if !KMP_MIC

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 4 bytes)
//
// return:      %xmm0 (lower 4 bytes)
        .text
        PROC  __kmp_xchg_real32

        movd    %xmm0, %eax     // load "data" to eax

        lock
        xchgl   %eax, (%rdi)

        movd    %eax, %xmm0     // load old value into return register

        ret

        DEBUG_INFO __kmp_xchg_real32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 8 bytes)
// return:      %xmm0 (lower 8 bytes)
        .text
        PROC  __kmp_xchg_real64

        movd    %xmm0, %rax     // load "data" to rax

        lock
        xchgq   %rax, (%rdi)

        movd    %rax, %xmm0     // load old value into return register
        ret

        DEBUG_INFO __kmp_xchg_real64

# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */
1109
1110
1111//------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00001112// FUNCTION __kmp_load_x87_fpu_control_word
1113//
1114// void
1115// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
1116//
1117// parameters:
1118// p: %rdi
Jim Cownie5e8470a2013-09-27 10:38:44 +00001119 .text
1120 PROC __kmp_load_x87_fpu_control_word
1121
1122 fldcw (%rdi)
1123 ret
1124
1125 DEBUG_INFO __kmp_load_x87_fpu_control_word
1126
1127
1128//------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00001129// FUNCTION __kmp_store_x87_fpu_control_word
1130//
1131// void
1132// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
1133//
1134// parameters:
1135// p: %rdi
Jim Cownie5e8470a2013-09-27 10:38:44 +00001136 .text
1137 PROC __kmp_store_x87_fpu_control_word
1138
1139 fstcw (%rdi)
1140 ret
1141
1142 DEBUG_INFO __kmp_store_x87_fpu_control_word
1143
1144
1145//------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00001146// FUNCTION __kmp_clear_x87_fpu_status_word
1147//
1148// void
1149// __kmp_clear_x87_fpu_status_word();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001150 .text
1151 PROC __kmp_clear_x87_fpu_status_word
1152
Jonathan Peyton621743b2015-08-20 19:46:14 +00001153#if KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001154// TODO: remove the workaround for problem with fnclex instruction (no CQ known)
1155 fstenv -32(%rsp) // store FP env
1156 andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW
1157 fldenv -32(%rsp) // load FP env back
1158 ret
1159#else
1160 fnclex
1161 ret
1162#endif
1163
1164 DEBUG_INFO __kmp_clear_x87_fpu_status_word
1165
1166
1167//------------------------------------------------------------------------
Jim Cownie5e8470a2013-09-27 10:38:44 +00001168// typedef void (*microtask_t)( int *gtid, int *tid, ... );
1169//
1170// int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001171// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
Jim Cownie5e8470a2013-09-27 10:38:44 +00001172// int gtid, int tid,
1173// int argc, void *p_argv[] ) {
1174// (*pkfn)( & gtid, & tid, argv[0], ... );
1175// return 1;
1176// }
1177//
Jonathan Peyton30419822017-05-12 18:01:32 +00001178// note: at call to pkfn must have %rsp 128-byte aligned for compiler
Jim Cownie5e8470a2013-09-27 10:38:44 +00001179//
1180// parameters:
1181// %rdi: pkfn
1182// %esi: gtid
1183// %edx: tid
1184// %ecx: argc
1185// %r8: p_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001186// %r9: &exit_frame
Jim Cownie5e8470a2013-09-27 10:38:44 +00001187//
1188// locals:
1189// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1190// __tid: tid parm pushed on stack so can pass &tid to pkfn
1191//
1192// reg temps:
1193// %rax: used all over the place
1194// %rdx: used in stack pointer alignment calculation
1195// %r11: used to traverse p_argv array
1196// %rsi: used as temporary for stack parameters
1197// used as temporary for number of pkfn parms to push
1198// %rbx: used to hold pkfn address, and zero constant, callee-save
1199//
1200// return: %eax (always 1/TRUE)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001201__gtid = -16
1202__tid = -24
1203
1204// -- Begin __kmp_invoke_microtask
1205// mark_begin;
1206 .text
1207 PROC __kmp_invoke_microtask
1208
1209 pushq %rbp // save base pointer
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001210 KMP_CFI_DEF_OFFSET 16
1211 KMP_CFI_OFFSET rbp,-16
Jim Cownie5e8470a2013-09-27 10:38:44 +00001212 movq %rsp,%rbp // establish the base pointer for this routine.
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001213 KMP_CFI_REGISTER rbp
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001214
1215#if OMPT_SUPPORT
1216 movq %rbp, (%r9) // save exit_frame
1217#endif
1218
Jim Cownie5e8470a2013-09-27 10:38:44 +00001219 pushq %rbx // %rbx is callee-saved register
Jim Cownie5e8470a2013-09-27 10:38:44 +00001220 pushq %rsi // Put gtid on stack so can pass &tgid to pkfn
1221 pushq %rdx // Put tid on stack so can pass &tid to pkfn
1222
1223 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
1224 movq $0, %rbx // constant for cmovs later
1225 subq $4, %rax // subtract four args passed in registers to pkfn
Jonathan Peyton621743b2015-08-20 19:46:14 +00001226#if KMP_MIC
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001227 js KMP_LABEL(kmp_0) // jump to movq
1228 jmp KMP_LABEL(kmp_0_exit) // jump ahead
1229KMP_LABEL(kmp_0):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001230 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001231KMP_LABEL(kmp_0_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001232#else
1233 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
Jonathan Peyton621743b2015-08-20 19:46:14 +00001234#endif // KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001235
1236 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
1237 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
1238
1239 movq %rsp, %rdx //
1240 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
1241 // without align, stack ptr would be this
1242 movq %rdx, %rax // Save to %rax
1243
1244 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
1245 subq %rax, %rdx // Amount to subtract from %rsp
1246 subq %rdx, %rsp // Prepare the stack ptr --
1247 // now %rsp will align to 128-byte boundary at call site
1248
1249 // setup pkfn parameter reg and stack
1250 movq %rcx, %rax // argc -> %rax
1251 cmpq $0, %rsi
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001252 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
Jim Cownie5e8470a2013-09-27 10:38:44 +00001253 shlq $3, %rcx // argc*8 -> %rcx
1254 movq %r8, %rdx // p_argv -> %rdx
1255 addq %rcx, %rdx // &p_argv[argc] -> %rdx
1256
1257 movq %rsi, %rcx // max (0, argc-4) -> %rcx
1258
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001259KMP_LABEL(kmp_invoke_push_parms):
1260 // push nth - 7th parms to pkfn on stack
Jim Cownie5e8470a2013-09-27 10:38:44 +00001261 subq $8, %rdx // decrement p_argv pointer to previous parm
1262 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
1263 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
1264 subl $1, %ecx
1265
1266// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
1267// if the name of the label that is an operand of this jecxz starts with a dot (".");
1268// Apple's linker does not support 1-byte length relocation;
1269// Resolution: replace all .labelX entries with L_labelX.
1270
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001271 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
1272 jmp KMP_LABEL(kmp_invoke_push_parms)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001273 ALIGN 3
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001274KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001275 // order here is important to avoid trashing
1276 // registers used for both input and output parms!
1277 movq %rdi, %rbx // pkfn -> %rbx
1278 leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
1279 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
1280
1281 movq %r8, %r11 // p_argv -> %r11
1282
Jonathan Peyton621743b2015-08-20 19:46:14 +00001283#if KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001284 cmpq $4, %rax // argc >= 4?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001285 jns KMP_LABEL(kmp_4) // jump to movq
1286 jmp KMP_LABEL(kmp_4_exit) // jump ahead
1287KMP_LABEL(kmp_4):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001288 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001289KMP_LABEL(kmp_4_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001290
1291 cmpq $3, %rax // argc >= 3?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001292 jns KMP_LABEL(kmp_3) // jump to movq
1293 jmp KMP_LABEL(kmp_3_exit) // jump ahead
1294KMP_LABEL(kmp_3):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001295 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001296KMP_LABEL(kmp_3_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001297
1298 cmpq $2, %rax // argc >= 2?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001299 jns KMP_LABEL(kmp_2) // jump to movq
1300 jmp KMP_LABEL(kmp_2_exit) // jump ahead
1301KMP_LABEL(kmp_2):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001302 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001303KMP_LABEL(kmp_2_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001304
1305 cmpq $1, %rax // argc >= 1?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001306 jns KMP_LABEL(kmp_1) // jump to movq
1307 jmp KMP_LABEL(kmp_1_exit) // jump ahead
1308KMP_LABEL(kmp_1):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001309 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001310KMP_LABEL(kmp_1_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001311#else
1312 cmpq $4, %rax // argc >= 4?
1313 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
1314
1315 cmpq $3, %rax // argc >= 3?
1316 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
1317
1318 cmpq $2, %rax // argc >= 2?
1319 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
1320
1321 cmpq $1, %rax // argc >= 1?
1322 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
Jonathan Peyton621743b2015-08-20 19:46:14 +00001323#endif // KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001324
1325 call *%rbx // call (*pkfn)();
1326 movq $1, %rax // move 1 into return register;
1327
1328 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
1329 movq %rbp, %rsp // restore stack pointer
1330 popq %rbp // restore frame pointer
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001331 KMP_CFI_DEF rsp,8
Jim Cownie5e8470a2013-09-27 10:38:44 +00001332 ret
1333
1334 DEBUG_INFO __kmp_invoke_microtask
1335// -- End __kmp_invoke_microtask
1336
1337// kmp_uint64
1338// __kmp_hardware_timestamp(void)
1339 .text
1340 PROC __kmp_hardware_timestamp
1341 rdtsc
1342 shlq $32, %rdx
1343 orq %rdx, %rax
1344 ret
1345
1346 DEBUG_INFO __kmp_hardware_timestamp
1347// -- End __kmp_hardware_timestamp
1348
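// C-level view of the value assembled above (illustrative): rdtsc returns the
// 64-bit timestamp counter split across %edx:%eax, so the routine returns
//   ((kmp_uint64)edx << 32) | eax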
//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
        .text
        PROC  __kmp_bsr32

        bsr    %edi,%eax
        ret

        DEBUG_INFO __kmp_bsr32

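// Behavior sketch: __kmp_bsr32(v) returns the bit index of the highest set bit
// of v (e.g. __kmp_bsr32(0x10) == 4); as with the bsr instruction itself, the
// result is undefined for v == 0.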

// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */

// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//     (*pkfn)( & gtid, & tid, argv[0], ... );
//     return 1;
// }
//
// parameters:
//      x0:     pkfn
//      w1:     gtid
//      w2:     tid
//      w3:     argc
//      x4:     p_argv
//      x5:     &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//      x8:     used to hold pkfn address
//      w9:     used as temporary for number of pkfn parms
//      x10:    used to traverse p_argv array
//      x11:    used as temporary for stack placement calculation
//      x12:    used as temporary for stack parameters
//      x19:    used to preserve exit_frame_ptr, callee-save
//
// return:      w0      (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC __kmp_invoke_microtask

        stp     x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
        stp     x19, x20, [sp, #-16]!
# endif
        mov     x29, sp

        orr     w9, wzr, #1
        add     w9, w9, w3, lsr #1
        sub     sp, sp, w9, lsl #4
        mov     x11, sp
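        // Note (added for clarity): the three instructions above reserve
        // (1 + argc/2) * 16 bytes of stack, which is always enough for the
        // stack-passed arguments at 8 bytes each while keeping sp 16-byte
        // aligned; x11 then points at the base of that argument area.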

        mov     x8, x0
        str     w1, [x29, #-__gtid]
        str     w2, [x29, #-__tid]
        mov     w9, w3
        mov     x10, x4
# if OMPT_SUPPORT
        mov     x19, x5
        str     x29, [x19]
# endif

        sub     x0, x29, #__gtid
        sub     x1, x29, #__tid

        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x2, [x10]

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x3, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x4, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x5, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x6, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x7, [x10, #8]!

KMP_LABEL(kmp_0):
        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x12, [x10, #8]!
        str     x12, [x11], #8
        b       KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
        blr     x8
        orr     w0, wzr, #1
        mov     sp, x29
# if OMPT_SUPPORT
        str     xzr, [x19]
        ldp     x19, x20, [sp], #16
# endif
        ldp     x29, x30, [sp], #16
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */

#if KMP_ARCH_PPC64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//     (*pkfn)( & gtid, & tid, argv[0], ... );
//     return 1;
// }
//
// parameters:
//      r3:     pkfn
//      r4:     gtid
//      r5:     tid
//      r6:     argc
//      r7:     p_argv
//      r8:     &exit_frame
//
// return:      r3      (always 1/TRUE)
//
        .text
# if KMP_ARCH_PPC64_LE
        .abiversion 2
# endif
        .globl  __kmp_invoke_microtask

# if KMP_ARCH_PPC64_LE
        .p2align        4
# else
        .p2align        2
# endif

        .type   __kmp_invoke_microtask,@function

# if KMP_ARCH_PPC64_LE
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
        addis 2, 12, .TOC.-.Lfunc_gep0@ha
        addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
        .localentry     __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
        .section        .opd,"aw",@progbits
__kmp_invoke_microtask:
        .p2align        3
        .quad   .Lfunc_begin0
        .quad   .TOC.@tocbase
        .quad   0
        .text
.Lfunc_begin0:
# endif

// -- Begin __kmp_invoke_microtask
// mark_begin;

// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
// to save r30 to hold a copy of r8.

        .cfi_startproc
        mflr 0
        std 31, -8(1)
        std 0, 16(1)

// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
        mr 31, 1
        .cfi_def_cfa_register r31
        .cfi_offset r31, -8
        .cfi_offset lr, 16

// Compute the size necessary for the local stack frame.
# if KMP_ARCH_PPC64_LE
        li 12, 72
# else
        li 12, 88
# endif
        sldi 0, 6, 3
        add 12, 0, 12
        neg 12, 12

// We need to make sure that the stack frame stays aligned (to 16 bytes, except
// under the BG/Q CNK, where it must be to 32 bytes).
# if KMP_OS_CNK
        li 0, -32
# else
        li 0, -16
# endif
        and 12, 0, 12

// Establish the local stack frame.
        stdux 1, 1, 12

# if OMPT_SUPPORT
        .cfi_offset r30, -16
        std 30, -16(31)
        std 1, 0(8)
        mr 30, 8
# endif

// Store gtid and tid to the stack because they're passed by reference to the microtask.
        stw 4, -20(31)
        stw 5, -24(31)

        mr 12, 6
        mr 4, 7

        cmpwi 0, 12, 1
        blt 0, .Lcall

        ld 5, 0(4)

        cmpwi 0, 12, 2
        blt 0, .Lcall

        ld 6, 8(4)

        cmpwi 0, 12, 3
        blt 0, .Lcall

        ld 7, 16(4)

        cmpwi 0, 12, 4
        blt 0, .Lcall

        ld 8, 24(4)

        cmpwi 0, 12, 5
        blt 0, .Lcall

        ld 9, 32(4)

        cmpwi 0, 12, 6
        blt 0, .Lcall

        ld 10, 40(4)

        cmpwi 0, 12, 7
        blt 0, .Lcall

// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
        addi 12, 12, -6
        mtctr 12

// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
// 32 + 8*8 == 96 bytes above r1 for ELFv2.
        addi 4, 4, 40
# if KMP_ARCH_PPC64_LE
        addi 12, 1, 88
# else
        addi 12, 1, 104
# endif

.Lnext:
        ldu 0, 8(4)
        stdu 0, 8(12)
        bdnz .Lnext

.Lcall:
# if KMP_ARCH_PPC64_LE
        std 2, 24(1)
        mr 12, 3
#else
        std 2, 40(1)
// For ELFv1, we need to load the actual function address from the function descriptor.
        ld 12, 0(3)
        ld 2, 8(3)
        ld 11, 16(3)
#endif

        addi 3, 31, -20
        addi 4, 31, -24

        mtctr 12
        bctrl
# if KMP_ARCH_PPC64_LE
        ld 2, 24(1)
# else
        ld 2, 40(1)
# endif

# if OMPT_SUPPORT
        li 3, 0
        std 3, 0(30)
# endif

        li 3, 1

# if OMPT_SUPPORT
        ld 30, -16(31)
# endif

        mr 1, 31
        ld 0, 16(1)
        ld 31, -8(1)
        mtlr 0
        blr

        .long   0
        .quad   0
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_PPC64 */

#if KMP_ARCH_ARM || KMP_ARCH_MIPS
        .data
        .comm .gomp_critical_user_,32,8
        .data
        .align 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .size __kmp_unnamed_critical_addr,4
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */

#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
        .data
        .comm .gomp_critical_user_,32,8
        .data
        .align 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .size __kmp_unnamed_critical_addr,8
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 */

#if KMP_OS_LINUX
# if KMP_ARCH_ARM
.section .note.GNU-stack,"",%progbits
# else
.section .note.GNU-stack,"",@progbits
# endif
#endif