// z_Linux_asm.s:  - microtasking routines specifically
//                   written for Intel platforms running Linux* OS

//
////===----------------------------------------------------------------------===//
////
////                     The LLVM Compiler Infrastructure
////
//// This file is dual licensed under the MIT and the University of Illinois Open
//// Source Licenses.  See LICENSE.txt for details.
////
////===----------------------------------------------------------------------===//
//

// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------
18
#include "kmp_config.h"

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

# if KMP_MIC
//
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
#  define pause_op   .byte 0xf3,0x90    // encoding of 'pause' (rep;nop)
# endif // KMP_MIC

# if KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
#  define KMP_LABEL(x) L_##x             // form the name of label
// Mach-O assembler has no .cfi_* support here; the KMP_CFI_* macros are no-ops.
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
.macro ALIGN
	.align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
#  if KMP_MIC
#   define KMP_LABEL(x) L_##x          // local label
#  else
#   define KMP_LABEL(x) .L_##x         // local label hidden from backtraces
#  endif // KMP_MIC
.macro ALIGN size
	.align 1<<(\size)
.endm
.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	.align 16
	.type  \proc,@function
	.size  \proc,.-\proc
.endm
.macro PROC proc
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm
// Thin wrappers over the DWARF CFI directives so the Darwin branch can
// stub them out with identical call sites.
.macro KMP_CFI_DEF_OFFSET sz
	.cfi_def_cfa_offset	\sz
.endm
.macro KMP_CFI_OFFSET reg, sz
	.cfi_offset	\reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
	.cfi_def_cfa_register	\reg
.endm
.macro KMP_CFI_DEF reg, sz
	.cfi_def_cfa	\reg,\sz
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +0000111
#if KMP_OS_LINUX && KMP_ARCH_AARCH64

# define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

.macro ALIGN size
	.align 1<<(\size)
.endm

.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	ALIGN 2
	.type  \proc,@function
	.size  \proc,.-\proc
.endm

.macro PROC proc
	ALIGN 2
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm

#endif // KMP_OS_LINUX && KMP_ARCH_AARCH64
Jim Cownie5e8470a2013-09-27 10:38:44 +0000138
// -----------------------------------------------------------------------
// data
// -----------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT

//
// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.
//

# if KMP_ARCH_X86
#  if KMP_OS_DARWIN
	.data
	.comm .gomp_critical_user_,32
	.data
	.globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
	.long .gomp_critical_user_
#  else /* Linux* OS */
	.data
	.comm .gomp_critical_user_,32,8
	.data
	ALIGN 4
	.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
	.4byte .gomp_critical_user_
	.type __kmp_unnamed_critical_addr,@object
	.size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if KMP_OS_DARWIN
	.data
	.comm .gomp_critical_user_,32
	.data
	.globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
	.quad .gomp_critical_user_
#  else /* Linux* OS */
	.data
	.comm .gomp_critical_user_,32,8
	.data
	ALIGN 8
	.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
	.8byte .gomp_critical_user_
	.type __kmp_unnamed_critical_addr,@object
	.size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */
195
196
#if KMP_ARCH_X86 && !KMP_ARCH_PPC64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------

	.ident "Intel Corporation"
	.data
	ALIGN 4
// void
// __kmp_x86_pause( void );
//
// Spin-wait hint: executes a single 'pause' (or MIC 'delay') and returns.

	.text
	PROC  __kmp_x86_pause

	pause_op
	ret

	DEBUG_INFO __kmp_x86_pause

//
// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
//
// Executes CPUID with EAX=mode, ECX=mode2 and stores EAX/EBX/ECX/EDX
// into the 16-byte buffer at cpuid_buffer.
//
	PROC  __kmp_x86_cpuid

	pushl %ebp
	movl  %esp,%ebp
	pushl %edi
	pushl %ebx
	pushl %ecx
	pushl %edx

	movl  8(%ebp), %eax       // "mode"
	movl  12(%ebp), %ecx      // "mode2"
	cpuid                     // Query the CPUID for the current processor

	movl  16(%ebp), %edi      // "cpuid_buffer"
	movl  %eax, 0(%edi)
	movl  %ebx, 4(%edi)
	movl  %ecx, 8(%edi)
	movl  %edx, 12(%edi)

	popl  %edx
	popl  %ecx
	popl  %ebx
	popl  %edi
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_x86_cpuid
252
253
# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomic fetch-and-add: { old = *p; *p += d; return old; }

	PROC  __kmp_test_then_add32

	movl  4(%esp), %ecx       // "p"
	movl  8(%esp), %eax       // "d"
	lock
	xaddl %eax,(%ecx)         // old value of *p returned in %eax
	ret

	DEBUG_INFO __kmp_test_then_add32

//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
//	p:	4(%esp)
//	d:	8(%esp)
//
// return:	%al (previous value of *p)

	PROC  __kmp_xchg_fixed8

	movl  4(%esp), %ecx       // "p"
	movb  8(%esp), %al        // "d"

	lock
	xchgb %al,(%ecx)          // xchg with memory is implicitly locked
	ret

	DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
//	p:	4(%esp)
//	d:	8(%esp)
// return:	%ax (previous value of *p)

	PROC  __kmp_xchg_fixed16

	movl  4(%esp), %ecx       // "p"
	movw  8(%esp), %ax        // "d"

	lock
	xchgw %ax,(%ecx)
	ret

	DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
//	p:	4(%esp)
//	d:	8(%esp)
//
// return:	%eax (previous value of *p)

	PROC  __kmp_xchg_fixed32

	movl  4(%esp), %ecx       // "p"
	movl  8(%esp), %eax       // "d"

	lock
	xchgl %eax,(%ecx)
	ret

	DEBUG_INFO __kmp_xchg_fixed32
344
345
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomic CAS; returns 1 if *p was equal to cv (and was replaced by sv), else 0.
//

	PROC  __kmp_compare_and_store8

	movl  4(%esp), %ecx       // "p"
	movb  8(%esp), %al        // "cv"
	movb  12(%esp), %dl       // "sv"
	lock
	cmpxchgb %dl,(%ecx)
	sete  %al                 // if %al == (%ecx) set %al = 1 else set %al = 0
	and   $1, %eax            // zero-extend the flag into the full return reg
	ret

	DEBUG_INFO __kmp_compare_and_store8

//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//

	PROC  __kmp_compare_and_store16

	movl  4(%esp), %ecx       // "p"
	movw  8(%esp), %ax        // "cv"
	movw  12(%esp), %dx       // "sv"
	lock
	cmpxchgw %dx,(%ecx)
	sete  %al                 // if %ax == (%ecx) set %al = 1 else set %al = 0
	and   $1, %eax            // zero-extend the flag into the full return reg
	ret

	DEBUG_INFO __kmp_compare_and_store16

//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//

	PROC  __kmp_compare_and_store32

	movl  4(%esp), %ecx       // "p"
	movl  8(%esp), %eax       // "cv"
	movl  12(%esp), %edx      // "sv"
	lock
	cmpxchgl %edx,(%ecx)
	sete  %al                 // if %eax == (%ecx) set %al = 1 else set %al = 0
	and   $1, %eax            // zero-extend the flag into the full return reg
	ret

	DEBUG_INFO __kmp_compare_and_store32

//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// 64-bit CAS on IA-32 via cmpxchg8b (cv in %edx:%eax, sv in %ecx:%ebx).
	PROC  __kmp_compare_and_store64

	pushl %ebp
	movl  %esp, %ebp
	pushl %ebx                // %ebx is callee-saved and used by cmpxchg8b
	pushl %edi
	movl  8(%ebp), %edi       // "p"
	movl  12(%ebp), %eax      // "cv" low order word
	movl  16(%ebp), %edx      // "cv" high order word
	movl  20(%ebp), %ebx      // "sv" low order word
	movl  24(%ebp), %ecx      // "sv" high order word
	lock
	cmpxchg8b (%edi)
	sete  %al                 // if %edx:eax == (%edi) set %al = 1 else set %al = 0
	and   $1, %eax            // zero-extend the flag into the full return reg
	popl  %edi
	popl  %ebx
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_compare_and_store64
426
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// CAS that returns the value read from *p (not a success flag).
//

	PROC  __kmp_compare_and_store_ret8

	movl  4(%esp), %ecx       // "p"
	movb  8(%esp), %al        // "cv"
	movb  12(%esp), %dl       // "sv"
	lock
	cmpxchgb %dl,(%ecx)       // old *p value left in %al by cmpxchg
	ret

	DEBUG_INFO __kmp_compare_and_store_ret8

//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//

	PROC  __kmp_compare_and_store_ret16

	movl  4(%esp), %ecx       // "p"
	movw  8(%esp), %ax        // "cv"
	movw  12(%esp), %dx       // "sv"
	lock
	cmpxchgw %dx,(%ecx)       // old *p value left in %ax
	ret

	DEBUG_INFO __kmp_compare_and_store_ret16

//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//

	PROC  __kmp_compare_and_store_ret32

	movl  4(%esp), %ecx       // "p"
	movl  8(%esp), %eax       // "cv"
	movl  12(%esp), %edx      // "sv"
	lock
	cmpxchgl %edx,(%ecx)      // old *p value left in %eax
	ret

	DEBUG_INFO __kmp_compare_and_store_ret32

//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
	PROC  __kmp_compare_and_store_ret64

	pushl %ebp
	movl  %esp, %ebp
	pushl %ebx                // %ebx is callee-saved and used by cmpxchg8b
	pushl %edi
	movl  8(%ebp), %edi       // "p"
	movl  12(%ebp), %eax      // "cv" low order word
	movl  16(%ebp), %edx      // "cv" high order word
	movl  20(%ebp), %ebx      // "sv" low order word
	movl  24(%ebp), %ecx      // "sv" high order word
	lock
	cmpxchg8b (%edi)          // old *p value left in %edx:%eax
	popl  %edi
	popl  %ebx
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_compare_and_store_ret64
499
500
//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
//	addr:	4(%esp)
//	data:	8(%esp)
//
// return:	old value of *addr (via x87 st(0))


	PROC  __kmp_xchg_real32

	pushl %ebp
	movl  %esp, %ebp
	subl  $4, %esp            // one 4-byte local at -4(%ebp)
	pushl %esi

	// NOTE(review): after this prologue the first cdecl argument would
	// normally be at 8(%ebp); 4(%ebp) is the saved return address.
	// Offsets kept as in the original -- confirm before enabling this
	// !KMP_ASM_INTRINS path.
	movl  4(%ebp), %esi
	flds  (%esi)
	                          // load <addr>
	fsts  -4(%ebp)
	                          // store old value

	movl  8(%ebp), %eax

	lock
	xchgl %eax, (%esi)

	flds  -4(%ebp)
	                          // return old value

	popl  %esi
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_xchg_real32

# endif /* !KMP_ASM_INTRINS */
544
545
//------------------------------------------------------------------------
//
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
//	p:	4(%esp)
//
// Loads the x87 FPU control word from *p.

	PROC  __kmp_load_x87_fpu_control_word

	movl  4(%esp), %eax
	fldcw (%eax)
	ret

	DEBUG_INFO __kmp_load_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
//	p:	4(%esp)
//
// Stores the current x87 FPU control word into *p.

	PROC  __kmp_store_x87_fpu_control_word

	movl  4(%esp), %eax
	fstcw (%eax)
	ret

	DEBUG_INFO __kmp_store_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
//
// Clears pending x87 FPU exceptions (fnclex does not wait for them).

	PROC  __kmp_clear_x87_fpu_status_word

	fnclex
	ret

	DEBUG_INFO __kmp_clear_x87_fpu_status_word
601
602
//------------------------------------------------------------------------
//
// typedef void	(*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//    (*pkfn)( & gtid, & tid, argv[0], ... );
//    return 1;
// }

// -- Begin __kmp_invoke_microtask
// mark_begin;
	PROC  __kmp_invoke_microtask

	pushl %ebp
	KMP_CFI_DEF_OFFSET 8
	KMP_CFI_OFFSET ebp,-8
	movl %esp,%ebp            // establish the base pointer for this routine.
	KMP_CFI_REGISTER ebp
	subl $8,%esp              // allocate space for two local variables.
	                          // These variables are:
	                          //    argv: -4(%ebp)
	                          //    temp: -8(%ebp)
	                          //
	pushl %ebx                // save %ebx to use during this routine
	                          //
#if OMPT_SUPPORT
	movl 28(%ebp),%ebx        // get exit_frame address
	movl %ebp,(%ebx)          // save exit_frame
#endif

	movl 20(%ebp),%ebx        // Stack alignment - # args
	addl $2,%ebx              // #args +2  Always pass at least 2 args (gtid and tid)
	shll $2,%ebx              // Number of bytes used on stack: (#args+2)*4
	movl %esp,%eax            //
	subl %ebx,%eax            // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
	movl %eax,%ebx            // Save to %ebx
	andl $0xFFFFFF80,%eax     // mask off 7 bits
	subl %eax,%ebx            // Amount to subtract from %esp
	subl %ebx,%esp            // Prepare the stack ptr --
	                          // now it will be aligned on 128-byte boundary at the call

	movl 24(%ebp),%eax        // copy from p_argv[]
	movl %eax,-4(%ebp)        // into the local variable *argv.

	movl 20(%ebp),%ebx        // argc is 20(%ebp)
	shll $2,%ebx

KMP_LABEL(invoke_2):
	cmpl $0,%ebx
	jg   KMP_LABEL(invoke_4)
	jmp  KMP_LABEL(invoke_3)
	ALIGN 2
KMP_LABEL(invoke_4):
	movl -4(%ebp),%eax
	subl $4,%ebx              // decrement argc.
	addl %ebx,%eax            // index into argv.
	movl (%eax),%edx
	pushl %edx

	jmp  KMP_LABEL(invoke_2)
	ALIGN 2
KMP_LABEL(invoke_3):
	leal 16(%ebp),%eax        // push & tid
	pushl %eax

	leal 12(%ebp),%eax        // push & gtid
	pushl %eax

	movl 8(%ebp),%ebx
	call *%ebx                // call (*pkfn)();

	movl $1,%eax              // return 1;

	movl -12(%ebp),%ebx       // restore %ebx
	leave
	KMP_CFI_DEF esp,4
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask


// kmp_uint64
// __kmp_hardware_timestamp(void)
// Reads the time-stamp counter; result in %edx:%eax per rdtsc.
	PROC  __kmp_hardware_timestamp
	rdtsc
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86 && !KMP_ARCH_PPC64 */
698
699
#if KMP_ARCH_X86_64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
	.ident "Intel Corporation"
// --	.file "z_Linux_asm.s"
	.data
	ALIGN 4

// To prevent getting our code into .data section .text added to every routine
// definition for x86_64.
//------------------------------------------------------------------------
//
// FUNCTION __kmp_x86_cpuid
//
// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
//
// parameters:
//	mode:		%edi
//	mode2:		%esi
//	cpuid_buffer:	%rdx

	.text
	PROC  __kmp_x86_cpuid

	pushq %rbp
	movq  %rsp,%rbp
	pushq %rbx                // callee-save register

	movl  %esi, %ecx          // "mode2"
	movl  %edi, %eax          // "mode"
	movq  %rdx, %rsi          // cpuid_buffer
	cpuid                     // Query the CPUID for the current processor

	movl  %eax, 0(%rsi)       // store results into buffer
	movl  %ebx, 4(%rsi)
	movl  %ecx, 8(%rsi)
	movl  %edx, 12(%rsi)

	popq  %rbx                // callee-save register
	movq  %rbp, %rsp
	popq  %rbp
	ret

	DEBUG_INFO __kmp_x86_cpuid
750
751
752
# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
//
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
//	p:	%rdi
//	d:	%esi
//
// return:	%eax (previous value of *p)

	.text
	PROC  __kmp_test_then_add32

	movl  %esi, %eax          // "d"
	lock
	xaddl %eax,(%rdi)
	ret

	DEBUG_INFO __kmp_test_then_add32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
//	p:	%rdi
//	d:	%rsi
// return:	%rax (previous value of *p)

	.text
	PROC  __kmp_test_then_add64

	movq  %rsi, %rax          // "d"
	lock
	xaddq %rax,(%rdi)
	ret

	DEBUG_INFO __kmp_test_then_add64
800
801
//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
//	p:	%rdi
//	d:	%sil
//
// return:	%al (previous value of *p)

	.text
	PROC  __kmp_xchg_fixed8

	movb  %sil, %al           // "d"

	lock
	xchgb %al,(%rdi)          // xchg with memory is implicitly locked
	ret

	DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
//	p:	%rdi
//	d:	%si
// return:	%ax (previous value of *p)

	.text
	PROC  __kmp_xchg_fixed16

	movw  %si, %ax            // "d"

	lock
	xchgw %ax,(%rdi)
	ret

	DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
//	p:	%rdi
//	d:	%esi
//
// return:	%eax (previous value of *p)

	.text
	PROC  __kmp_xchg_fixed32

	movl  %esi, %eax          // "d"

	lock
	xchgl %eax,(%rdi)
	ret

	DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
//	p:	%rdi
//	d:	%rsi
// return:	%rax (previous value of *p)

	.text
	PROC  __kmp_xchg_fixed64

	movq  %rsi, %rax          // "d"

	lock
	xchgq %rax,(%rdi)
	ret

	DEBUG_INFO __kmp_xchg_fixed64
898
899
//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%esi
//	sv:	%edx
//
// return:	%eax (1 if the exchange happened, else 0)

	.text
	PROC  __kmp_compare_and_store8

	movb  %sil, %al           // "cv"
	lock
	cmpxchgb %dl,(%rdi)
	sete  %al                 // if %al == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax            // zero all but the low bit for the return value
	ret

	DEBUG_INFO __kmp_compare_and_store8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%si
//	sv:	%dx
//
// return:	%eax

	.text
	PROC  __kmp_compare_and_store16

	movw  %si, %ax            // "cv"
	lock
	cmpxchgw %dx,(%rdi)
	sete  %al                 // if %ax == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax            // zero all but the low bit for the return value
	ret

	DEBUG_INFO __kmp_compare_and_store16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%esi
//	sv:	%edx
//
// return:	%eax

	.text
	PROC  __kmp_compare_and_store32

	movl  %esi, %eax          // "cv"
	lock
	cmpxchgl %edx,(%rdi)
	sete  %al                 // if %eax == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax            // zero all but the low bit for the return value
	ret

	DEBUG_INFO __kmp_compare_and_store32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%rsi
//	sv:	%rdx
// return:	%eax

	.text
	PROC  __kmp_compare_and_store64

	movq  %rsi, %rax          // "cv"
	lock
	cmpxchgq %rdx,(%rdi)
	sete  %al                 // if %rax == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax            // zero all but the low bit for the return value
	ret

	DEBUG_INFO __kmp_compare_and_store64
1005
//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%esi
//	sv:	%edx
//
// return:	%eax (the value read from *p, not a success flag)

	.text
	PROC  __kmp_compare_and_store_ret8

	movb  %sil, %al           // "cv"
	lock
	cmpxchgb %dl,(%rdi)       // old *p value left in %al
	ret

	DEBUG_INFO __kmp_compare_and_store_ret8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%si
//	sv:	%dx
//
// return:	%eax

	.text
	PROC  __kmp_compare_and_store_ret16

	movw  %si, %ax            // "cv"
	lock
	cmpxchgw %dx,(%rdi)       // old *p value left in %ax
	ret

	DEBUG_INFO __kmp_compare_and_store_ret16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%esi
//	sv:	%edx
//
// return:	%eax

	.text
	PROC  __kmp_compare_and_store_ret32

	movl  %esi, %eax          // "cv"
	lock
	cmpxchgl %edx,(%rdi)      // old *p value left in %eax
	ret

	DEBUG_INFO __kmp_compare_and_store_ret32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
//	p:	%rdi
//	cv:	%rsi
//	sv:	%rdx
// return:	%eax

	.text
	PROC  __kmp_compare_and_store_ret64

	movq  %rsi, %rax          // "cv"
	lock
	cmpxchgq %rdx,(%rdi)      // old *p value left in %rax
	ret

	DEBUG_INFO __kmp_compare_and_store_ret64
1103
# endif /* !KMP_ASM_INTRINS */


# if !KMP_MIC

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
//	addr:	%rdi
//	data:	%xmm0 (lower 4 bytes)
//
// return:	%xmm0 (lower 4 bytes) -- the old value of *addr

	.text
	PROC  __kmp_xchg_real32

	movd  %xmm0, %eax         // load "data" to eax

	lock
	xchgl %eax, (%rdi)

	movd  %eax, %xmm0         // load old value into return register

	ret

	DEBUG_INFO __kmp_xchg_real32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// parameters:
//	addr:	%rdi
//	data:	%xmm0 (lower 8 bytes)
// return:	%xmm0 (lower 8 bytes) -- the old value of *addr
//

	.text
	PROC  __kmp_xchg_real64

	movd  %xmm0, %rax         // load "data" to rax

	lock
	xchgq %rax, (%rdi)

	movd  %rax, %xmm0         // load old value into return register
	ret

	DEBUG_INFO __kmp_xchg_real64


# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */
1169
1170
//------------------------------------------------------------------------
//
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
//	p:	%rdi
//

	.text
	PROC  __kmp_load_x87_fpu_control_word

	fldcw (%rdi)
	ret

	DEBUG_INFO __kmp_load_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
//	p:	%rdi
//

	.text
	PROC  __kmp_store_x87_fpu_control_word

	fstcw (%rdi)
	ret

	DEBUG_INFO __kmp_store_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
//
// Clears pending x87 FPU exception flags.

	.text
	PROC  __kmp_clear_x87_fpu_status_word

#if KMP_MIC
// TODO: remove the workaround for problem with fnclex instruction (no CQ known)
	fstenv  -32(%rsp)             // store FP env
	andw    $~0x80ff, 4-32(%rsp)  // clear 0-7,15 bits of FP SW
	fldenv  -32(%rsp)             // load FP env back
	ret
#else
	fnclex
	ret
#endif

	DEBUG_INFO __kmp_clear_x87_fpu_status_word
1235
1236
1237//------------------------------------------------------------------------
1238//
1239// typedef void (*microtask_t)( int *gtid, int *tid, ... );
1240//
1241// int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001242// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
Jim Cownie5e8470a2013-09-27 10:38:44 +00001243// int gtid, int tid,
1244// int argc, void *p_argv[] ) {
1245// (*pkfn)( & gtid, & tid, argv[0], ... );
1246// return 1;
1247// }
1248//
1249// note:
1250// at call to pkfn must have %rsp 128-byte aligned for compiler
1251//
1252// parameters:
1253// %rdi: pkfn
1254// %esi: gtid
1255// %edx: tid
1256// %ecx: argc
1257// %r8: p_argv
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001258// %r9: &exit_frame
Jim Cownie5e8470a2013-09-27 10:38:44 +00001259//
1260// locals:
1261// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1262// __tid: tid parm pushed on stack so can pass &tid to pkfn
1263//
1264// reg temps:
1265// %rax: used all over the place
1266// %rdx: used in stack pointer alignment calculation
1267// %r11: used to traverse p_argv array
1268// %rsi: used as temporary for stack parameters
1269// used as temporary for number of pkfn parms to push
1270// %rbx: used to hold pkfn address, and zero constant, callee-save
1271//
1272// return: %eax (always 1/TRUE)
1273//
1274
1275__gtid = -16
1276__tid = -24
1277
1278// -- Begin __kmp_invoke_microtask
1279// mark_begin;
1280 .text
1281 PROC __kmp_invoke_microtask
1282
1283 pushq %rbp // save base pointer
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001284 KMP_CFI_DEF_OFFSET 16
1285 KMP_CFI_OFFSET rbp,-16
Jim Cownie5e8470a2013-09-27 10:38:44 +00001286 movq %rsp,%rbp // establish the base pointer for this routine.
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001287 KMP_CFI_REGISTER rbp
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001288
1289#if OMPT_SUPPORT
1290 movq %rbp, (%r9) // save exit_frame
1291#endif
1292
Jim Cownie5e8470a2013-09-27 10:38:44 +00001293 pushq %rbx // %rbx is callee-saved register
Jim Cownie5e8470a2013-09-27 10:38:44 +00001294 pushq %rsi // Put gtid on stack so can pass &tgid to pkfn
1295 pushq %rdx // Put tid on stack so can pass &tid to pkfn
1296
1297 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
1298 movq $0, %rbx // constant for cmovs later
1299 subq $4, %rax // subtract four args passed in registers to pkfn
Jonathan Peyton621743b2015-08-20 19:46:14 +00001300#if KMP_MIC
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001301 js KMP_LABEL(kmp_0) // jump to movq
1302 jmp KMP_LABEL(kmp_0_exit) // jump ahead
1303KMP_LABEL(kmp_0):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001304 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001305KMP_LABEL(kmp_0_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001306#else
1307 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
Jonathan Peyton621743b2015-08-20 19:46:14 +00001308#endif // KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001309
1310 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
1311 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
1312
1313 movq %rsp, %rdx //
1314 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
1315 // without align, stack ptr would be this
1316 movq %rdx, %rax // Save to %rax
1317
1318 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
1319 subq %rax, %rdx // Amount to subtract from %rsp
1320 subq %rdx, %rsp // Prepare the stack ptr --
1321 // now %rsp will align to 128-byte boundary at call site
1322
1323 // setup pkfn parameter reg and stack
1324 movq %rcx, %rax // argc -> %rax
1325 cmpq $0, %rsi
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001326 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
Jim Cownie5e8470a2013-09-27 10:38:44 +00001327 shlq $3, %rcx // argc*8 -> %rcx
1328 movq %r8, %rdx // p_argv -> %rdx
1329 addq %rcx, %rdx // &p_argv[argc] -> %rdx
1330
1331 movq %rsi, %rcx // max (0, argc-4) -> %rcx
1332
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001333KMP_LABEL(kmp_invoke_push_parms):
1334 // push nth - 7th parms to pkfn on stack
Jim Cownie5e8470a2013-09-27 10:38:44 +00001335 subq $8, %rdx // decrement p_argv pointer to previous parm
1336 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
1337 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
1338 subl $1, %ecx
1339
1340// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
1341// if the name of the label that is an operand of this jecxz starts with a dot (".");
1342// Apple's linker does not support 1-byte length relocation;
1343// Resolution: replace all .labelX entries with L_labelX.
1344
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001345 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
1346 jmp KMP_LABEL(kmp_invoke_push_parms)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001347 ALIGN 3
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001348KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001349 // order here is important to avoid trashing
1350 // registers used for both input and output parms!
1351 movq %rdi, %rbx // pkfn -> %rbx
1352 leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
1353 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
1354
1355 movq %r8, %r11 // p_argv -> %r11
1356
Jonathan Peyton621743b2015-08-20 19:46:14 +00001357#if KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001358 cmpq $4, %rax // argc >= 4?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001359 jns KMP_LABEL(kmp_4) // jump to movq
1360 jmp KMP_LABEL(kmp_4_exit) // jump ahead
1361KMP_LABEL(kmp_4):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001362 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001363KMP_LABEL(kmp_4_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001364
1365 cmpq $3, %rax // argc >= 3?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001366 jns KMP_LABEL(kmp_3) // jump to movq
1367 jmp KMP_LABEL(kmp_3_exit) // jump ahead
1368KMP_LABEL(kmp_3):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001369 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001370KMP_LABEL(kmp_3_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001371
1372 cmpq $2, %rax // argc >= 2?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001373 jns KMP_LABEL(kmp_2) // jump to movq
1374 jmp KMP_LABEL(kmp_2_exit) // jump ahead
1375KMP_LABEL(kmp_2):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001376 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001377KMP_LABEL(kmp_2_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001378
1379 cmpq $1, %rax // argc >= 1?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001380 jns KMP_LABEL(kmp_1) // jump to movq
1381 jmp KMP_LABEL(kmp_1_exit) // jump ahead
1382KMP_LABEL(kmp_1):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001383 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001384KMP_LABEL(kmp_1_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001385#else
1386 cmpq $4, %rax // argc >= 4?
1387 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
1388
1389 cmpq $3, %rax // argc >= 3?
1390 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
1391
1392 cmpq $2, %rax // argc >= 2?
1393 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
1394
1395 cmpq $1, %rax // argc >= 1?
1396 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
Jonathan Peyton621743b2015-08-20 19:46:14 +00001397#endif // KMP_MIC
Jim Cownie5e8470a2013-09-27 10:38:44 +00001398
1399 call *%rbx // call (*pkfn)();
1400 movq $1, %rax // move 1 into return register;
1401
1402 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
1403 movq %rbp, %rsp // restore stack pointer
1404 popq %rbp // restore frame pointer
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001405 KMP_CFI_DEF rsp,8
Jim Cownie5e8470a2013-09-27 10:38:44 +00001406 ret
1407
1408 DEBUG_INFO __kmp_invoke_microtask
1409// -- End __kmp_invoke_microtask
1410
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Returns the processor's 64-bit time-stamp counter.
        .text
        PROC __kmp_hardware_timestamp
        rdtsc                           // TSC -> %edx:%eax (high:low 32 bits)
        shlq $32, %rdx                  // move high half into bits 63:32
        orq %rdx, %rax                  // %rax = full 64-bit timestamp
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End __kmp_hardware_timestamp
1422
1423//------------------------------------------------------------------------
1424//
1425// FUNCTION __kmp_bsr32
1426//
1427// int
1428// __kmp_bsr32( int );
1429//
1430
1431 .text
1432 PROC __kmp_bsr32
1433
1434 bsr %edi,%eax
1435 ret
1436
1437 DEBUG_INFO __kmp_bsr32
1438
Jonathan Peyton61118492016-05-20 19:03:38 +00001439
Jim Cownie5e8470a2013-09-27 10:38:44 +00001440// -----------------------------------------------------------------------
1441#endif /* KMP_ARCH_X86_64 */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001442
Paul Osmialowski7e5e8682016-05-13 08:26:42 +00001443// '
1444#if KMP_OS_LINUX && KMP_ARCH_AARCH64
1445
1446//------------------------------------------------------------------------
1447//
1448// typedef void (*microtask_t)( int *gtid, int *tid, ... );
1449//
1450// int
1451// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
1452// int gtid, int tid,
1453// int argc, void *p_argv[] ) {
1454// (*pkfn)( & gtid, & tid, argv[0], ... );
1455// return 1;
1456// }
1457//
1458// parameters:
1459// x0: pkfn
1460// w1: gtid
1461// w2: tid
1462// w3: argc
1463// x4: p_argv
1464// x5: &exit_frame
1465//
1466// locals:
1467// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1468// __tid: tid parm pushed on stack so can pass &tid to pkfn
1469//
1470// reg temps:
1471// x8: used to hold pkfn address
1472// w9: used as temporary for number of pkfn parms
1473// x10: used to traverse p_argv array
1474// x11: used as temporary for stack placement calculation
1475// x12: used as temporary for stack parameters
1476// x19: used to preserve exit_frame_ptr, callee-save
1477//
1478// return: w0 (always 1/TRUE)
1479//
1480
1481__gtid = 4
1482__tid = 8
1483
1484// -- Begin __kmp_invoke_microtask
1485// mark_begin;
1486 .text
1487 PROC __kmp_invoke_microtask
1488
1489 stp x29, x30, [sp, #-16]!
1490# if OMPT_SUPPORT
1491 stp x19, x20, [sp, #-16]!
1492# endif
1493 mov x29, sp
1494
1495 orr w9, wzr, #1
1496 add w9, w9, w3, lsr #1
1497 sub sp, sp, w9, lsl #4
1498 mov x11, sp
1499
1500 mov x8, x0
1501 str w1, [x29, #-__gtid]
1502 str w2, [x29, #-__tid]
1503 mov w9, w3
1504 mov x10, x4
1505# if OMPT_SUPPORT
1506 mov x19, x5
1507 str x29, [x19]
1508# endif
1509
1510 sub x0, x29, #__gtid
1511 sub x1, x29, #__tid
1512
1513 cbz w9, KMP_LABEL(kmp_1)
1514 ldr x2, [x10]
1515
1516 sub w9, w9, #1
1517 cbz w9, KMP_LABEL(kmp_1)
1518 ldr x3, [x10, #8]!
1519
1520 sub w9, w9, #1
1521 cbz w9, KMP_LABEL(kmp_1)
1522 ldr x4, [x10, #8]!
1523
1524 sub w9, w9, #1
1525 cbz w9, KMP_LABEL(kmp_1)
1526 ldr x5, [x10, #8]!
1527
1528 sub w9, w9, #1
1529 cbz w9, KMP_LABEL(kmp_1)
1530 ldr x6, [x10, #8]!
1531
1532 sub w9, w9, #1
1533 cbz w9, KMP_LABEL(kmp_1)
1534 ldr x7, [x10, #8]!
1535
1536KMP_LABEL(kmp_0):
1537 sub w9, w9, #1
1538 cbz w9, KMP_LABEL(kmp_1)
1539 ldr x12, [x10, #8]!
1540 str x12, [x11], #8
1541 b KMP_LABEL(kmp_0)
1542KMP_LABEL(kmp_1):
1543 blr x8
1544 orr w0, wzr, #1
1545 mov sp, x29
1546# if OMPT_SUPPORT
1547 str xzr, [x19]
1548 ldp x19, x20, [sp], #16
1549# endif
1550 ldp x29, x30, [sp], #16
1551 ret
1552
1553 DEBUG_INFO __kmp_invoke_microtask
1554// -- End __kmp_invoke_microtask
1555
1556#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
1557
Jim Cownie181b4bb2013-12-23 17:28:57 +00001558#if KMP_ARCH_ARM
1559 .data
1560 .comm .gomp_critical_user_,32,8
1561 .data
1562 .align 4
1563 .global __kmp_unnamed_critical_addr
1564__kmp_unnamed_critical_addr:
1565 .4byte .gomp_critical_user_
1566 .size __kmp_unnamed_critical_addr,4
1567#endif /* KMP_ARCH_ARM */
1568
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001569#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie3051f972014-08-07 10:12:54 +00001570 .data
1571 .comm .gomp_critical_user_,32,8
1572 .data
1573 .align 8
1574 .global __kmp_unnamed_critical_addr
1575__kmp_unnamed_critical_addr:
1576 .8byte .gomp_critical_user_
1577 .size __kmp_unnamed_critical_addr,8
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001578#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001579
Jonathan Peyton621743b2015-08-20 19:46:14 +00001580#if KMP_OS_LINUX
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001581# if KMP_ARCH_ARM
1582.section .note.GNU-stack,"",%progbits
1583# else
Jim Cownie181b4bb2013-12-23 17:28:57 +00001584.section .note.GNU-stack,"",@progbits
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001585# endif
Jim Cownie181b4bb2013-12-23 17:28:57 +00001586#endif