blob: c33c817bf4878f8f537cdc4e740f59d8ad78b031 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001// z_Linux_asm.s: - microtasking routines specifically
2// written for Intel platforms running Linux* OS
Jim Cownie5e8470a2013-09-27 10:38:44 +00003
4//
5////===----------------------------------------------------------------------===//
6////
7//// The LLVM Compiler Infrastructure
8////
9//// This file is dual licensed under the MIT and the University of Illinois Open
10//// Source Licenses. See LICENSE.txt for details.
11////
12////===----------------------------------------------------------------------===//
13//
14
15// -----------------------------------------------------------------------
16// macros
17// -----------------------------------------------------------------------
18
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

# if __MIC__ || __MIC2__
//
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
#  define pause_op   .byte 0xf3,0x90    // encoding of 'pause' (rep; nop)
# endif // __MIC__ || __MIC2__

# if defined __APPLE__ && defined __MACH__
// OS X* (Mach-O) branch: C symbols get a leading underscore, .macro
// arguments are positional ($0, $1, ...), and the assembler has no
// DWARF CFI directives -- so the KMP_CFI_* wrappers are empty here.
#  define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
#  define KMP_LABEL(x) L_##x            // form the name of label
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
.macro ALIGN
	.align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // defined __APPLE__ && defined __MACH__
// ELF (Linux* OS) branch: no symbol underscore; .macro arguments are named.
#  define KMP_PREFIX_UNDERSCORE(x) x    // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*)
#  if __MIC__ || __MIC2__
#   define KMP_LABEL(x) L_##x           // local label
#  else
#   define KMP_LABEL(x) .L_##x          // local label hidden from backtraces
#  endif // __MIC__ || __MIC2__
.macro ALIGN size
	.align 1<<(\size)
.endm
// DEBUG_INFO closes the CFI region opened by PROC and emits the ELF
// symbol type/size so tools (gdb, objdump) see proper function bounds.
.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	.align 16
	.type  \proc,@function
	.size  \proc,.-\proc
.endm
// PROC emits the aligned, global entry label and opens a CFI region.
.macro PROC proc
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm
// Thin wrappers over the DWARF CFI directives (ELF targets only).
.macro KMP_CFI_DEF_OFFSET sz
	.cfi_def_cfa_offset	\sz
.endm
.macro KMP_CFI_OFFSET reg, sz
	.cfi_offset	\reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
	.cfi_def_cfa_register	\reg
.endm
.macro KMP_CFI_DEF reg, sz
	.cfi_def_cfa	\reg,\sz
.endm
# endif // defined __APPLE__ && defined __MACH__
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
Jim Cownie5e8470a2013-09-27 10:38:44 +0000109
110
111// -----------------------------------------------------------------------
112// data
113// -----------------------------------------------------------------------
114
#ifdef KMP_GOMP_COMPAT

//
// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.
//
// __kmp_unnamed_critical_addr is a pointer-sized object holding the
// address of the 32-byte .gomp_critical_user_ common block; the C side
// reads it when entering a GOMP-compatible unnamed critical section.
//

# if KMP_ARCH_X86
#  if defined __APPLE__ && defined __MACH__
	.data
	.comm .gomp_critical_user_,32
	.data
	.globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
	.long .gomp_critical_user_
#  else /* Linux* OS */
	.data
	.comm .gomp_critical_user_,32,8
	.data
	ALIGN 4
	.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
	.4byte .gomp_critical_user_             // 32-bit pointer
	.type __kmp_unnamed_critical_addr,@object
	.size __kmp_unnamed_critical_addr,4
#  endif /* defined __APPLE__ && defined __MACH__ */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if defined __APPLE__ && defined __MACH__
	.data
	.comm .gomp_critical_user_,32
	.data
	.globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
	.quad .gomp_critical_user_
#  else /* Linux* OS */
	.data
	.comm .gomp_critical_user_,32,8
	.data
	ALIGN 8
	.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
	.8byte .gomp_critical_user_             // 64-bit pointer
	.type __kmp_unnamed_critical_addr,@object
	.size __kmp_unnamed_critical_addr,8
#  endif /* defined __APPLE__ && defined __MACH__ */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */
167
168
Jim Cownie3051f972014-08-07 10:12:54 +0000169#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
Jim Cownie5e8470a2013-09-27 10:38:44 +0000170
171// -----------------------------------------------------------------------
172// microtasking routines specifically written for IA-32 architecture
173// running Linux* OS
174// -----------------------------------------------------------------------
175//
176
	.ident "Intel Corporation"
	.data
	ALIGN 4

// void
// __kmp_x86_pause( void );
//
// Spin-wait hint: executes one 'pause' (via the pause_op macro) and
// returns.  No arguments, no return value, no registers modified.

	.text
	PROC  __kmp_x86_pause

	pause_op
	ret

	DEBUG_INFO __kmp_x86_pause
191
//
// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
//
// cdecl parameters (after prologue):
// 	mode:         8(%ebp)   -> CPUID leaf (EAX)
// 	mode2:       12(%ebp)   -> CPUID sub-leaf (ECX)
// 	cpuid_buffer:16(%ebp)   -> receives EAX,EBX,ECX,EDX as 4 ints
//
	PROC  __kmp_x86_cpuid

	pushl %ebp
	movl  %esp,%ebp
	pushl %edi                      // callee-saved; holds buffer pointer
	pushl %ebx                      // callee-saved; clobbered by cpuid
	pushl %ecx                      // not required by cdecl, preserved anyway
	pushl %edx                      // not required by cdecl, preserved anyway

	movl  8(%ebp), %eax             // "mode"
	movl  12(%ebp), %ecx            // "mode2"
	cpuid                           // Query the CPUID for the current processor

	movl  16(%ebp), %edi            // cpuid_buffer
	movl  %eax, 0(%edi)             // store results into the buffer
	movl  %ebx, 4(%edi)
	movl  %ecx, 8(%edi)
	movl  %edx, 12(%edi)

	popl  %edx
	popl  %ecx
	popl  %ebx
	popl  %edi
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_x86_cpuid
224
225
226# if !KMP_ASM_INTRINS
227
//------------------------------------------------------------------------
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// cdecl: p = 4(%esp), d = 8(%esp).
// Atomic fetch-and-add: *p += d; returns the OLD value of *p in %eax.
//

	PROC  __kmp_test_then_add32

	movl  4(%esp), %ecx             // "p"
	movl  8(%esp), %eax             // "d"
	lock
	xaddl %eax,(%ecx)               // *p += d, %eax = old *p
	ret

	DEBUG_INFO __kmp_test_then_add32
243
//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%al  (old *p; only the low byte of %eax is significant)
//
// Atomic 8-bit exchange: *p = d, returns the previous value.

	PROC  __kmp_xchg_fixed8

	movl  4(%esp), %ecx             // "p"
	movb  8(%esp), %al              // "d"

	lock                            // xchg with a memory operand is implicitly
	xchgb %al,(%ecx)                // locked; the explicit prefix is harmless
	ret

	DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
// return:	%ax  (old *p)

	PROC  __kmp_xchg_fixed16

	movl  4(%esp), %ecx             // "p"
	movw  8(%esp), %ax              // "d"

	lock
	xchgw %ax,(%ecx)                // atomically swap %ax with *p
	ret

	DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax (old *p)

	PROC  __kmp_xchg_fixed32

	movl  4(%esp), %ecx             // "p"
	movl  8(%esp), %eax             // "d"

	lock
	xchgl %eax,(%ecx)               // atomically swap %eax with *p
	ret

	DEBUG_INFO __kmp_xchg_fixed32
316
317
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// cdecl: p = 4(%esp), cv = 8(%esp), sv = 12(%esp).
// Atomic compare-and-swap: if *p == cv then *p = sv.
// Returns 1 in %eax if the swap happened, 0 otherwise.
//

	PROC  __kmp_compare_and_store8

	movl  4(%esp), %ecx
	movb  8(%esp), %al              // cv -> %al, the cmpxchg comparand
	movb  12(%esp), %dl             // sv
	lock
	cmpxchgb %dl,(%ecx)
	sete  %al                       // if %al == (%ecx) set %al = 1 else set %al = 0
	and   $1, %eax                  // zero upper bits of %eax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store8

//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Same contract as the 8-bit version, for 16-bit operands.
//

	PROC  __kmp_compare_and_store16

	movl  4(%esp), %ecx
	movw  8(%esp), %ax              // cv
	movw  12(%esp), %dx             // sv
	lock
	cmpxchgw %dx,(%ecx)
	sete  %al                       // if %ax == (%ecx) set %al = 1 else set %al = 0
	and   $1, %eax                  // zero upper bits of %eax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store16

//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Same contract as the 8-bit version, for 32-bit operands.
//

	PROC  __kmp_compare_and_store32

	movl  4(%esp), %ecx
	movl  8(%esp), %eax             // cv
	movl  12(%esp), %edx            // sv
	lock
	cmpxchgl %edx,(%ecx)
	sete  %al                       // if %eax == (%ecx) set %al = 1 else set %al = 0
	and   $1, %eax                  // zero upper bits of %eax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store32

//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// 64-bit CAS on IA-32 via cmpxchg8b; 64-bit args are passed as two
// 32-bit stack words each (low word first in the cdecl layout).
//
	PROC  __kmp_compare_and_store64

	pushl %ebp
	movl  %esp, %ebp
	pushl %ebx                      // callee-saved; cmpxchg8b uses %ecx:%ebx
	pushl %edi                      // callee-saved; holds p
	movl  8(%ebp), %edi
	movl  12(%ebp), %eax            // "cv" low order word
	movl  16(%ebp), %edx            // "cv" high order word
	movl  20(%ebp), %ebx            // "sv" low order word
	movl  24(%ebp), %ecx            // "sv" high order word
	lock
	cmpxchg8b (%edi)
	sete  %al                       // if %edx:%eax == (%edi) set %al = 1 else set %al = 0
	and   $1, %eax                  // zero upper bits of %eax so result is exactly 0/1
	popl  %edi
	popl  %ebx
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_compare_and_store64
398
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// cdecl: p = 4(%esp), cv = 8(%esp), sv = 12(%esp).
// Like __kmp_compare_and_store8, but returns the OLD value of *p
// (in %al) instead of a success flag.  cmpxchg leaves the old value in
// the accumulator on failure; on success the accumulator still holds
// cv, which equals the old value -- so %al is correct either way.
//

	PROC  __kmp_compare_and_store_ret8

	movl  4(%esp), %ecx
	movb  8(%esp), %al              // cv
	movb  12(%esp), %dl             // sv
	lock
	cmpxchgb %dl,(%ecx)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret8

//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Returns the old *p in %ax (see the 8-bit version for the reasoning).
//

	PROC  __kmp_compare_and_store_ret16

	movl  4(%esp), %ecx
	movw  8(%esp), %ax              // cv
	movw  12(%esp), %dx             // sv
	lock
	cmpxchgw %dx,(%ecx)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret16

//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Returns the old *p in %eax.
//

	PROC  __kmp_compare_and_store_ret32

	movl  4(%esp), %ecx
	movl  8(%esp), %eax             // cv
	movl  12(%esp), %edx            // sv
	lock
	cmpxchgl %edx,(%ecx)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret32

//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Returns the old *p in %edx:%eax (the cdecl 64-bit return registers,
// which is exactly where cmpxchg8b leaves the old value).
//
	PROC  __kmp_compare_and_store_ret64

	pushl %ebp
	movl  %esp, %ebp
	pushl %ebx                      // callee-saved; cmpxchg8b uses %ecx:%ebx
	pushl %edi                      // callee-saved; holds p
	movl  8(%ebp), %edi
	movl  12(%ebp), %eax            // "cv" low order word
	movl  16(%ebp), %edx            // "cv" high order word
	movl  20(%ebp), %ebx            // "sv" low order word
	movl  24(%ebp), %ecx            // "sv" high order word
	lock
	cmpxchg8b (%edi)
	popl  %edi
	popl  %ebx
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_compare_and_store_ret64
471
472
//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// cdecl stack frame after "pushl %ebp; movl %esp,%ebp":
//	 0(%ebp)  saved %ebp
//	 4(%ebp)  return address
//	 8(%ebp)  addr
//	12(%ebp)  data
//
// Atomically stores 'data' into *addr and returns the previous value of
// *addr on the x87 stack (st(0)), the cdecl float return convention.
//
// BUG FIX: the previous code loaded the arguments from 4(%ebp)/8(%ebp),
// i.e. it dereferenced the RETURN ADDRESS as 'addr' and used the addr
// argument as 'data'.  The correct cdecl offsets are 8(%ebp)/12(%ebp).
// (Latent bug: this routine is only assembled when !KMP_ASM_INTRINS.)

	PROC  __kmp_xchg_real32

	pushl %ebp
	movl  %esp, %ebp
	subl  $4, %esp                  // one float-sized local at -4(%ebp)
	pushl %esi                      // callee-saved

	movl  8(%ebp), %esi             // addr  (was 4(%ebp): the return address!)
	flds  (%esi)
	                                // load old *addr
	fsts  -4(%ebp)
	                                // spill old value so it can be returned

	movl  12(%ebp), %eax            // data  (was 8(%ebp): the addr argument)

	lock
	xchgl %eax, (%esi)              // *addr = data, atomically

	flds  -4(%ebp)
	                                // return old value in st(0)

	popl  %esi
	movl  %ebp, %esp
	popl  %ebp
	ret

	DEBUG_INFO __kmp_xchg_real32
514
515# endif /* !KMP_ASM_INTRINS */
516
517
//------------------------------------------------------------------------
//
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
//
// cdecl: p = 4(%esp).  Loads the x87 FPU control word from *p (fldcw).
//

	PROC  __kmp_load_x87_fpu_control_word

	movl  4(%esp), %eax
	fldcw (%eax)
	ret

	DEBUG_INFO __kmp_load_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
//
// cdecl: p = 4(%esp).  Stores the current x87 FPU control word to *p
// (fstcw waits for pending unmasked FP exceptions first).
//

	PROC  __kmp_store_x87_fpu_control_word

	movl  4(%esp), %eax
	fstcw (%eax)
	ret

	DEBUG_INFO __kmp_store_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
//
// Clears pending x87 FP exception flags without checking for them
// (fnclex = no-wait form).
//

	PROC  __kmp_clear_x87_fpu_status_word

	fnclex
	ret

	DEBUG_INFO __kmp_clear_x87_fpu_status_word
573
574
//------------------------------------------------------------------------
//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//    (*pkfn)( & gtid, & tid, p_argv[0], ... );
//    return 1;
// }
//
// cdecl parameters (after prologue):
//	pkfn:    8(%ebp)
//	gtid:   12(%ebp)   (address of this slot passed as &gtid)
//	tid:    16(%ebp)   (address of this slot passed as &tid)
//	argc:   20(%ebp)
//	p_argv: 24(%ebp)
//
// The stack is realigned so %esp is on a 128-byte boundary at the call,
// then p_argv[argc-1] .. p_argv[0], &tid, &gtid are pushed (reverse
// order, per cdecl) and pkfn is invoked.  Always returns 1.

// -- Begin __kmp_invoke_microtask
// mark_begin;
	PROC  __kmp_invoke_microtask

	pushl %ebp
	KMP_CFI_DEF_OFFSET 8
	KMP_CFI_OFFSET ebp,-8
	movl %esp,%ebp		// establish the base pointer for this routine.
	KMP_CFI_REGISTER ebp
	subl $8,%esp		// allocate space for two local variables.
				// These variables are:
				//	argv: -4(%ebp)
				//	temp: -8(%ebp)
				//
	pushl %ebx		// save %ebx to use during this routine
				// (lands at -12(%ebp); restored from there below)
	movl 20(%ebp),%ebx	// Stack alignment - # args
	addl $2,%ebx		// #args +2  Always pass at least 2 args (gtid and tid)
	shll $2,%ebx		// Number of bytes used on stack: (#args+2)*4
	movl %esp,%eax		//
	subl %ebx,%eax		// %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
	movl %eax,%ebx		// Save to %ebx
	andl $0xFFFFFF80,%eax	// mask off 7 bits
	subl %eax,%ebx		// Amount to subtract from %esp
	subl %ebx,%esp		// Prepare the stack ptr --
				//   now it will be aligned on 128-byte boundary at the call

	movl 24(%ebp),%eax	// copy from p_argv[]
	movl %eax,-4(%ebp)	// into the local variable *argv.

	movl 20(%ebp),%ebx	// argc is 20(%ebp)
	shll $2,%ebx		// %ebx = argc*4 = byte offset past the last arg

KMP_LABEL(invoke_2):
	cmpl $0,%ebx		// any args left to push?
	jg   KMP_LABEL(invoke_4)
	jmp  KMP_LABEL(invoke_3)
	ALIGN 2
KMP_LABEL(invoke_4):
	movl -4(%ebp),%eax
	subl $4,%ebx		// decrement argc.
	addl %ebx,%eax		// index into argv.
	movl (%eax),%edx
	pushl %edx		// push p_argv[i] (reverse order)

	jmp  KMP_LABEL(invoke_2)
	ALIGN 2
KMP_LABEL(invoke_3):
	leal 16(%ebp),%eax	// push & tid
	pushl %eax

	leal 12(%ebp),%eax	// push & gtid
	pushl %eax

	movl 8(%ebp),%ebx
	call *%ebx		// call (*pkfn)();

	movl $1,%eax		// return 1;

	movl -12(%ebp),%ebx	// restore %ebx (pushed above; %esp has moved,
				// so reload it %ebp-relative)
	leave
	KMP_CFI_DEF esp,4
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
651
652
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the time-stamp counter; rdtsc leaves the 64-bit value in
// %edx:%eax, which is exactly the cdecl 64-bit return convention.
	PROC  __kmp_hardware_timestamp
	rdtsc
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp
661
662// -----------------------------------------------------------------------
663#endif /* KMP_ARCH_X86 */
664
665
666#if KMP_ARCH_X86_64
667
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
	.ident "Intel Corporation"
// --	.file "z_Linux_asm.s"
	.data
	ALIGN 4

// To prevent getting our code into .data section .text added to every routine definition for x86_64.
//------------------------------------------------------------------------
//
// FUNCTION __kmp_x86_cpuid
//
// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
//
// SysV AMD64 parameters:
// 	mode:         %edi   -> CPUID leaf (EAX)
// 	mode2:        %esi   -> CPUID sub-leaf (ECX)
// 	cpuid_buffer: %rdx   -> receives EAX,EBX,ECX,EDX as 4 ints

	.text
	PROC  __kmp_x86_cpuid

	pushq %rbp
	movq  %rsp,%rbp
	pushq %rbx                      // callee-save register, clobbered by cpuid

	movl  %esi, %ecx                // "mode2"
	movl  %edi, %eax                // "mode"
	movq  %rdx, %rsi                // cpuid_buffer (moved: cpuid writes %rdx)
	cpuid                           // Query the CPUID for the current processor

	movl  %eax, 0(%rsi)             // store results into buffer
	movl  %ebx, 4(%rsi)
	movl  %ecx, 8(%rsi)
	movl  %edx, 12(%rsi)

	popq  %rbx                      // callee-save register
	movq  %rbp, %rsp
	popq  %rbp
	ret

	DEBUG_INFO __kmp_x86_cpuid
716
717
718
719# if !KMP_ASM_INTRINS
720
//------------------------------------------------------------------------
//
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax  (old *p)
//
// Atomic 32-bit fetch-and-add via lock xadd.

	.text
	PROC  __kmp_test_then_add32

	movl  %esi, %eax                // "d"
	lock
	xaddl %eax,(%rdi)               // *p += d, %eax = old *p
	ret

	DEBUG_INFO __kmp_test_then_add32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
// return:	%rax  (old *p)

	.text
	PROC  __kmp_test_then_add64

	movq  %rsi, %rax                // "d"
	lock
	xaddq %rax,(%rdi)               // *p += d, %rax = old *p
	ret

	DEBUG_INFO __kmp_test_then_add64
766
767
//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%sil
//
// return:	%al  (old *p)
//
// Atomic 8-bit exchange: *p = d, returns the previous value.

	.text
	PROC  __kmp_xchg_fixed8

	movb  %sil, %al                 // "d"

	lock                            // xchg with a memory operand is implicitly
	xchgb %al,(%rdi)                // locked; the explicit prefix is harmless
	ret

	DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%si
// return:	%ax  (old *p)

	.text
	PROC  __kmp_xchg_fixed16

	movw  %si, %ax                  // "d"

	lock
	xchgw %ax,(%rdi)                // atomically swap %ax with *p
	ret

	DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax  (old *p)

	.text
	PROC  __kmp_xchg_fixed32

	movl  %esi, %eax                // "d"

	lock
	xchgl %eax,(%rdi)               // atomically swap %eax with *p
	ret

	DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
// return:	%rax  (old *p)

	.text
	PROC  __kmp_xchg_fixed64

	movq  %rsi, %rax                // "d"

	lock
	xchgq %rax,(%rdi)               // atomically swap %rax with *p
	ret

	DEBUG_INFO __kmp_xchg_fixed64
864
865
//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
// 	sv:	%edx
//
// return:	%eax  -- 1 if the swap happened (*p was cv, now sv), else 0

	.text
	PROC  __kmp_compare_and_store8

	movb  %sil, %al                 // "cv" -> %al, the cmpxchg comparand
	lock
	cmpxchgb %dl,(%rdi)
	sete  %al                       // if %al == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax                  // zero upper bits of %rax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
// 	sv:	%dx
//
// return:	%eax  -- 1 if the swap happened, else 0

	.text
	PROC  __kmp_compare_and_store16

	movw  %si, %ax                  // "cv"
	lock
	cmpxchgw %dx,(%rdi)
	sete  %al                       // if %ax == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax                  // zero upper bits of %rax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
// 	sv:	%edx
//
// return:	%eax  -- 1 if the swap happened, else 0

	.text
	PROC  __kmp_compare_and_store32

	movl  %esi, %eax                // "cv"
	lock
	cmpxchgl %edx,(%rdi)
	sete  %al                       // if %eax == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax                  // zero upper bits of %rax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
// 	sv:	%rdx
// return:	%eax  -- 1 if the swap happened, else 0

	.text
	PROC  __kmp_compare_and_store64

	movq  %rsi, %rax                // "cv"
	lock
	cmpxchgq %rdx,(%rdi)
	sete  %al                       // if %rax == (%rdi) set %al = 1 else set %al = 0
	andq  $1, %rax                  // zero upper bits of %rax so result is exactly 0/1
	ret

	DEBUG_INFO __kmp_compare_and_store64
971
//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
// 	sv:	%edx
//
// return:	old *p in %al.  cmpxchg leaves the old value in the
// accumulator on failure; on success the accumulator still holds cv,
// which equals the old value -- correct either way.

	.text
	PROC  __kmp_compare_and_store_ret8

	movb  %sil, %al                 // "cv"
	lock
	cmpxchgb %dl,(%rdi)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret8


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
// 	sv:	%dx
//
// return:	old *p in %ax (see the 8-bit version for the reasoning)

	.text
	PROC  __kmp_compare_and_store_ret16

	movw  %si, %ax                  // "cv"
	lock
	cmpxchgw %dx,(%rdi)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret16


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
// 	sv:	%edx
//
// return:	old *p in %eax

	.text
	PROC  __kmp_compare_and_store_ret32

	movl  %esi, %eax                // "cv"
	lock
	cmpxchgl %edx,(%rdi)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
// 	sv:	%rdx
// return:	old *p in %rax

	.text
	PROC  __kmp_compare_and_store_ret64

	movq  %rsi, %rax                // "cv"
	lock
	cmpxchgq %rdx,(%rdi)
	ret

	DEBUG_INFO __kmp_compare_and_store_ret64
1069
1070# endif /* !KMP_ASM_INTRINS */
1071
1072
1073# if ! (__MIC__ || __MIC2__)
1074
Jim Cownie5e8470a2013-09-27 10:38:44 +00001075# if !KMP_ASM_INTRINS
1076
//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
// 	addr:	%rdi
// 	data:	%xmm0 (lower 4 bytes)
//
// return:	%xmm0 (lower 4 bytes) -- old *addr
//
// Atomic float exchange: the 32-bit pattern is moved through %eax so a
// plain lock'd integer xchg can be used.

	.text
	PROC  __kmp_xchg_real32

	movd  %xmm0, %eax               // bit pattern of "data" -> %eax

	lock
	xchgl %eax, (%rdi)              // *addr = data, %eax = old bits

	movd  %eax, %xmm0               // old value -> return register

	ret

	DEBUG_INFO __kmp_xchg_real32


//------------------------------------------------------------------------
//
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// parameters:
// 	addr:	%rdi
// 	data:	%xmm0 (lower 8 bytes)
// return:	%xmm0 (lower 8 bytes) -- old *addr
//
// Same technique as the real32 version, via %rax.

	.text
	PROC  __kmp_xchg_real64

	movd  %xmm0, %rax               // bit pattern of "data" -> %rax

	lock
	xchgq %rax, (%rdi)              // *addr = data, %rax = old bits

	movd  %rax, %xmm0               // old value -> return register
	ret

	DEBUG_INFO __kmp_xchg_real64
1130
1131
# endif /* !KMP_ASM_INTRINS */
1133
# endif /* !(__MIC__ || __MIC2__) */
1135
1136
//------------------------------------------------------------------------
//
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
// __kmp_load_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
// 	p:	%rdi
//
// Loads the x87 FPU control word from *p (fldcw).

	.text
	PROC  __kmp_load_x87_fpu_control_word

	fldcw (%rdi)
	ret

	DEBUG_INFO __kmp_load_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
// __kmp_store_x87_fpu_control_word( kmp_int16 *p );
//
// parameters:
// 	p:	%rdi
//
// Stores the current x87 FPU control word to *p (fstcw waits for any
// pending unmasked FP exception first).

	.text
	PROC  __kmp_store_x87_fpu_control_word

	fstcw (%rdi)
	ret

	DEBUG_INFO __kmp_store_x87_fpu_control_word


//------------------------------------------------------------------------
//
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
//
// Clears the x87 exception flags.  On MIC, fnclex is avoided (known
// problem, no CQ filed): the FP environment is round-tripped through
// the SysV red zone (leaf function, so below-%rsp storage is valid)
// with the exception bits masked out instead.

	.text
	PROC  __kmp_clear_x87_fpu_status_word

#if __MIC__ || __MIC2__
// TODO: remove the workaround for problem with fnclex instruction (no CQ known)
	fstenv -32(%rsp)                 // store FP env into the red zone
	andw   $~0x80ff, 4-32(%rsp)      // clear bits 0-7,15 of the FP status word
	fldenv -32(%rsp)                 // load FP env back
	ret
#else
	fnclex
	ret
#endif

	DEBUG_INFO __kmp_clear_x87_fpu_status_word
1201
1202
1203//------------------------------------------------------------------------
1204//
1205// typedef void (*microtask_t)( int *gtid, int *tid, ... );
1206//
1207// int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001208// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
Jim Cownie5e8470a2013-09-27 10:38:44 +00001209// int gtid, int tid,
1210// int argc, void *p_argv[] ) {
1211// (*pkfn)( & gtid, & tid, argv[0], ... );
1212// return 1;
1213// }
1214//
1215// note:
1216// at call to pkfn must have %rsp 128-byte aligned for compiler
1217//
1218// parameters:
1219// %rdi: pkfn
1220// %esi: gtid
1221// %edx: tid
1222// %ecx: argc
1223// %r8: p_argv
1224//
1225// locals:
1226// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1227// __tid: tid parm pushed on stack so can pass &tid to pkfn
1228//
1229// reg temps:
1230// %rax: used all over the place
1231// %rdx: used in stack pointer alignment calculation
1232// %r11: used to traverse p_argv array
1233// %rsi: used as temporary for stack parameters
1234// used as temporary for number of pkfn parms to push
1235// %rbx: used to hold pkfn address, and zero constant, callee-save
1236//
1237// return: %eax (always 1/TRUE)
1238//
1239
1240__gtid = -16
1241__tid = -24
1242
1243// -- Begin __kmp_invoke_microtask
1244// mark_begin;
1245 .text
1246 PROC __kmp_invoke_microtask
1247
1248 pushq %rbp // save base pointer
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001249 KMP_CFI_DEF_OFFSET 16
1250 KMP_CFI_OFFSET rbp,-16
Jim Cownie5e8470a2013-09-27 10:38:44 +00001251 movq %rsp,%rbp // establish the base pointer for this routine.
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001252 KMP_CFI_REGISTER rbp
Jim Cownie5e8470a2013-09-27 10:38:44 +00001253 pushq %rbx // %rbx is callee-saved register
Jim Cownie5e8470a2013-09-27 10:38:44 +00001254 pushq %rsi // Put gtid on stack so can pass &tgid to pkfn
1255 pushq %rdx // Put tid on stack so can pass &tid to pkfn
1256
1257 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
1258 movq $0, %rbx // constant for cmovs later
1259 subq $4, %rax // subtract four args passed in registers to pkfn
1260#if __MIC__ || __MIC2__
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001261 js KMP_LABEL(kmp_0) // jump to movq
1262 jmp KMP_LABEL(kmp_0_exit) // jump ahead
1263KMP_LABEL(kmp_0):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001264 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001265KMP_LABEL(kmp_0_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001266#else
1267 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
1268#endif // __MIC__ || __MIC2__
1269
1270 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
1271 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
1272
1273 movq %rsp, %rdx //
1274 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
1275 // without align, stack ptr would be this
1276 movq %rdx, %rax // Save to %rax
1277
1278 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
1279 subq %rax, %rdx // Amount to subtract from %rsp
1280 subq %rdx, %rsp // Prepare the stack ptr --
1281 // now %rsp will align to 128-byte boundary at call site
1282
1283 // setup pkfn parameter reg and stack
1284 movq %rcx, %rax // argc -> %rax
1285 cmpq $0, %rsi
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001286 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
Jim Cownie5e8470a2013-09-27 10:38:44 +00001287 shlq $3, %rcx // argc*8 -> %rcx
1288 movq %r8, %rdx // p_argv -> %rdx
1289 addq %rcx, %rdx // &p_argv[argc] -> %rdx
1290
1291 movq %rsi, %rcx // max (0, argc-4) -> %rcx
1292
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001293KMP_LABEL(kmp_invoke_push_parms):
1294 // push nth - 7th parms to pkfn on stack
Jim Cownie5e8470a2013-09-27 10:38:44 +00001295 subq $8, %rdx // decrement p_argv pointer to previous parm
1296 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
1297 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
1298 subl $1, %ecx
1299
1300// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
1301// if the name of the label that is an operand of this jecxz starts with a dot (".");
1302// Apple's linker does not support 1-byte length relocation;
1303// Resolution: replace all .labelX entries with L_labelX.
1304
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001305 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
1306 jmp KMP_LABEL(kmp_invoke_push_parms)
Jim Cownie5e8470a2013-09-27 10:38:44 +00001307 ALIGN 3
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001308KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001309 // order here is important to avoid trashing
1310 // registers used for both input and output parms!
1311 movq %rdi, %rbx // pkfn -> %rbx
1312 leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
1313 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
1314
1315 movq %r8, %r11 // p_argv -> %r11
1316
1317#if __MIC__ || __MIC2__
1318 cmpq $4, %rax // argc >= 4?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001319 jns KMP_LABEL(kmp_4) // jump to movq
1320 jmp KMP_LABEL(kmp_4_exit) // jump ahead
1321KMP_LABEL(kmp_4):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001322 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001323KMP_LABEL(kmp_4_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001324
1325 cmpq $3, %rax // argc >= 3?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001326 jns KMP_LABEL(kmp_3) // jump to movq
1327 jmp KMP_LABEL(kmp_3_exit) // jump ahead
1328KMP_LABEL(kmp_3):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001329 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001330KMP_LABEL(kmp_3_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001331
1332 cmpq $2, %rax // argc >= 2?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001333 jns KMP_LABEL(kmp_2) // jump to movq
1334 jmp KMP_LABEL(kmp_2_exit) // jump ahead
1335KMP_LABEL(kmp_2):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001336 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001337KMP_LABEL(kmp_2_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001338
1339 cmpq $1, %rax // argc >= 1?
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001340 jns KMP_LABEL(kmp_1) // jump to movq
1341 jmp KMP_LABEL(kmp_1_exit) // jump ahead
1342KMP_LABEL(kmp_1):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001343 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001344KMP_LABEL(kmp_1_exit):
Jim Cownie5e8470a2013-09-27 10:38:44 +00001345#else
1346 cmpq $4, %rax // argc >= 4?
1347 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
1348
1349 cmpq $3, %rax // argc >= 3?
1350 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
1351
1352 cmpq $2, %rax // argc >= 2?
1353 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
1354
1355 cmpq $1, %rax // argc >= 1?
1356 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
1357#endif // __MIC__ || __MIC2__
1358
1359 call *%rbx // call (*pkfn)();
1360 movq $1, %rax // move 1 into return register;
1361
1362 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
1363 movq %rbp, %rsp // restore stack pointer
1364 popq %rbp // restore frame pointer
Andrey Churbanov054c50bf2015-02-10 18:51:52 +00001365 KMP_CFI_DEF rsp,8
Jim Cownie5e8470a2013-09-27 10:38:44 +00001366 ret
1367
1368 DEBUG_INFO __kmp_invoke_microtask
1369// -- End __kmp_invoke_microtask
1370
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Returns the processor's time-stamp counter as one 64-bit value in %rax
// (no serializing instruction is issued around the read).
	.text
	PROC __kmp_hardware_timestamp
	rdtsc				// TSC -> edx:eax; in 64-bit mode the
					// upper halves of %rdx/%rax are zeroed
	salq $32, %rdx			// high half into bits 63..32 of %rdx
	leaq (%rax,%rdx), %rax		// combine halves: bit ranges are
					// disjoint, so add is equivalent to or
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End __kmp_hardware_timestamp
1382
1383//------------------------------------------------------------------------
1384//
1385// FUNCTION __kmp_bsr32
1386//
1387// int
1388// __kmp_bsr32( int );
1389//
1390
1391 .text
1392 PROC __kmp_bsr32
1393
1394 bsr %edi,%eax
1395 ret
1396
1397 DEBUG_INFO __kmp_bsr32
1398
1399
1400// -----------------------------------------------------------------------
1401#endif /* KMP_ARCH_X86_64 */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001402
#if KMP_ARCH_ARM
// GOMP compatibility: reserve a 32-byte common block to serve as the
// unnamed critical-section lock, and export its address under a symbol
// the C parts of the runtime can reference.
// NOTE(review): name ".gomp_critical_user_" presumably must match the
// symbol libgomp-compatible code expects -- confirm before renaming.
	.data
	.comm .gomp_critical_user_,32,8
	.data
	.align 4	// NOTE(review): GAS .align is target-dependent; on ARM
			// this is a power of two (16-byte alignment) -- verify
	.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
	.4byte .gomp_critical_user_	// 32-bit pointer to the lock storage
	.size __kmp_unnamed_critical_addr,4
#endif /* KMP_ARCH_ARM */
1413
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001414#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
Jim Cownie3051f972014-08-07 10:12:54 +00001415 .data
1416 .comm .gomp_critical_user_,32,8
1417 .data
1418 .align 8
1419 .global __kmp_unnamed_critical_addr
1420__kmp_unnamed_critical_addr:
1421 .8byte .gomp_critical_user_
1422 .size __kmp_unnamed_critical_addr,8
Andrey Churbanovcbda8682015-01-13 14:43:35 +00001423#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */
Jim Cownie181b4bb2013-12-23 17:28:57 +00001424
#if defined(__linux__)
// Emit an (empty) .note.GNU-stack section so the GNU linker does not
// mark the final binary's stack executable on account of this file.
# if KMP_ARCH_ARM
// ARM EABI assemblers treat '@' as a comment character, so the section
// type must be spelled with '%' here.
.section .note.GNU-stack,"",%progbits
# else
.section .note.GNU-stack,"",@progbits
# endif
#endif