; z_Windows_NT-586_asm.asm: - microtasking routines specifically
; written for IA-32 architecture and Intel(R) 64 running Windows* OS

;
;//===----------------------------------------------------------------------===//
;//
;// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
;// See https://llvm.org/LICENSE.txt for license information.
;// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;//
;//===----------------------------------------------------------------------===//
;

14 TITLE z_Windows_NT-586_asm.asm
15
16; ============================= IA-32 architecture ==========================
17ifdef _M_IA32
18
19 .586P
20
21if @Version gt 510
22 .model HUGE
23else
24_TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
25_TEXT ENDS
26_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
27_DATA ENDS
28CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
29CONST ENDS
30_BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
31_BSS ENDS
32$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM'
33$$SYMBOLS ENDS
34$$TYPES SEGMENT BYTE USE32 'DEBTYP'
35$$TYPES ENDS
36_TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
37_TLS ENDS
38FLAT GROUP _DATA, CONST, _BSS
39 ASSUME CS: FLAT, DS: FLAT, SS: FLAT
40endif
41
42
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void )
;
; Executes a single PAUSE instruction and returns.  Reads no arguments and
; writes no registers.
PUBLIC  ___kmp_x86_pause
_TEXT   SEGMENT
        ALIGN   16
___kmp_x86_pause PROC NEAR

        db      0f3H, 090H              ; PAUSE, emitted as its raw encoding F3 90
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with EAX = mode and ECX = mode2, then stores the resulting
; EAX, EBX, ECX and EDX into *p at byte offsets 0, 4, 8 and 12.
; EBX, ECX, EDX and EDI are saved and restored; EAX is left holding the
; EAX value produced by CPUID.
PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to EBP
_mode$  = 8
_mode2$ = 12
_p$     = 16
; byte offsets of the four result fields inside *p
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp

        push    edi
        push    ebx
        push    ecx
        push    edx

        mov     edi, DWORD PTR _p$[ebp]         ; edi = p (CPUID does not touch EDI)
        mov     eax, DWORD PTR _mode$[ebp]      ; leaf
        mov     ecx, DWORD PTR _mode2$[ebp]     ; sub-leaf
        cpuid

        mov     DWORD PTR [edi + _eax$], eax
        mov     DWORD PTR [edi + _ebx$], ebx
        mov     DWORD PTR [edi + _ecx$], ecx
        mov     DWORD PTR [edi + _edx$], edx

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi

        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_x86_cpuid ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (LOCK XADD).
; return: eax = value *p held before the addition
PUBLIC  ___kmp_test_then_add32
; stack arguments, relative to ESP on entry
_p$     = 4
_d$     = 8
_TEXT   SEGMENT
        ALIGN   16
___kmp_test_then_add32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     eax, DWORD PTR _d$[esp]         ; eax = d
        lock xadd DWORD PTR [ecx], eax          ; *p += d, eax = previous *p
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv then *p = sv.
; return: eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_cv$    = 8
_sv$    = 12

___kmp_compare_and_store8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     dl, BYTE PTR _sv$[esp]          ; dl  = value to store
        mov     al, BYTE PTR _cv$[esp]          ; al  = expected value
        lock cmpxchg BYTE PTR [ecx], dl         ; ZF = 1 when *p matched al
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of eax
        ret

___kmp_compare_and_store8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv then *p = sv.
; return: eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_cv$    = 8
_sv$    = 12

___kmp_compare_and_store16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     dx, WORD PTR _sv$[esp]          ; dx  = value to store
        mov     ax, WORD PTR _cv$[esp]          ; ax  = expected value
        lock cmpxchg WORD PTR [ecx], dx         ; ZF = 1 when *p matched ax
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of eax
        ret

___kmp_compare_and_store16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv then *p = sv.
; return: eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_cv$    = 8
_sv$    = 12

___kmp_compare_and_store32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     edx, DWORD PTR _sv$[esp]        ; edx = value to store
        mov     eax, DWORD PTR _cv$[esp]        ; eax = expected value
        lock cmpxchg DWORD PTR [ecx], edx       ; ZF = 1 when *p matched eax
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of eax
        ret

___kmp_compare_and_store32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on LOCK CMPXCHG8B:
; if *p == cv then *p = sv.
; return: eax = 1 if the store happened, 0 otherwise
; EBX and EDI are saved and restored.
PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to EBP (each 64-bit value occupies two dwords)
_p$       = 8
_cv_low$  = 12
_cv_high$ = 16
_sv_low$  = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi

        mov     edi, DWORD PTR _p$[ebp]         ; edi     = p
        mov     eax, DWORD PTR _cv_low$[ebp]    ; edx:eax = expected value
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]    ; ecx:ebx = value to store
        mov     ecx, DWORD PTR _sv_high$[ebp]
        lock cmpxchg8b QWORD PTR [edi]          ; ZF = 1 when *p matched edx:eax
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of eax

        pop     edi
        pop     ebx
        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p.
; return: al = byte that *p held before the exchange
PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_d$     = 8

___kmp_xchg_fixed8 PROC NEAR

        mov     al, BYTE PTR _d$[esp]           ; al  = d
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        lock xchg BYTE PTR [ecx], al            ; swap; al = previous *p
        ret

___kmp_xchg_fixed8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p.
; return: ax = word that *p held before the exchange
PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_d$     = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ax, WORD PTR _d$[esp]           ; ax  = d
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        lock xchg WORD PTR [ecx], ax            ; swap; ax = previous *p
        ret

___kmp_xchg_fixed16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p.
; return: eax = dword that *p held before the exchange
PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_d$     = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     eax, DWORD PTR _d$[esp]         ; eax = d
        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        lock xchg DWORD PTR [ecx], eax          ; swap; eax = previous *p
        ret

___kmp_xchg_fixed32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p and returns the value
; that was replaced.
;
; return: ST(0) = previous contents of *p; no other x87 register is left
;         occupied.
;
; The previous value is taken from the register result of the locked XCHG,
; so it is exactly the value that was swapped out.  (Reading *p with a
; separate FLD before the exchange would not be atomic with it, and using
; FST instead of FSTP for that read would leave an extra entry on the x87
; register stack.)
PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to EBP
_p$     = 8
_d$     = 12
; local scratch dword used to move the old bits from EAX to the x87 unit
_old_value$ = -4

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                          ; room for _old_value$

        mov     ecx, DWORD PTR _p$[ebp]         ; ecx = p
        mov     eax, DWORD PTR _d$[ebp]         ; eax = bit pattern of d

        lock xchg DWORD PTR [ecx], eax          ; *p = d, eax = previous bits

        mov     DWORD PTR _old_value$[ebp], eax
        fld     DWORD PTR _old_value$[ebp]      ; return previous value in ST(0)

        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
; return: al = value of *p seen by CMPXCHG (equal to cv when the store
;         happened)
PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_cv$    = 8
_sv$    = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     dl, BYTE PTR _sv$[esp]          ; dl  = value to store
        mov     al, BYTE PTR _cv$[esp]          ; al  = expected value
        lock cmpxchg BYTE PTR [ecx], dl         ; on mismatch al is loaded from *p
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
; return: ax = value of *p seen by CMPXCHG (equal to cv when the store
;         happened)
PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_cv$    = 8
_sv$    = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     dx, WORD PTR _sv$[esp]          ; dx  = value to store
        mov     ax, WORD PTR _cv$[esp]          ; ax  = expected value
        lock cmpxchg WORD PTR [ecx], dx         ; on mismatch ax is loaded from *p
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
; return: eax = value of *p seen by CMPXCHG (equal to cv when the store
;         happened)
PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to ESP on entry
_p$     = 4
_cv$    = 8
_sv$    = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        mov     edx, DWORD PTR _sv$[esp]        ; edx = value to store
        mov     eax, DWORD PTR _cv$[esp]        ; eax = expected value
        lock cmpxchg DWORD PTR [ecx], edx       ; on mismatch eax is loaded from *p
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap (LOCK CMPXCHG8B) that reports the observed
; value: if *p == cv then *p = sv.
; return: edx:eax = value of *p seen by CMPXCHG8B (equal to cv when the
;         store happened)
; EBX and EDI are saved and restored.
PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to EBP (each 64-bit value occupies two dwords)
_p$       = 8
_cv_low$  = 12
_cv_high$ = 16
_sv_low$  = 20
_sv_high$ = 24

___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi

        mov     edi, DWORD PTR _p$[ebp]         ; edi     = p
        mov     eax, DWORD PTR _cv_low$[ebp]    ; edx:eax = expected value
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]    ; ecx:ebx = value to store
        mov     ecx, DWORD PTR _sv_high$[ebp]
        lock cmpxchg8b QWORD PTR [edi]          ; on mismatch edx:eax is loaded from *p

        pop     edi
        pop     ebx
        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at *p (FLDCW).
;
; parameters:
;       p:      [esp + 4]
PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN   16
_p$     = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]         ; eax = p
        fldcw   WORD PTR [eax]                  ; control word <- *p
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Writes the current x87 FPU control word to the 16-bit location *p (FSTCW).
;
; parameters:
;       p:      [esp + 4]
PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN   16
_p$     = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]         ; eax = p
        fstcw   WORD PTR [eax]                  ; *p <- control word
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Clears the x87 exception flags with FNCLEX (the form that does not first
; check for pending unmasked exceptions).
PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN   16

___kmp_clear_x87_fpu_status_word PROC NEAR

        fnclex
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with every
; argument on the stack and ESP 128-byte aligned at the call instruction,
; removes the outgoing arguments afterwards, and returns 1 in EAX.
; &gtid and &tid are the addresses of this routine's own stack arguments.
; EBX, ESI and EDI are saved and restored.
; When OMPT_SUPPORT is non-zero, a further stack argument (exit_frame) is
; read and this routine's EBP is stored through it before the call.
PUBLIC  ___kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN   16
; stack arguments, relative to EBP
_pkfn$  = 8
_gtid$  = 12
_tid$   = 16
_argc$  = 20
_argv$  = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
; local: total number of bytes to add back to ESP after the call
_stk_adj$ = -16

___kmp_invoke_microtask PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 16                         ; 16 bytes of locals; only _stk_adj$ is used
        push    ebx
        push    esi
        push    edi
if OMPT_SUPPORT
        mov     eax, DWORD PTR _exit_frame$[ebp]
        mov     DWORD PTR [eax], ebp            ; *exit_frame = this frame's EBP
endif

;; ------------------------------------------------------------
;; Lower ESP so that, once (argc + 2) dwords have been pushed, it sits on
;; a 128-byte boundary.
        mov     eax, DWORD PTR _argc$[ebp]
        lea     edx, DWORD PTR [eax*4+8]        ; edx = (argc + 2) * 4 = bytes of outgoing args
        mov     ecx, esp
        mov     eax, edx                        ; eax = bytes of outgoing args
        sub     ecx, edx                        ; ecx = where ESP would end up unaligned
        mov     edx, ecx
        and     ecx, -128                       ; ecx = that address rounded down to 128
        sub     edx, ecx                        ; edx = padding needed (0..127)
        sub     esp, edx                        ; apply the padding now
        add     edx, eax                        ; padding + argument bytes
        mov     DWORD PTR _stk_adj$[ebp], edx   ; remembered for the cleanup after the call
;; ------------------------------------------------------------

;; Push p_argv[argc-1] down to p_argv[0]; nothing is pushed when argc <= 0.
        mov     ecx, DWORD PTR _argc$[ebp]      ; ecx = i, counts down from argc
        mov     eax, DWORD PTR _argv$[ebp]      ; eax = p_argv
        jmp     SHORT $kmp_argv_test
$kmp_argv_push:
        push    DWORD PTR [eax+ecx*4-4]         ; push p_argv[i-1]
        sub     ecx, 1
$kmp_argv_test:
        cmp     ecx, 0
        jg      SHORT $kmp_argv_push

;; The two leading pointer arguments.
        lea     eax, DWORD PTR _tid$[ebp]
        push    eax                             ; 2nd argument: &tid
        lea     eax, DWORD PTR _gtid$[ebp]
        push    eax                             ; 1st argument: &gtid

        call    DWORD PTR _pkfn$[ebp]

        add     esp, DWORD PTR _stk_adj$[ebp]   ; drop outgoing args and alignment padding
        mov     eax, 1                          ; return value

        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_invoke_microtask ENDP
_TEXT   ENDS

endif   ; _M_IA32

; ==================================== Intel(R) 64 ===================================
; Everything from here to the matching "endif" is assembled only when
; _M_AMD64 is defined.

ifdef _M_AMD64

;------------------------------------------------------------------------
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with EAX = mode and ECX = mode2, then stores the resulting
; EAX, EBX, ECX and EDX into *p at byte offsets 0, 4, 8 and 12.
;
; parameters:
;       mode:           ecx
;       mode2:          edx
;       cpuid_buffer:   r8
;
; RBX is saved and restored because CPUID overwrites it.
PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN   16

__kmp_x86_cpuid PROC FRAME

        push    rbp
        .pushreg rbp
        mov     rbp, rsp
        .setframe rbp, 0
        push    rbx
        .pushreg rbx
        .ENDPROLOG

        mov     eax, ecx                        ; eax = mode  (leaf)
        mov     ecx, edx                        ; ecx = mode2 (sub-leaf)
        mov     r10, r8                         ; r10 = p; CPUID leaves r10 alone
        cpuid

        mov     DWORD PTR [r10 + 0], eax
        mov     DWORD PTR [r10 + 4], ebx
        mov     DWORD PTR [r10 + 8], ecx
        mov     DWORD PTR [r10 + 12], edx

        pop     rbx
        mov     rsp, rbp
        pop     rbp
        ret

__kmp_x86_cpuid ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (LOCK XADD).
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax = value *p held before the addition
PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN   16
__kmp_test_then_add32 PROC

        mov     eax, edx                        ; eax = d
        lock xadd DWORD PTR [rcx], eax          ; *p += d, eax = previous *p
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add64
;
; kmp_int64
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically adds d to *p (LOCK XADD).  The result is the full 64-bit
; previous value, delivered in rax.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax = value *p held before the addition
PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN   16
__kmp_test_then_add64 PROC

        mov     rax, rdx                        ; rax = d
        lock xadd QWORD PTR [rcx], rax          ; *p += d, rax = previous *p
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store8 PROC

        mov     al, dl                          ; al = expected value
        lock cmpxchg BYTE PTR [rcx], r8b        ; ZF = 1 when *p matched al
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of rax
        ret

__kmp_compare_and_store8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store16 PROC

        mov     ax, dx                          ; ax = expected value
        lock cmpxchg WORD PTR [rcx], r8w        ; ZF = 1 when *p matched ax
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of rax
        ret

__kmp_compare_and_store16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store32 PROC

        mov     eax, edx                        ; eax = expected value
        lock cmpxchg DWORD PTR [rcx], r8d       ; ZF = 1 when *p matched eax
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of rax
        ret

__kmp_compare_and_store32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap: if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store64 PROC

        mov     rax, rdx                        ; rax = expected value
        lock cmpxchg QWORD PTR [rcx], r8        ; ZF = 1 when *p matched rax
        sete    al                              ; al = ZF
        movzx   eax, al                         ; widen the 0/1 flag to all of rax
        ret

__kmp_compare_and_store64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p.
;
; parameters:
;       p:      rcx
;       d:      dl
;
; return:       al = byte that *p held before the exchange
PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN   16

__kmp_xchg_fixed8 PROC

        mov     al, dl                          ; al = d
        lock xchg BYTE PTR [rcx], al            ; swap; al = previous *p
        ret

__kmp_xchg_fixed8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p.
;
; parameters:
;       p:      rcx
;       d:      dx
;
; return:       ax = word that *p held before the exchange
PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN   16

__kmp_xchg_fixed16 PROC

        mov     ax, dx                          ; ax = d
        lock xchg WORD PTR [rcx], ax            ; swap; ax = previous *p
        ret

__kmp_xchg_fixed16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax = dword that *p held before the exchange
PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN   16
__kmp_xchg_fixed32 PROC

        mov     eax, edx                        ; eax = d
        lock xchg DWORD PTR [rcx], eax          ; swap; eax = previous *p
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically stores d into *p.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax = qword that *p held before the exchange
PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN   16
__kmp_xchg_fixed64 PROC

        mov     rax, rdx                        ; rax = d
        lock xchg QWORD PTR [rcx], rax          ; swap; rax = previous *p
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al = value of *p seen by CMPXCHG (equal to cv when the
;               store happened)
PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store_ret8 PROC

        mov     al, dl                          ; al = expected value
        lock cmpxchg BYTE PTR [rcx], r8b        ; match: *p = sv, ZF = 1
                                                ; mismatch: al = *p, ZF = 0
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       ax = value of *p seen by CMPXCHG (equal to cv when the
;               store happened)
PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store_ret16 PROC

        mov     ax, dx                          ; ax = expected value
        lock cmpxchg WORD PTR [rcx], r8w        ; on mismatch ax is loaded from *p
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = value of *p seen by CMPXCHG (equal to cv when the
;               store happened)
PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store_ret32 PROC

        mov     eax, edx                        ; eax = expected value
        lock cmpxchg DWORD PTR [rcx], r8d       ; on mismatch eax is loaded from *p
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap that reports the observed value:
; if *p == cv then *p = sv.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       rax = value of *p seen by CMPXCHG (equal to cv when the
;               store happened)
PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store_ret64 PROC

        mov     rax, rdx                        ; rax = expected value
        lock cmpxchg QWORD PTR [rcx], r8        ; on mismatch rax is loaded from *p
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Repeats an atomic byte compare-and-swap (if *p == cv then *p = sv) until
; it succeeds, executing PAUSE between attempts.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al = cv (the value that was replaced by the successful swap)
;
; cv must survive a failed attempt, because a failed CMPXCHG overwrites AL
; with the current *p and AL has to be reloaded for the retry.  For that
; reason sv is used straight from r8b and dl is never written.
; NOTE(review): assumes the intended contract is "spin until *p == cv was
; observed and replaced" - confirm against the callers.
PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN   16

__kmp_compare_and_store_loop8 PROC
$__kmp_loop:
        mov     al, dl                          ; al = cv, reloaded on every attempt
        lock cmpxchg BYTE PTR [rcx], r8b        ; match: *p = sv, ZF = 1
                                                ; mismatch: al = *p, ZF = 0
        jz      SHORT $__kmp_success

        db      0f3H                            ; pause
        db      090H

        jmp     SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p.  The exchange is done
; on the raw bits through eax.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 4 bytes)
;
; return:       xmm0 (lower 4 bytes) = previous contents of *p
PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN   16
__kmp_xchg_real32 PROC

        movd    eax, xmm1                       ; eax = bits of d
        lock xchg DWORD PTR [rcx], eax          ; swap; eax = previous bits of *p
        movd    xmm0, eax                       ; hand them back in xmm0
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; Atomically stores the 64-bit pattern of d into *p.  The exchange is done
; on the raw bits through rax.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 8 bytes)
;
; return:       xmm0 (lower 8 bytes) = previous contents of *p
PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN   16
__kmp_xchg_real64 PROC

        movd    rax, xmm1                       ; rax = bits of d
        lock xchg QWORD PTR [rcx], rax          ; swap; rax = previous bits of *p
        movd    xmm0, rax                       ; hand them back in xmm0
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at *p (FLDCW).
;
; parameters:
;       p:      rcx
PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN   16
__kmp_load_x87_fpu_control_word PROC

        fldcw   WORD PTR [rcx]                  ; control word <- *p
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Writes the current x87 FPU control word to the 16-bit location *p (FSTCW).
;
; parameters:
;       p:      rcx
PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN   16
__kmp_store_x87_fpu_control_word PROC

        fstcw   WORD PTR [rcx]                  ; *p <- control word
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Clears the x87 exception flags with FNCLEX (the form that does not first
; check for pending unmasked exceptions).
PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN   16
__kmp_clear_x87_fpu_status_word PROC

        fnclex
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] ) {
;
;     (*pkfn) ( &gtid, &tid, argv[0], ... );
;     return 1;
; }
;
; note:
;       just before call to pkfn must have rsp 128-byte aligned for compiler
;
; parameters:
;       rcx:    pkfn    16[rbp]
;       edx:    gtid    24[rbp]
;       r8d:    tid     32[rbp]
;       r9d:    argc    40[rbp]
;       [st]:   p_argv  48[rbp]
;       (when OMPT_SUPPORT is non-zero, a further stack argument at 56[rbp]
;        is read as a pointer and this routine's rbp is stored through it)
;
; Only gtid and tid are written to their home slots; &gtid and &tid passed
; to pkfn are the addresses of those slots.
;
; argc is a 32-bit int, so only r9d is defined on entry.  It is
; sign-extended into r9 once, before any 64-bit use.
;
; reg temps:
;       rax:    used all over the place
;       rdx:    used all over the place
;       rcx:    used as argument counter for push parms loop
;       r10:    used to hold pkfn function pointer argument
;
; return:       eax     (always 1/TRUE)
$_pkfn   = 16
$_gtid   = 24
$_tid    = 32
$_argc   = 40
$_p_argv = 48
if OMPT_SUPPORT
$_exit_frame = 56
endif

PUBLIC  __kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN   16

__kmp_invoke_microtask PROC FRAME
        mov     QWORD PTR 16[rsp], rdx          ; home gtid parameter
        mov     QWORD PTR 24[rsp], r8           ; home tid parameter
        push    rbp                             ; save base pointer
        .pushreg rbp
        sub     rsp, 0                          ; no fixed allocation necessary - end prolog

        lea     rbp, QWORD PTR [rsp]            ; establish the base pointer
        .setframe rbp, 0
        .ENDPROLOG
if OMPT_SUPPORT
        mov     rax, QWORD PTR $_exit_frame[rbp]
        mov     QWORD PTR [rax], rbp            ; *exit_frame = this frame's rbp
endif
        mov     r10, rcx                        ; save pkfn pointer for later
        movsxd  r9, r9d                         ; argc is an int: upper half of r9 is
                                                ; undefined on entry, so widen it once

;; ------------------------------------------------------------
        mov     rax, r9                         ; rax <= argc
        cmp     rax, 2
        jge     SHORT $_kmp_invoke_stack_align
        mov     rax, 2                          ; set 4 homes if less than 2 parms
$_kmp_invoke_stack_align:
        lea     rdx, QWORD PTR [rax*8+16]       ; rdx <= (argc + 2) * 8
        mov     rax, rsp                        ; Save current SP into rax
        sub     rax, rdx                        ; rsp - ((argc+2)*8) -> rax
                                                ; without align, rsp would be this
        and     rax, -128                       ; Mask off 7 bits (128-byte align)
        add     rax, rdx                        ; add space for push's in a loop below
        mov     rsp, rax                        ; Prepare the stack ptr
                                                ; Now it will align to 128-byte at the call
;; ------------------------------------------------------------
                                                ; setup pkfn parameter stack
        mov     rax, r9                         ; rax <= argc
        shl     rax, 3                          ; rax <= argc*8
        mov     rdx, QWORD PTR $_p_argv[rbp]    ; rdx <= p_argv
        add     rdx, rax                        ; rdx <= &p_argv[argc]
        mov     rcx, r9                         ; rcx <= argc
        jecxz   SHORT $_kmp_invoke_pass_parms   ; nothing to push if argc=0
        cmp     ecx, 1                          ; if argc=1 branch ahead
        je      SHORT $_kmp_invoke_one_parm
        sub     ecx, 2                          ; if argc=2 branch ahead; ecx now counts
        je      SHORT $_kmp_invoke_two_parms    ; the parms that must go on the stack

$_kmp_invoke_push_parms:                        ; push last - 5th parms to pkfn on stack
        sub     rdx, 8                          ; decrement p_argv pointer to previous parm
        mov     r8, QWORD PTR [rdx]             ; r8 <= p_argv[rcx-1]
        push    r8                              ; push p_argv[rcx-1] onto stack (reverse order)
        sub     ecx, 1
        jecxz   SHORT $_kmp_invoke_two_parms
        jmp     SHORT $_kmp_invoke_push_parms

$_kmp_invoke_two_parms:
        sub     rdx, 8                          ; put 4th parm to pkfn in r9
        mov     r9, QWORD PTR [rdx]             ; r9 <= p_argv[1]

$_kmp_invoke_one_parm:
        sub     rdx, 8                          ; put 3rd parm to pkfn in r8
        mov     r8, QWORD PTR [rdx]             ; r8 <= p_argv[0]

$_kmp_invoke_pass_parms:                        ; put 1st & 2nd parms to pkfn in registers
        lea     rdx, QWORD PTR $_tid[rbp]       ; rdx <= &tid (2nd parm to pkfn)
        lea     rcx, QWORD PTR $_gtid[rbp]      ; rcx <= &gtid (1st parm to pkfn)
        sub     rsp, 32                         ; add stack space for first four parms
        mov     rax, r10                        ; rax <= pkfn
        call    rax                             ; call (*pkfn)()
        mov     rax, 1                          ; move 1 into return register;

        lea     rsp, QWORD PTR [rbp]            ; restore stack pointer

;       add     rsp, 0                          ; no fixed allocation necessary - start epilog
        pop     rbp                             ; restore frame pointer
        ret
__kmp_invoke_microtask ENDP
_TEXT   ENDS

endif   ; _M_AMD64

END