;  z_Windows_NT-586_asm.asm:  - microtasking routines specifically
;                               written for IA-32 architecture and Intel(R) 64 running Windows* OS

;
;//===----------------------------------------------------------------------===//
;//
;//                     The LLVM Compiler Infrastructure
;//
;// This file is dual licensed under the MIT and the University of Illinois Open
;// Source Licenses. See LICENSE.txt for details.
;//
;//===----------------------------------------------------------------------===//
;

15 TITLE z_Windows_NT-586_asm.asm
16
17; ============================= IA-32 architecture ==========================
18ifdef _M_IA32
19
20 .586P
21
22if @Version gt 510
23 .model HUGE
24else
25_TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
26_TEXT ENDS
27_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
28_DATA ENDS
29CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
30CONST ENDS
31_BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
32_BSS ENDS
33$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM'
34$$SYMBOLS ENDS
35$$TYPES SEGMENT BYTE USE32 'DEBTYP'
36$$TYPES ENDS
37_TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
38_TLS ENDS
39FLAT GROUP _DATA, CONST, _BSS
40 ASSUME CS: FLAT, DS: FLAT, SS: FLAT
41endif
42
43
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void )
;
; Executes one PAUSE instruction (spin-wait hint) and returns.
; Takes no arguments, so no stack-offset equates are defined here.
;

PUBLIC  ___kmp_x86_pause
_TEXT   SEGMENT
        ALIGN 16
___kmp_x86_pause PROC NEAR

        db      0f3H
        db      090H    ;; pause, emitted as its raw encoding F3 90
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS
65
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Runs CPUID with eax = mode and ecx = mode2, then writes the resulting
; eax, ebx, ecx, edx to the four consecutive dwords at p.
; edi, ebx, ecx and edx are saved and restored; eax is left holding the
; CPUID eax result.
;

PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16
; stack arguments, relative to ebp once the frame is set up
_mode$  = 8
_mode2$ = 12
_p$     = 16
; byte offsets of the result fields inside *p
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp

        push    edi
        push    ebx
        push    ecx
        push    edx

        mov     ecx, DWORD PTR [ebp + _mode2$]
        mov     eax, DWORD PTR [ebp + _mode$]
        cpuid                           ; Query the CPUID for the current processor

        mov     edi, DWORD PTR [ebp + _p$]
        mov     DWORD PTR [edi + _eax$], eax
        mov     DWORD PTR [edi + _ebx$], ebx
        mov     DWORD PTR [edi + _ecx$], ecx
        mov     DWORD PTR [edi + _edx$], edx

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi

        leave                           ; mov esp, ebp / pop ebp
        ret

___kmp_x86_cpuid ENDP
_TEXT   ENDS
116
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (lock xadd) and returns in eax the value *p
; held before the addition.
;

PUBLIC  ___kmp_test_then_add32
_p$ = 4
_d$ = 8
_TEXT   SEGMENT
        ALIGN 16
___kmp_test_then_add32 PROC NEAR

        mov     ecx, DWORD PTR [esp + _p$]
        mov     eax, DWORD PTR [esp + _d$]
        lock xadd DWORD PTR [ecx], eax  ; eax <= old *p, *p <= old *p + d
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS
139
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv, stores sv into *p.
; Returns eax = 1 when the store happened, 0 otherwise.
;

PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store8 PROC NEAR

        mov     dl, BYTE PTR [esp + _sv$]
        mov     al, BYTE PTR [esp + _cv$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg BYTE PTR [ecx], dl
        sete    al                      ; al = 1 if al matched [ecx], else 0
        movzx   eax, al                 ; zero-extend the flag to the full register
        ret

___kmp_compare_and_store8 ENDP
_TEXT   ENDS
167
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns eax = 1 when the store happened, 0 otherwise.
;

PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store16 PROC NEAR

        mov     dx, WORD PTR [esp + _sv$]
        mov     ax, WORD PTR [esp + _cv$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg WORD PTR [ecx], dx
        sete    al                      ; al = 1 if ax matched [ecx], else 0
        movzx   eax, al                 ; zero-extend the flag to the full register
        ret

___kmp_compare_and_store16 ENDP
_TEXT   ENDS
195
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns eax = 1 when the store happened, 0 otherwise.
;

PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store32 PROC NEAR

        mov     edx, DWORD PTR [esp + _sv$]
        mov     eax, DWORD PTR [esp + _cv$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg DWORD PTR [ecx], edx
        sete    al                      ; al = 1 if eax matched [ecx], else 0
        movzx   eax, al                 ; zero-extend the flag to the full register
        ret

___kmp_compare_and_store32 ENDP
_TEXT   ENDS
223
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on cmpxchg8b: cv travels in
; edx:eax, sv in ecx:ebx. If *p == cv, sv is stored into *p.
; Returns eax = 1 when the store happened, 0 otherwise.
; ebx and edi are saved and restored.
;

PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi
        mov     edi, DWORD PTR [ebp + _p$]
        mov     ebx, DWORD PTR [ebp + _sv_low$]
        mov     ecx, DWORD PTR [ebp + _sv_high$]
        mov     eax, DWORD PTR [ebp + _cv_low$]
        mov     edx, DWORD PTR [ebp + _cv_high$]
        lock cmpxchg8b QWORD PTR [edi]
        sete    al                      ; al = 1 if edx:eax matched [edi], else 0
        movzx   eax, al                 ; zero-extend the flag to the full register
        pop     edi
        pop     ebx
        leave                           ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT   ENDS
263
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically swaps the byte at p with d; the previous byte comes back in al.
;

PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed8 PROC NEAR

        mov     al, BYTE PTR [esp + _d$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock xchg BYTE PTR [ecx], al    ; al <= old *p, *p <= d
        ret

___kmp_xchg_fixed8 ENDP
_TEXT   ENDS
287
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically swaps the 16-bit word at p with d; the previous word comes
; back in ax.
;

PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ax, WORD PTR [esp + _d$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock xchg WORD PTR [ecx], ax    ; ax <= old *p, *p <= d
        ret

___kmp_xchg_fixed16 ENDP
_TEXT   ENDS
311
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically swaps the dword at p with d; the previous dword comes back
; in eax.
;

PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     eax, DWORD PTR [esp + _d$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock xchg DWORD PTR [ecx], eax  ; eax <= old *p, *p <= d
        ret

___kmp_xchg_fixed32 ENDP
_TEXT   ENDS
335
336
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically replaces the 32-bit float at p with d and returns the value
; that was swapped out in st(0).
;
; The returned value is the one the xchg itself fetched, not the result
; of a separate earlier load, and exactly one x87 register is pushed so
; the x87 stack does not grow from call to call.
;

PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_d$ = 12
_old_value$ = -4

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                  ; room for _old_value$

        mov     ecx, DWORD PTR [ebp + _p$]
        mov     eax, DWORD PTR [ebp + _d$]

        lock xchg DWORD PTR [ecx], eax  ; eax <= bits previously stored at *p

        mov     DWORD PTR [ebp + _old_value$], eax
                        ;; spill: x87 can only load from memory
        fld     DWORD PTR [ebp + _old_value$]
                        ;; return old value in st(0)

        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS
378
379
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap that reports the value seen at *p:
; on a match al still holds cv and sv has been stored; on a mismatch
; cmpxchg loads the current *p into al and nothing is stored.
;

PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     dl, BYTE PTR [esp + _sv$]
        mov     al, BYTE PTR [esp + _cv$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg BYTE PTR [ecx], dl
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS
405
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap that reports the value seen at *p:
; on a match ax still holds cv and sv has been stored; on a mismatch
; cmpxchg loads the current *p into ax and nothing is stored.
;

PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     dx, WORD PTR [esp + _sv$]
        mov     ax, WORD PTR [esp + _cv$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg WORD PTR [ecx], dx
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS
431
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap that reports the value seen at *p:
; on a match eax still holds cv and sv has been stored; on a mismatch
; cmpxchg loads the current *p into eax and nothing is stored.
;

PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     edx, DWORD PTR [esp + _sv$]
        mov     eax, DWORD PTR [esp + _cv$]
        mov     ecx, DWORD PTR [esp + _p$]
        lock cmpxchg DWORD PTR [ecx], edx
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS
457
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on cmpxchg8b: cv travels in
; edx:eax, sv in ecx:ebx. The value seen at *p is returned in edx:eax
; (still cv on a match, the current contents on a mismatch).
; ebx and edi are saved and restored.
;

PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi
        mov     edi, DWORD PTR [ebp + _p$]
        mov     ebx, DWORD PTR [ebp + _sv_low$]
        mov     ecx, DWORD PTR [ebp + _sv_high$]
        mov     eax, DWORD PTR [ebp + _cv_low$]
        mov     edx, DWORD PTR [ebp + _cv_high$]
        lock cmpxchg8b QWORD PTR [edi]
        pop     edi
        pop     ebx
        leave                           ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS
495
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at p (fldcw).
;
; parameters:
;       p:      4(%esp)

PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR [esp + _p$]
        fldcw   WORD PTR [eax]          ; control word <= *p
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS
519
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Writes the current x87 FPU control word to the 16-bit location at p (fstcw).
;
; parameters:
;       p:      4(%esp)

PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR [esp + _p$]
        fstcw   WORD PTR [eax]          ; *p <= control word
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS
543
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Clears the x87 exception flags with fnclex (the form that does not
; first check for pending unmasked exceptions).
;

PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16

___kmp_clear_x87_fpu_status_word PROC NEAR

        fnclex
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS
563
564
;------------------------------------------------------------------------
;
; FUNCTION ___kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with every
; argument on the stack, and returns 1 in eax.
;
; Before the arguments are pushed, esp is lowered by a pad chosen so that
; esp is a multiple of 128 once all argc+2 dwords are pushed, i.e. at the
; call instruction. The pad plus the argument bytes is remembered in
; _stk_adj$ and added back to esp after the call.
;
; When OMPT_SUPPORT is non-zero, a sixth stack argument (28[ebp]) is taken
; as a pointer and this function's ebp is stored through it.
;

PUBLIC  ___kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16
; stack arguments, relative to ebp
_pkfn$ = 8
_gtid$ = 12
_tid$ = 16
_argc$ = 20
_argv$ = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
; local slot, relative to ebp
_stk_adj$ = -16

___kmp_invoke_microtask PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 16                                 ; 00000010H
        push    ebx
        push    esi
        push    edi
if OMPT_SUPPORT
        mov     eax, DWORD PTR [ebp + _exit_frame$]
        mov     DWORD PTR [eax], ebp                    ; publish this frame's ebp
endif

;; ------------------------------------------------------------
;; ecx = number of p_argv entries still to push
;; edx = bytes the call's arguments occupy: (argc + 2) * 4
;; eax = pad that makes (esp - edx) a multiple of 128
        mov     ecx, DWORD PTR [ebp + _argc$]
        lea     edx, DWORD PTR [ecx*4 + 8]
        mov     eax, esp
        sub     eax, edx
        and     eax, 127
        sub     esp, eax                                ; apply the pad
        add     eax, edx                                ; pad + argument bytes
        mov     DWORD PTR [ebp + _stk_adj$], eax        ; released after the call
;; ------------------------------------------------------------

        mov     edx, DWORD PTR [ebp + _argv$]
        jmp     SHORT $_kmp_ia32_check_arg
$_kmp_ia32_push_arg:
        push    DWORD PTR [edx + ecx*4 - 4]             ; p_argv[ecx-1], last first
        sub     ecx, 1
$_kmp_ia32_check_arg:
        cmp     ecx, 0
        jg      SHORT $_kmp_ia32_push_arg

        lea     eax, DWORD PTR [ebp + _tid$]
        push    eax                                     ; 2nd parm: &tid
        lea     eax, DWORD PTR [ebp + _gtid$]
        push    eax                                     ; 1st parm: &gtid

        call    DWORD PTR [ebp + _pkfn$]

        add     esp, DWORD PTR [ebp + _stk_adj$]        ; drop arguments and pad
        mov     eax, 1                                  ; always report success

        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret     0
___kmp_invoke_microtask ENDP
_TEXT   ENDS
672
673endif
674
675; ==================================== Intel(R) 64 ===================================
676
677ifdef _M_AMD64
678
;------------------------------------------------------------------------
;
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Runs CPUID with eax = mode and ecx = mode2, then writes the resulting
; eax, ebx, ecx, edx to the four consecutive dwords at p.
;
; parameters:
;       mode:           ecx
;       mode2:          edx
;       cpuid_buffer:   r8
;
; The epilogue is only register pops followed by ret, which is one of the
; forms the Windows x64 unwinder recognises as an epilogue.

PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16

__kmp_x86_cpuid PROC FRAME ;NEAR

        push    rbp
        .pushreg rbp
        mov     rbp, rsp
        .setframe rbp, 0
        push    rbx                             ; callee-save register
        .pushreg rbx
        .ENDPROLOG

        mov     r10, r8                         ; p parameter
        mov     eax, ecx                        ; mode parameter
        mov     ecx, edx                        ; mode2 parameter
        cpuid                                   ; Query the CPUID for the current processor

        mov     DWORD PTR 0[ r10 ], eax         ; store results into buffer
        mov     DWORD PTR 4[ r10 ], ebx
        mov     DWORD PTR 8[ r10 ], ecx
        mov     DWORD PTR 12[ r10 ], edx

        pop     rbx                             ; callee-save register; rsp == rbp afterwards
        pop     rbp
        ret

__kmp_x86_cpuid ENDP
_TEXT   ENDS
722
723
;------------------------------------------------------------------------
;
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (lock xadd) and returns the value *p held
; before the addition.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax

PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add32 PROC ;NEAR

        mov     eax, edx                        ; eax <= d
        lock xadd DWORD PTR [rcx], eax          ; eax <= old *p, *p <= old *p + d
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS
748
749
;------------------------------------------------------------------------
;
; FUNCTION __kmp_test_then_add64
;
; kmp_int32
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically adds d to *p (lock xadd) and returns the value *p held
; before the addition.
; NOTE(review): the prototype above says kmp_int32, yet the full 64-bit
; previous value is produced in rax - confirm against the C declaration.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax

PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add64 PROC ;NEAR

        mov     rax, rdx                        ; rax <= d
        lock xadd QWORD PTR [rcx], rax          ; rax <= old *p, *p <= old *p + d
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS
774
775
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax

PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store8 PROC ;NEAR

        mov     al, dl                          ; "cv"
        lock cmpxchg BYTE PTR [rcx], r8b        ; "sv" taken straight from r8b
        sete    al                              ; al = 1 if al matched [rcx], else 0
        movzx   eax, al                         ; zero-extend the flag through rax
        ret

__kmp_compare_and_store8 ENDP
_TEXT   ENDS
804
805
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax

PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store16 PROC ;NEAR

        mov     ax, dx                          ; "cv"
        lock cmpxchg WORD PTR [rcx], r8w        ; "sv" taken straight from r8w
        sete    al                              ; al = 1 if ax matched [rcx], else 0
        movzx   eax, al                         ; zero-extend the flag through rax
        ret

__kmp_compare_and_store16 ENDP
_TEXT   ENDS
834
835
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax

PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store32 PROC ;NEAR

        mov     eax, edx                        ; "cv"
        lock cmpxchg DWORD PTR [rcx], r8d       ; "sv" taken straight from r8d
        sete    al                              ; al = 1 if eax matched [rcx], else 0
        movzx   eax, al                         ; zero-extend the flag through rax
        ret

__kmp_compare_and_store32 ENDP
_TEXT   ENDS
864
865
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap: if *p == cv, stores sv into *p.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       eax

PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store64 PROC ;NEAR

        mov     rax, rdx                        ; "cv"
        lock cmpxchg QWORD PTR [rcx], r8        ; "sv" taken straight from r8
        sete    al                              ; al = 1 if rax matched [rcx], else 0
        movzx   eax, al                         ; zero-extend the flag through rax
        ret

__kmp_compare_and_store64 ENDP
_TEXT   ENDS
894
895
;------------------------------------------------------------------------
;
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically swaps the byte at p with d and returns the previous byte.
;
; parameters:
;       p:      rcx
;       d:      dl
;
; return:       al

PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed8 PROC ;NEAR

        mov     al, dl                          ; al <= d
        lock xchg BYTE PTR [rcx], al            ; al <= old *p, *p <= d
        ret

__kmp_xchg_fixed8 ENDP
_TEXT   ENDS
921
922
;------------------------------------------------------------------------
;
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically swaps the 16-bit word at p with d and returns the previous word.
;
; parameters:
;       p:      rcx
;       d:      dx
;
; return:       ax

PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed16 PROC ;NEAR

        mov     ax, dx                          ; ax <= d
        lock xchg WORD PTR [rcx], ax            ; ax <= old *p, *p <= d
        ret

__kmp_xchg_fixed16 ENDP
_TEXT   ENDS
948
949
;------------------------------------------------------------------------
;
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically swaps the dword at p with d and returns the previous dword.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax

PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed32 PROC ;NEAR

        mov     eax, edx                        ; eax <= d
        lock xchg DWORD PTR [rcx], eax          ; eax <= old *p, *p <= d
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS
974
975
;------------------------------------------------------------------------
;
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically swaps the qword at p with d and returns the previous qword.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax

PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed64 PROC ;NEAR

        mov     rax, rdx                        ; rax <= d
        lock xchg QWORD PTR [rcx], rax          ; rax <= old *p, *p <= d
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS
1000
1001
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap that reports the value seen at *p.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al

PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret8 PROC ;NEAR
        mov     al, dl                          ; "cv"
        lock cmpxchg BYTE PTR [rcx], r8b        ; "sv" taken straight from r8b
                        ; Compare AL with [rcx]. If equal set
                        ; ZF and store R8B into [rcx]. Else, clear
                        ; ZF and load [rcx] into AL.
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS
1030
1031
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap that reports the value seen at *p:
; ax still holds cv on a match, the current contents on a mismatch.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       ax

PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret16 PROC ;NEAR

        mov     ax, dx                          ; "cv"
        lock cmpxchg WORD PTR [rcx], r8w        ; "sv" taken straight from r8w
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS
1058
1059
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap that reports the value seen at *p:
; eax still holds cv on a match, the current contents on a mismatch.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax

PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret32 PROC ;NEAR

        mov     eax, edx                        ; "cv"
        lock cmpxchg DWORD PTR [rcx], r8d       ; "sv" taken straight from r8d
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS
1086
1087
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap that reports the value seen at *p:
; rax still holds cv on a match, the current contents on a mismatch.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       rax

PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret64 PROC ;NEAR

        mov     rax, rdx                        ; "cv"
        lock cmpxchg QWORD PTR [rcx], r8        ; "sv" taken straight from r8
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS
1114
1115
;------------------------------------------------------------------------
;
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Spins until a byte compare-and-swap of *p from cv to sv succeeds,
; issuing PAUSE between attempts. Does not return until it has succeeded.
;
; cv stays in dl and sv stays in r8b for the whole loop, so every retry
; compares against the caller's cv. al has to be reloaded on each pass
; because a failed cmpxchg overwrites it with the current *p.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al (equal to cv, since the final cmpxchg matched)

PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_loop8 PROC ;NEAR
$__kmp_loop:
        mov     al, dl                          ; "cv" (dl is never modified)
        lock cmpxchg BYTE PTR [rcx], r8b        ; "sv"
                        ; Compare AL with [rcx]. If equal set
                        ; ZF and store R8B into [rcx]. Else, clear
                        ; ZF and load [rcx] into AL.
        jz      SHORT $__kmp_success

        db      0f3H
        db      090H                            ; pause

        jmp     SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT   ENDS
1153
1154
;------------------------------------------------------------------------
;
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically swaps the 32-bit float at p with d and returns the previous
; value. The swap is done on the raw bits through eax.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 4 bytes)
;
; return:       xmm0 (lower 4 bytes)

PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real32 PROC ;NEAR

        movd    eax, xmm1                       ; eax <= bits of d
        lock xchg DWORD PTR [rcx], eax          ; eax <= old bits of *p, *p <= d
        movd    xmm0, eax                       ; old value into the return register
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS
1182
1183
;------------------------------------------------------------------------
;
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; Atomically swaps the 64-bit float at p with d and returns the previous
; value. The swap is done on the raw bits through rax.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 8 bytes)
;
; return:       xmm0 (lower 8 bytes)

PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real64 PROC ;NEAR

        movd    rax, xmm1                       ; rax <= bits of "d"
        lock xchg QWORD PTR [rcx], rax          ; rax <= old bits of *p, *p <= d
        movd    xmm0, rax                       ; old value into the return register
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS
1211
;------------------------------------------------------------------------
;
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at p (fldcw).
;
; parameters:
;       p:      rcx
;

PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_load_x87_fpu_control_word PROC ;NEAR

        fldcw   WORD PTR [rcx]                  ; control word <= *p
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS
1233
1234
;------------------------------------------------------------------------
;
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Writes the current x87 FPU control word to the 16-bit location at p (fstcw).
;
; parameters:
;       p:      rcx
;

PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_store_x87_fpu_control_word PROC ;NEAR

        fstcw   WORD PTR [rcx]                  ; *p <= control word
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS
1256
1257
;------------------------------------------------------------------------
;
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Clears the x87 exception flags with fnclex (the form that does not
; first check for pending unmasked exceptions).
;

PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_clear_x87_fpu_status_word PROC ;NEAR

        fnclex
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS
1276
1277
;------------------------------------------------------------------------
;
; FUNCTION __kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] ) {
;
;     (*pkfn) ( &gtid, &tid, argv[0], ... );
;     return 1;
; }
;
; note:
;      just before call to pkfn must have rsp 128-byte aligned for compiler
;
; parameters:
;      rcx:   pkfn      16[rbp]
;      edx:   gtid      24[rbp]
;      r8d:   tid       32[rbp]
;      r9d:   argc      40[rbp]
;      [st]:  p_argv    48[rbp]
;
;      When OMPT_SUPPORT is non-zero, 56[rbp] is additionally read as a
;      pointer and this function's rbp is stored through it.
;
; reg temps:
;      rax:   used all over the place
;      rdx:   used all over the place
;      rcx:   used as argument counter for push parms loop
;      r9:    argc, sign-extended from r9d on entry
;      r10:   used to hold pkfn function pointer argument
;
; return:      eax    (always 1/TRUE)
;

$_pkfn          = 16
$_gtid          = 24
$_tid           = 32
$_argc          = 40
$_p_argv        = 48
if OMPT_SUPPORT
$_exit_frame    = 56
endif

PUBLIC  __kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16

__kmp_invoke_microtask PROC FRAME ;NEAR
        mov     QWORD PTR 16[rsp], rdx  ; home gtid parameter
        mov     QWORD PTR 24[rsp], r8   ; home tid parameter
        push    rbp             ; save base pointer
        .pushreg rbp
        sub     rsp, 0          ; no fixed allocation necessary - end prolog

        lea     rbp, QWORD PTR [rsp]    ; establish the base pointer
        .setframe rbp, 0
        .ENDPROLOG
if OMPT_SUPPORT
        mov     rax, QWORD PTR $_exit_frame[rbp]
        mov     QWORD PTR [rax], rbp
endif
        mov     r10, rcx        ; save pkfn pointer for later
        movsxd  r9, r9d         ; argc is a 32-bit int: bits 63..32 of r9 are
                                ; undefined on entry, so widen it once here

;; ------------------------------------------------------------
        mov     rax, r9         ; rax <= argc
        cmp     rax, 2
        jge     SHORT $_kmp_invoke_stack_align
        mov     rax, 2          ; set 4 homes if less than 2 parms
$_kmp_invoke_stack_align:
        lea     rdx, QWORD PTR [rax*8+16] ; rdx <= (max(argc,2) + 2) * 8
        mov     rax, rsp        ; Save current SP into rax
        sub     rax, rdx        ; rsp - ((max(argc,2)+2)*8) -> rax
                                ; without align, rsp would be this
        and     rax, -128       ; Mask off 7 bits (128-byte align)
        add     rax, rdx        ; add space for push's in a loop below
        mov     rsp, rax        ; Prepare the stack ptr
                                ; Now it will align to 128-byte at the call
;; ------------------------------------------------------------
                                ; setup pkfn parameter stack
        mov     rax, r9         ; rax <= argc
        shl     rax, 3          ; rax <= argc*8
        mov     rdx, QWORD PTR $_p_argv[rbp]    ; rdx <= p_argv
        add     rdx, rax        ; rdx <= &p_argv[argc]
        mov     rcx, r9         ; rcx <= argc
        jecxz   SHORT $_kmp_invoke_pass_parms   ; nothing to push if argc=0
        cmp     ecx, 1          ; if argc=1 branch ahead
        je      SHORT $_kmp_invoke_one_parm
        sub     ecx, 2          ; if argc=2 branch ahead, subtract two from
        je      SHORT $_kmp_invoke_two_parms

$_kmp_invoke_push_parms:        ; push last - 5th parms to pkfn on stack
        sub     rdx, 8          ; decrement p_argv pointer to previous parm
        mov     r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
        push    r8              ; push p_argv[rcx-1] onto stack (reverse order)
        sub     ecx, 1
        jecxz   SHORT $_kmp_invoke_two_parms
        jmp     SHORT $_kmp_invoke_push_parms

$_kmp_invoke_two_parms:
        sub     rdx, 8          ; put 4th parm to pkfn in r9
        mov     r9, QWORD PTR [rdx] ; r9 <= p_argv[1]

$_kmp_invoke_one_parm:
        sub     rdx, 8          ; put 3rd parm to pkfn in r8
        mov     r8, QWORD PTR [rdx] ; r8 <= p_argv[0]

$_kmp_invoke_pass_parms:        ; put 1st & 2nd parms to pkfn in registers
        lea     rdx, QWORD PTR $_tid[rbp]  ; rdx <= &tid (2nd parm to pkfn)
        lea     rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
        sub     rsp, 32         ; add stack space for first four parms
        mov     rax, r10        ; rax <= pkfn
        call    rax             ; call (*pkfn)()
        mov     rax, 1          ; move 1 into return register;

        lea     rsp, QWORD PTR [rbp]    ; restore stack pointer

;       add     rsp, 0          ; no fixed allocation necessary - start epilog
        pop     rbp             ; restore frame pointer
        ret
__kmp_invoke_microtask ENDP
_TEXT   ENDS
1399
1400endif
1401
1402END