blob: 215f5d2a49438c9aeb32d9482525b624c383a3eb [file] [log] [blame]
Robert Sloanc9abfe42018-11-26 12:19:07 -08001; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
Adam Langleyfad63272015-11-12 12:15:39 -08004default rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
Robert Sloan726e9d12018-09-11 11:45:04 -07008
9%ifdef BORINGSSL_PREFIX
10%include "boringssl_prefix_symbols_nasm.inc"
11%endif
Adam Langleyfad63272015-11-12 12:15:39 -080012section .text code align=64
13
14EXTERN OPENSSL_ia32cap_P
15
16
; ---------------------------------------------------------------------------
; Constant tables used by the routines in this file.
; Multi-limb values are stored least-significant 64-bit word first (the code
; walks them with add/adc and sub/sbb from offset 0 upward).
; NOTE(review): the leading line-number / author-commit text on the lines of
; this file looks like residue from a web "blame" view and would have to be
; stripped before the file can be assembled.
; ---------------------------------------------------------------------------
17ALIGN 64
; $L$poly: 256-bit modulus used by neg / mul_mont / sqr_mont; read as a whole
; this is 2^256 - 2^224 + 2^192 + 2^96 - 1 (the NIST P-256 field prime).
18$L$poly:
19 DQ 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
20
; $L$One / $L$Two / $L$Three: eight dwords each. Not referenced in the part of
; the file visible here.
Adam Langleyfad63272015-11-12 12:15:39 -080021$L$One:
22 DD 1,1,1,1,1,1,1,1
23$L$Two:
24 DD 2,2,2,2,2,2,2,2
25$L$Three:
26 DD 3,3,3,3,3,3,3,3
; $L$ONE_mont: four qwords; equals 2^256 - $L$poly, i.e. presumably the value 1
; in Montgomery form -- not referenced in the part of the file visible here.
27$L$ONE_mont:
28 DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
29
Adam Langleyfad63272015-11-12 12:15:39 -080030
; $L$ord: 256-bit modulus used by the ord_* routines (the P-256 group order).
; Its two upper limbs are 2^64-1 and 2^64-2^32, which the non-mulx ord_* code
; exploits by using sub/shift instead of multiplications.
Robert Sloan5cbb5c82018-04-24 11:35:46 -070031$L$ord:
32 DQ 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
; $L$ordK: 64-bit factor multiplied with the low accumulator limb to obtain
; the per-step reduction multiplier (presumably -$L$ord^-1 mod 2^64).
; It must stay immediately after $L$ord: the ord_sqr routines load it as
; QWORD[32+rsi] with rsi pointing at $L$ord.
33$L$ordK:
34 DQ 0xccd1c8aaee00bc4f
35
36
Adam Langleyfad63272015-11-12 12:15:39 -080037
; ---------------------------------------------------------------------------
; ecp_nistz256_neg
; Win64 entry: rcx = result pointer, rdx = source pointer (4 x 64-bit limbs,
; least-significant first). The caller's rdi/rsi are spilled to [8+rsp] and
; [16+rsp] and the arguments are moved to rdi/rsi.
; Computes 0 - src limb by limb. If that subtraction borrowed (src != 0),
; $L$poly is added back so the stored value is $L$poly - src; if it did not
; borrow (src == 0) the stored value is 0. The choice is made with cmov, so
; there is no data-dependent branch.
; r12 and r13 are pushed on entry and reloaded from the stack before return.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -080038global ecp_nistz256_neg
39
40ALIGN 32
41ecp_nistz256_neg:
42 mov QWORD[8+rsp],rdi ;WIN64 prologue
43 mov QWORD[16+rsp],rsi
44 mov rax,rsp
45$L$SEH_begin_ecp_nistz256_neg:
46 mov rdi,rcx
47 mov rsi,rdx
48
49
Robert Sloanab8b8882018-03-26 11:39:51 -070050
Adam Langleyfad63272015-11-12 12:15:39 -080051 push r12
Robert Sloanab8b8882018-03-26 11:39:51 -070052
Adam Langleyfad63272015-11-12 12:15:39 -080053 push r13
54
Robert Sloanab8b8882018-03-26 11:39:51 -070055$L$neg_body:
56
; r8..r11 = 0 (minuend), r13 = 0 (will capture the borrow).
Adam Langleyfad63272015-11-12 12:15:39 -080057 xor r8,r8
58 xor r9,r9
59 xor r10,r10
60 xor r11,r11
61 xor r13,r13
62
; r11:r10:r9:r8 = 0 - src; copies of the difference go to rax, rdx (and
; rcx, r12 below). mov/lea do not touch flags, so the sbb chain is intact.
; After "sbb r13,0" r13 is 0 when there was no borrow, all-ones otherwise.
63 sub r8,QWORD[rsi]
64 sbb r9,QWORD[8+rsi]
65 sbb r10,QWORD[16+rsi]
66 mov rax,r8
67 sbb r11,QWORD[24+rsi]
68 lea rsi,[$L$poly]
69 mov rdx,r9
70 sbb r13,0
71
; r11:r10:r9:r8 += $L$poly (rsi now points at the modulus).
72 add r8,QWORD[rsi]
73 mov rcx,r10
74 adc r9,QWORD[8+rsi]
75 adc r10,QWORD[16+rsi]
76 mov r12,r11
77 adc r11,QWORD[24+rsi]
78 test r13,r13
79
; No borrow (r13 == 0): keep the plain difference saved in rax/rdx/rcx/r12.
80 cmovz r8,rax
81 cmovz r9,rdx
82 mov QWORD[rdi],r8
83 cmovz r10,rcx
84 mov QWORD[8+rdi],r9
85 cmovz r11,r12
86 mov QWORD[16+rdi],r10
87 mov QWORD[24+rdi],r11
88
; Reload the two pushed registers and release their 16 bytes of stack.
Robert Sloanab8b8882018-03-26 11:39:51 -070089 mov r13,QWORD[rsp]
90
91 mov r12,QWORD[8+rsp]
92
93 lea rsp,[16+rsp]
94
95$L$neg_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -080096 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
97 mov rsi,QWORD[16+rsp]
98 DB 0F3h,0C3h ;repret
Robert Sloanab8b8882018-03-26 11:39:51 -070099
Adam Langleyfad63272015-11-12 12:15:39 -0800100$L$SEH_end_ecp_nistz256_neg:
101
102
103
104
Adam Langleyfad63272015-11-12 12:15:39 -0800105
106
; ---------------------------------------------------------------------------
; ecp_nistz256_ord_mul_mont
; Win64 entry: rcx = result pointer, rdx = pointer to a, r8 = pointer to b
; (each 4 x 64-bit limbs, least-significant first). After the prologue:
; rdi = result, rsi = a, rdx = b.
; Word-by-word Montgomery multiplication modulo $L$ord: for each limb b[i]
; the product a*b[i] is accumulated, then one reduction step is applied; a
; final conditional subtraction of $L$ord brings the result into range.
; If both capability bits selected by mask 0x80100 are set in the dword at
; OPENSSL_ia32cap_P+8, control transfers to $L$ecp_nistz256_ord_mul_montx
; (the mulx/adcx/adox variant; the bits are presumably BMI2 and ADX -- confirm
; against the OPENSSL_ia32cap_P layout).
; rbp, rbx, r12-r15 are pushed here and reloaded from the stack at the end.
; ---------------------------------------------------------------------------
Robert Sloan5cbb5c82018-04-24 11:35:46 -0700107global ecp_nistz256_ord_mul_mont
108
109ALIGN 32
110ecp_nistz256_ord_mul_mont:
111 mov QWORD[8+rsp],rdi ;WIN64 prologue
112 mov QWORD[16+rsp],rsi
113 mov rax,rsp
114$L$SEH_begin_ecp_nistz256_ord_mul_mont:
115 mov rdi,rcx
116 mov rsi,rdx
117 mov rdx,r8
118
119
120
; CPU capability dispatch (taken before anything is pushed).
Adam Vartanianbfcf3a72018-08-10 14:55:24 +0100121 lea rcx,[OPENSSL_ia32cap_P]
122 mov rcx,QWORD[8+rcx]
123 and ecx,0x80100
124 cmp ecx,0x80100
125 je NEAR $L$ecp_nistz256_ord_mul_montx
Robert Sloan5cbb5c82018-04-24 11:35:46 -0700126 push rbp
127
128 push rbx
129
130 push r12
131
132 push r13
133
134 push r14
135
136 push r15
137
138$L$ord_mul_body:
139
; rax = b[0], rbx = b pointer (rdx is consumed by mul), r14 = &$L$ord,
; r15 = $L$ordK.
140 mov rax,QWORD[rdx]
141 mov rbx,rdx
142 lea r14,[$L$ord]
143 mov r15,QWORD[$L$ordK]
144
145
; ---- a[0..3] * b[0] -> r12:r11:r10:r9:r8 ----
; Interleaved: r13 keeps a copy of the low limb and r8 becomes the reduction
; multiplier (low limb * $L$ordK).
146 mov rcx,rax
147 mul QWORD[rsi]
148 mov r8,rax
149 mov rax,rcx
150 mov r9,rdx
151
152 mul QWORD[8+rsi]
153 add r9,rax
154 mov rax,rcx
155 adc rdx,0
156 mov r10,rdx
157
158 mul QWORD[16+rsi]
159 add r10,rax
160 mov rax,rcx
161 adc rdx,0
162
163 mov r13,r8
164 imul r8,r15
165
166 mov r11,rdx
167 mul QWORD[24+rsi]
168 add r11,rax
169 mov rax,r8
170 adc rdx,0
171 mov r12,rdx
172
173
; ---- first reduction step: add multiplier * $L$ord ----
; The low-limb sum in r13 is relied upon to be zero (only its carry, in rcx,
; is propagated); r13 is then reused as the new top limb. The two upper limbs
; of $L$ord are applied with sub / shl 32 / shr 32 instead of mul.
174 mul QWORD[r14]
175 mov rbp,r8
176 add r13,rax
177 mov rax,r8
178 adc rdx,0
179 mov rcx,rdx
180
181 sub r10,r8
182 sbb r8,0
183
184 mul QWORD[8+r14]
185 add r9,rcx
186 adc rdx,0
187 add r9,rax
188 mov rax,rbp
189 adc r10,rdx
190 mov rdx,rbp
191 adc r8,0
192
193 shl rax,32
194 shr rdx,32
195 sub r11,rax
196 mov rax,QWORD[8+rbx]
197 sbb rbp,rdx
198
199 add r11,r8
200 adc r12,rbp
201 adc r13,0
202
203
; ---- accumulate a[0..3] * b[1] (rax was preloaded with b[1]) ----
; rcx saves the low limb r9; r9 becomes the next reduction multiplier.
204 mov rcx,rax
205 mul QWORD[rsi]
206 add r9,rax
207 mov rax,rcx
208 adc rdx,0
209 mov rbp,rdx
210
211 mul QWORD[8+rsi]
212 add r10,rbp
213 adc rdx,0
214 add r10,rax
215 mov rax,rcx
216 adc rdx,0
217 mov rbp,rdx
218
219 mul QWORD[16+rsi]
220 add r11,rbp
221 adc rdx,0
222 add r11,rax
223 mov rax,rcx
224 adc rdx,0
225
226 mov rcx,r9
227 imul r9,r15
228
229 mov rbp,rdx
230 mul QWORD[24+rsi]
231 add r12,rbp
232 adc rdx,0
233 xor r8,r8
234 add r12,rax
235 mov rax,r9
236 adc r13,rdx
237 adc r8,0
238
239
; ---- second reduction step (same pattern, accumulator rotated by a limb) ----
240 mul QWORD[r14]
241 mov rbp,r9
242 add rcx,rax
243 mov rax,r9
244 adc rcx,rdx
245
246 sub r11,r9
247 sbb r9,0
248
249 mul QWORD[8+r14]
250 add r10,rcx
251 adc rdx,0
252 add r10,rax
253 mov rax,rbp
254 adc r11,rdx
255 mov rdx,rbp
256 adc r9,0
257
258 shl rax,32
259 shr rdx,32
260 sub r12,rax
261 mov rax,QWORD[16+rbx]
262 sbb rbp,rdx
263
264 add r12,r9
265 adc r13,rbp
266 adc r8,0
267
268
; ---- accumulate a[0..3] * b[2] ----
269 mov rcx,rax
270 mul QWORD[rsi]
271 add r10,rax
272 mov rax,rcx
273 adc rdx,0
274 mov rbp,rdx
275
276 mul QWORD[8+rsi]
277 add r11,rbp
278 adc rdx,0
279 add r11,rax
280 mov rax,rcx
281 adc rdx,0
282 mov rbp,rdx
283
284 mul QWORD[16+rsi]
285 add r12,rbp
286 adc rdx,0
287 add r12,rax
288 mov rax,rcx
289 adc rdx,0
290
291 mov rcx,r10
292 imul r10,r15
293
294 mov rbp,rdx
295 mul QWORD[24+rsi]
296 add r13,rbp
297 adc rdx,0
298 xor r9,r9
299 add r13,rax
300 mov rax,r10
301 adc r8,rdx
302 adc r9,0
303
304
; ---- third reduction step ----
305 mul QWORD[r14]
306 mov rbp,r10
307 add rcx,rax
308 mov rax,r10
309 adc rcx,rdx
310
311 sub r12,r10
312 sbb r10,0
313
314 mul QWORD[8+r14]
315 add r11,rcx
316 adc rdx,0
317 add r11,rax
318 mov rax,rbp
319 adc r12,rdx
320 mov rdx,rbp
321 adc r10,0
322
323 shl rax,32
324 shr rdx,32
325 sub r13,rax
326 mov rax,QWORD[24+rbx]
327 sbb rbp,rdx
328
329 add r13,r10
330 adc r8,rbp
331 adc r9,0
332
333
; ---- accumulate a[0..3] * b[3] ----
334 mov rcx,rax
335 mul QWORD[rsi]
336 add r11,rax
337 mov rax,rcx
338 adc rdx,0
339 mov rbp,rdx
340
341 mul QWORD[8+rsi]
342 add r12,rbp
343 adc rdx,0
344 add r12,rax
345 mov rax,rcx
346 adc rdx,0
347 mov rbp,rdx
348
349 mul QWORD[16+rsi]
350 add r13,rbp
351 adc rdx,0
352 add r13,rax
353 mov rax,rcx
354 adc rdx,0
355
356 mov rcx,r11
357 imul r11,r15
358
359 mov rbp,rdx
360 mul QWORD[24+rsi]
361 add r8,rbp
362 adc rdx,0
363 xor r10,r10
364 add r8,rax
365 mov rax,r11
366 adc r9,rdx
367 adc r10,0
368
369
; ---- fourth (last) reduction step ----
370 mul QWORD[r14]
371 mov rbp,r11
372 add rcx,rax
373 mov rax,r11
374 adc rcx,rdx
375
376 sub r13,r11
377 sbb r11,0
378
379 mul QWORD[8+r14]
380 add r12,rcx
381 adc rdx,0
382 add r12,rax
383 mov rax,rbp
384 adc r13,rdx
385 mov rdx,rbp
386 adc r11,0
387
388 shl rax,32
389 shr rdx,32
390 sub r8,rax
391 sbb rbp,rdx
392
393 add r8,r11
394 adc r9,rbp
395 adc r10,0
396
397
; ---- branch-free final subtraction ----
; Result is r10:r9:r8:r13:r12 (r10 = extra top word). Subtract $L$ord while
; keeping the unsubtracted limbs in rsi/r11/rcx/rbp; on borrow the saved
; limbs are selected back with cmovc.
398 mov rsi,r12
399 sub r12,QWORD[r14]
400 mov r11,r13
401 sbb r13,QWORD[8+r14]
402 mov rcx,r8
403 sbb r8,QWORD[16+r14]
404 mov rbp,r9
405 sbb r9,QWORD[24+r14]
406 sbb r10,0
407
408 cmovc r12,rsi
409 cmovc r13,r11
410 cmovc r8,rcx
411 cmovc r9,rbp
412
413 mov QWORD[rdi],r12
414 mov QWORD[8+rdi],r13
415 mov QWORD[16+rdi],r8
416 mov QWORD[24+rdi],r9
417
; Reload the six pushed registers (reverse push order) and drop the 48 bytes.
418 mov r15,QWORD[rsp]
419
420 mov r14,QWORD[8+rsp]
421
422 mov r13,QWORD[16+rsp]
423
424 mov r12,QWORD[24+rsp]
425
426 mov rbx,QWORD[32+rsp]
427
428 mov rbp,QWORD[40+rsp]
429
430 lea rsp,[48+rsp]
431
432$L$ord_mul_epilogue:
433 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
434 mov rsi,QWORD[16+rsp]
435 DB 0F3h,0C3h ;repret
436
437$L$SEH_end_ecp_nistz256_ord_mul_mont:
438
439
440
441
442
443
444
; ---------------------------------------------------------------------------
; ecp_nistz256_ord_sqr_mont
; Win64 entry: rcx = result pointer, rdx = pointer to a (4 x 64-bit limbs),
; r8 = repetition count. After the prologue: rdi = result, rsi = a,
; rdx = count (moved to rbx).
; Each pass of $L$oop_ord_sqr squares the current value, applies four
; Montgomery reduction steps modulo $L$ord and a conditional subtraction; the
; pass is repeated "count" times and the final value is stored to [rdi].
; The loop body runs before the counter is tested (dec/jnz), so the count
; must be non-zero.
; If both capability bits selected by mask 0x80100 are set in the dword at
; OPENSSL_ia32cap_P+8, control transfers to $L$ecp_nistz256_ord_sqr_montx.
; xmm1-xmm3 are used as scratch for input limbs and zeroed before return.
; ---------------------------------------------------------------------------
445global ecp_nistz256_ord_sqr_mont
446
447ALIGN 32
448ecp_nistz256_ord_sqr_mont:
449 mov QWORD[8+rsp],rdi ;WIN64 prologue
450 mov QWORD[16+rsp],rsi
451 mov rax,rsp
452$L$SEH_begin_ecp_nistz256_ord_sqr_mont:
453 mov rdi,rcx
454 mov rsi,rdx
455 mov rdx,r8
456
457
458
; CPU capability dispatch (taken before anything is pushed).
Adam Vartanianbfcf3a72018-08-10 14:55:24 +0100459 lea rcx,[OPENSSL_ia32cap_P]
460 mov rcx,QWORD[8+rcx]
461 and ecx,0x80100
462 cmp ecx,0x80100
463 je NEAR $L$ecp_nistz256_ord_sqr_montx
Robert Sloan5cbb5c82018-04-24 11:35:46 -0700464 push rbp
465
466 push rbx
467
468 push r12
469
470 push r13
471
472 push r14
473
474 push r15
475
476$L$ord_sqr_body:
477
; Loop-carried state: r8 = a[0], rax = a[1], r14 = a[2], r15 = a[3].
; rsi is repointed at $L$ord ($L$ordK is read as QWORD[32+rsi]); rbx = count.
478 mov r8,QWORD[rsi]
479 mov rax,QWORD[8+rsi]
480 mov r14,QWORD[16+rsi]
481 mov r15,QWORD[24+rsi]
482 lea rsi,[$L$ord]
483 mov rbx,rdx
484 jmp NEAR $L$oop_ord_sqr
485
486ALIGN 32
487$L$oop_ord_sqr:
488
; ---- cross products with a[0]: a[1]*a[0], a[2]*a[0], a[3]*a[0] ----
; The DB sequences are hand-encoded "movq xmmN, reg" that park a[1] (rbp),
; a[2] (r14) and a[3] (r15) in xmm1, xmm2 and xmm3.
489 mov rbp,rax
490 mul r8
491 mov r9,rax
492DB 102,72,15,110,205
493 mov rax,r14
494 mov r10,rdx
495
496 mul r8
497 add r10,rax
498 mov rax,r15
499DB 102,73,15,110,214
500 adc rdx,0
501 mov r11,rdx
502
503 mul r8
504 add r11,rax
505 mov rax,r15
506DB 102,73,15,110,223
507 adc rdx,0
508 mov r12,rdx
509
510
; ---- a[3]*a[2] -> r14:r13 ----
511 mul r14
512 mov r13,rax
513 mov rax,r14
514 mov r14,rdx
515
516
; ---- a[2]*a[1] and a[3]*a[1] (rbp = a[1]) ----
517 mul rbp
518 add r11,rax
519 mov rax,r15
520 adc rdx,0
521 mov r15,rdx
522
523 mul rbp
524 add r12,rax
525 adc rdx,0
526
527 add r12,r15
528 adc r13,rdx
529 adc r14,0
530
531
; ---- double the cross products: r15:r14:...:r9 = 2 * (r14:...:r9) ----
532 xor r15,r15
533 mov rax,r8
534 add r9,r9
535 adc r10,r10
536 adc r11,r11
537 adc r12,r12
538 adc r13,r13
539 adc r14,r14
540 adc r15,0
541
542
; ---- add the squares a[i]*a[i] ----
; The DB sequences are hand-encoded "movq rax, xmmN" reloading a[1..3].
543 mul rax
544 mov r8,rax
545DB 102,72,15,126,200
546 mov rbp,rdx
547
548 mul rax
549 add r9,rbp
550 adc r10,rax
551DB 102,72,15,126,208
552 adc rdx,0
553 mov rbp,rdx
554
555 mul rax
556 add r11,rbp
557 adc r12,rax
558DB 102,72,15,126,216
559 adc rdx,0
560 mov rbp,rdx
561
; rcx = copy of the low limb; r8 = low limb * $L$ordK (reduction multiplier).
562 mov rcx,r8
563 imul r8,QWORD[32+rsi]
564
565 mul rax
566 add r13,rbp
567 adc r14,rax
568 mov rax,QWORD[rsi]
569 adc r15,rdx
570
571
; ---- first reduction step (multiplier in r8) ----
; Limbs 0 and 1 of $L$ord are multiplied; limbs 2 and 3 are applied with
; sub / shl 32 / shr 32.
572 mul r8
573 mov rbp,r8
574 add rcx,rax
575 mov rax,QWORD[8+rsi]
576 adc rcx,rdx
577
578 sub r10,r8
579 sbb rbp,0
580
581 mul r8
582 add r9,rcx
583 adc rdx,0
584 add r9,rax
585 mov rax,r8
586 adc r10,rdx
587 mov rdx,r8
588 adc rbp,0
589
590 mov rcx,r9
591 imul r9,QWORD[32+rsi]
592
593 shl rax,32
594 shr rdx,32
595 sub r11,rax
596 mov rax,QWORD[rsi]
597 sbb r8,rdx
598
599 add r11,rbp
600 adc r8,0
601
602
; ---- second reduction step (multiplier in r9) ----
603 mul r9
604 mov rbp,r9
605 add rcx,rax
606 mov rax,QWORD[8+rsi]
607 adc rcx,rdx
608
609 sub r11,r9
610 sbb rbp,0
611
612 mul r9
613 add r10,rcx
614 adc rdx,0
615 add r10,rax
616 mov rax,r9
617 adc r11,rdx
618 mov rdx,r9
619 adc rbp,0
620
621 mov rcx,r10
622 imul r10,QWORD[32+rsi]
623
624 shl rax,32
625 shr rdx,32
626 sub r8,rax
627 mov rax,QWORD[rsi]
628 sbb r9,rdx
629
630 add r8,rbp
631 adc r9,0
632
633
; ---- third reduction step (multiplier in r10) ----
634 mul r10
635 mov rbp,r10
636 add rcx,rax
637 mov rax,QWORD[8+rsi]
638 adc rcx,rdx
639
640 sub r8,r10
641 sbb rbp,0
642
643 mul r10
644 add r11,rcx
645 adc rdx,0
646 add r11,rax
647 mov rax,r10
648 adc r8,rdx
649 mov rdx,r10
650 adc rbp,0
651
652 mov rcx,r11
653 imul r11,QWORD[32+rsi]
654
655 shl rax,32
656 shr rdx,32
657 sub r9,rax
658 mov rax,QWORD[rsi]
659 sbb r10,rdx
660
661 add r9,rbp
662 adc r10,0
663
664
; ---- fourth reduction step (multiplier in r11) ----
665 mul r11
666 mov rbp,r11
667 add rcx,rax
668 mov rax,QWORD[8+rsi]
669 adc rcx,rdx
670
671 sub r9,r11
672 sbb rbp,0
673
674 mul r11
675 add r8,rcx
676 adc rdx,0
677 add r8,rax
678 mov rax,r11
679 adc r9,rdx
680 mov rdx,r11
681 adc rbp,0
682
683 shl rax,32
684 shr rdx,32
685 sub r10,rax
686 sbb r11,rdx
687
688 add r10,rbp
689 adc r11,0
690
691
; ---- add the upper half of the square (r15:r14:r13:r12); rdx = carry ----
; Unsubtracted copies are kept in r12 (limb 0), rax (limb 1) and below in
; r14/r15 (limbs 2/3).
692 xor rdx,rdx
693 add r8,r12
694 adc r9,r13
695 mov r12,r8
696 adc r10,r14
697 adc r11,r15
698 mov rax,r9
699 adc rdx,0
700
701
; ---- trial subtraction of $L$ord ----
702 sub r8,QWORD[rsi]
703 mov r14,r10
704 sbb r9,QWORD[8+rsi]
705 sbb r10,QWORD[16+rsi]
706 mov r15,r11
707 sbb r11,QWORD[24+rsi]
708 sbb rdx,0
709
; Borrow: keep the unsubtracted value; no borrow: take the difference.
; Either way the result ends up in r8, rax, r14, r15 -- the same registers
; the loop expects a[0..3] in on entry.
710 cmovc r8,r12
711 cmovnc rax,r9
712 cmovnc r14,r10
713 cmovnc r15,r11
714
715 dec rbx
716 jnz NEAR $L$oop_ord_sqr
717
; Store the result and wipe the limb copies left in xmm1-xmm3.
718 mov QWORD[rdi],r8
719 mov QWORD[8+rdi],rax
720 pxor xmm1,xmm1
721 mov QWORD[16+rdi],r14
722 pxor xmm2,xmm2
723 mov QWORD[24+rdi],r15
724 pxor xmm3,xmm3
725
; Reload the six pushed registers and drop the 48 bytes.
726 mov r15,QWORD[rsp]
727
728 mov r14,QWORD[8+rsp]
729
730 mov r13,QWORD[16+rsp]
731
732 mov r12,QWORD[24+rsp]
733
734 mov rbx,QWORD[32+rsp]
735
736 mov rbp,QWORD[40+rsp]
737
738 lea rsp,[48+rsp]
739
740$L$ord_sqr_epilogue:
741 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
742 mov rsi,QWORD[16+rsp]
743 DB 0F3h,0C3h ;repret
744
745$L$SEH_end_ecp_nistz256_ord_sqr_mont:
746
747
; ---------------------------------------------------------------------------
; ecp_nistz256_ord_mul_montx (not declared global)
; mulx/adcx/adox variant of the Montgomery multiplication modulo $L$ord.
; It has its own Win64 prologue (rcx = result, rdx = a, r8 = b), but
; ecp_nistz256_ord_mul_mont jumps straight to the inner label
; $L$ecp_nistz256_ord_mul_montx after doing the same argument shuffle, so at
; that label rdi = result, rsi = a, rdx = b in both cases.
; rsi and r14 are biased by -128 and every access adds 128 back to the
; displacement; r14 is un-biased just before the final subtraction.
; rbp, rbx, r12-r15 are pushed and reloaded from the stack at the end.
; ---------------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +0100748ALIGN 32
749ecp_nistz256_ord_mul_montx:
750 mov QWORD[8+rsp],rdi ;WIN64 prologue
751 mov QWORD[16+rsp],rsi
752 mov rax,rsp
753$L$SEH_begin_ecp_nistz256_ord_mul_montx:
754 mov rdi,rcx
755 mov rsi,rdx
756 mov rdx,r8
757
758
759
760$L$ecp_nistz256_ord_mul_montx:
761 push rbp
762
763 push rbx
764
765 push r12
766
767 push r13
768
769 push r14
770
771 push r15
772
773$L$ord_mulx_body:
774
; rbx = b pointer, rdx = b[0] (implicit mulx operand), r9..r12 = a[0..3],
; rsi = a - 128, r14 = $L$ord - 128, r15 = $L$ordK.
775 mov rbx,rdx
776 mov rdx,QWORD[rdx]
777 mov r9,QWORD[rsi]
778 mov r10,QWORD[8+rsi]
779 mov r11,QWORD[16+rsi]
780 mov r12,QWORD[24+rsi]
781 lea rsi,[((-128))+rsi]
782 lea r14,[(($L$ord-128))]
783 mov r15,QWORD[$L$ordK]
784
785
; ---- a[0..3] * b[0] -> r12:r11:r10:r9:r8 ----
; "mulx rax,rdx,r15" leaves low limb * $L$ordK in rdx (the reduction
; multiplier); the high half in rax is not used.
786 mulx r9,r8,r9
787 mulx r10,rcx,r10
788 mulx r11,rbp,r11
789 add r9,rcx
790 mulx r12,rcx,r12
791 mov rdx,r8
792 mulx rax,rdx,r15
793 adc r10,rbp
794 adc r11,rcx
795 adc r12,0
796
797
; ---- first reduction: add multiplier * $L$ord using two carry chains ----
; xor clears r13 and both CF and OF; adcx uses CF, adox uses OF. The low limb
; r8 is relied upon to come out zero, after which it doubles as a zero
; operand for the trailing "adcx/adox reg,r8" carry folds.
798 xor r13,r13
799 mulx rbp,rcx,QWORD[((0+128))+r14]
800 adcx r8,rcx
801 adox r9,rbp
802
803 mulx rbp,rcx,QWORD[((8+128))+r14]
804 adcx r9,rcx
805 adox r10,rbp
806
807 mulx rbp,rcx,QWORD[((16+128))+r14]
808 adcx r10,rcx
809 adox r11,rbp
810
811 mulx rbp,rcx,QWORD[((24+128))+r14]
812 mov rdx,QWORD[8+rbx]
813 adcx r11,rcx
814 adox r12,rbp
815 adcx r12,r8
816 adox r13,r8
817 adc r13,0
818
819
; ---- accumulate a[0..3] * b[1] (rdx = b[1]) ----
820 mulx rbp,rcx,QWORD[((0+128))+rsi]
821 adcx r9,rcx
822 adox r10,rbp
823
824 mulx rbp,rcx,QWORD[((8+128))+rsi]
825 adcx r10,rcx
826 adox r11,rbp
827
828 mulx rbp,rcx,QWORD[((16+128))+rsi]
829 adcx r11,rcx
830 adox r12,rbp
831
832 mulx rbp,rcx,QWORD[((24+128))+rsi]
833 mov rdx,r9
834 mulx rax,rdx,r15
835 adcx r12,rcx
836 adox r13,rbp
837
838 adcx r13,r8
839 adox r8,r8
840 adc r8,0
841
842
; ---- second reduction (low limb r9) ----
843 mulx rbp,rcx,QWORD[((0+128))+r14]
844 adcx r9,rcx
845 adox r10,rbp
846
847 mulx rbp,rcx,QWORD[((8+128))+r14]
848 adcx r10,rcx
849 adox r11,rbp
850
851 mulx rbp,rcx,QWORD[((16+128))+r14]
852 adcx r11,rcx
853 adox r12,rbp
854
855 mulx rbp,rcx,QWORD[((24+128))+r14]
856 mov rdx,QWORD[16+rbx]
857 adcx r12,rcx
858 adox r13,rbp
859 adcx r13,r9
860 adox r8,r9
861 adc r8,0
862
863
; ---- accumulate a[0..3] * b[2] ----
864 mulx rbp,rcx,QWORD[((0+128))+rsi]
865 adcx r10,rcx
866 adox r11,rbp
867
868 mulx rbp,rcx,QWORD[((8+128))+rsi]
869 adcx r11,rcx
870 adox r12,rbp
871
872 mulx rbp,rcx,QWORD[((16+128))+rsi]
873 adcx r12,rcx
874 adox r13,rbp
875
876 mulx rbp,rcx,QWORD[((24+128))+rsi]
877 mov rdx,r10
878 mulx rax,rdx,r15
879 adcx r13,rcx
880 adox r8,rbp
881
882 adcx r8,r9
883 adox r9,r9
884 adc r9,0
885
886
; ---- third reduction (low limb r10) ----
887 mulx rbp,rcx,QWORD[((0+128))+r14]
888 adcx r10,rcx
889 adox r11,rbp
890
891 mulx rbp,rcx,QWORD[((8+128))+r14]
892 adcx r11,rcx
893 adox r12,rbp
894
895 mulx rbp,rcx,QWORD[((16+128))+r14]
896 adcx r12,rcx
897 adox r13,rbp
898
899 mulx rbp,rcx,QWORD[((24+128))+r14]
900 mov rdx,QWORD[24+rbx]
901 adcx r13,rcx
902 adox r8,rbp
903 adcx r8,r10
904 adox r9,r10
905 adc r9,0
906
907
; ---- accumulate a[0..3] * b[3] ----
908 mulx rbp,rcx,QWORD[((0+128))+rsi]
909 adcx r11,rcx
910 adox r12,rbp
911
912 mulx rbp,rcx,QWORD[((8+128))+rsi]
913 adcx r12,rcx
914 adox r13,rbp
915
916 mulx rbp,rcx,QWORD[((16+128))+rsi]
917 adcx r13,rcx
918 adox r8,rbp
919
920 mulx rbp,rcx,QWORD[((24+128))+rsi]
921 mov rdx,r11
922 mulx rax,rdx,r15
923 adcx r8,rcx
924 adox r9,rbp
925
926 adcx r9,r10
927 adox r10,r10
928 adc r10,0
929
930
; ---- fourth reduction (low limb r11) ----
; Interleaved: r14 is un-biased (+128) and copies of limbs 0/1 of the result
; are saved in rbx/rdx for the conditional subtraction below.
931 mulx rbp,rcx,QWORD[((0+128))+r14]
932 adcx r11,rcx
933 adox r12,rbp
934
935 mulx rbp,rcx,QWORD[((8+128))+r14]
936 adcx r12,rcx
937 adox r13,rbp
938
939 mulx rbp,rcx,QWORD[((16+128))+r14]
940 adcx r13,rcx
941 adox r8,rbp
942
943 mulx rbp,rcx,QWORD[((24+128))+r14]
944 lea r14,[128+r14]
945 mov rbx,r12
946 adcx r8,rcx
947 adox r9,rbp
948 mov rdx,r13
949 adcx r9,r11
950 adox r10,r11
951 adc r10,0
952
953
954
; ---- branch-free final subtraction of $L$ord ----
; Result is r10:r9:r8:r13:r12; saved copies are in rbx/rdx/rcx/rbp and are
; selected back with cmovc when the subtraction borrows.
955 mov rcx,r8
956 sub r12,QWORD[r14]
957 sbb r13,QWORD[8+r14]
958 sbb r8,QWORD[16+r14]
959 mov rbp,r9
960 sbb r9,QWORD[24+r14]
961 sbb r10,0
962
963 cmovc r12,rbx
964 cmovc r13,rdx
965 cmovc r8,rcx
966 cmovc r9,rbp
967
968 mov QWORD[rdi],r12
969 mov QWORD[8+rdi],r13
970 mov QWORD[16+rdi],r8
971 mov QWORD[24+rdi],r9
972
; Reload the six pushed registers and drop the 48 bytes.
973 mov r15,QWORD[rsp]
974
975 mov r14,QWORD[8+rsp]
976
977 mov r13,QWORD[16+rsp]
978
979 mov r12,QWORD[24+rsp]
980
981 mov rbx,QWORD[32+rsp]
982
983 mov rbp,QWORD[40+rsp]
984
985 lea rsp,[48+rsp]
986
987$L$ord_mulx_epilogue:
988 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
989 mov rsi,QWORD[16+rsp]
990 DB 0F3h,0C3h ;repret
991
992$L$SEH_end_ecp_nistz256_ord_mul_montx:
993
994
; ---------------------------------------------------------------------------
; ecp_nistz256_ord_sqr_montx (not declared global)
; mulx/adcx/adox variant of the repeated Montgomery squaring modulo $L$ord.
; It has its own Win64 prologue (rcx = result, rdx = a, r8 = count), but
; ecp_nistz256_ord_sqr_mont jumps straight to the inner label
; $L$ecp_nistz256_ord_sqr_montx after the same argument shuffle, so at that
; label rdi = result, rsi = a, rdx = count in both cases.
; The loop body runs before the counter is tested (dec/jnz), so the count
; must be non-zero. xmm1-xmm3 hold limb copies and are zeroed before return.
; ---------------------------------------------------------------------------
995ALIGN 32
996ecp_nistz256_ord_sqr_montx:
997 mov QWORD[8+rsp],rdi ;WIN64 prologue
998 mov QWORD[16+rsp],rsi
999 mov rax,rsp
1000$L$SEH_begin_ecp_nistz256_ord_sqr_montx:
1001 mov rdi,rcx
1002 mov rsi,rdx
1003 mov rdx,r8
1004
1005
1006
1007$L$ecp_nistz256_ord_sqr_montx:
1008 push rbp
1009
1010 push rbx
1011
1012 push r12
1013
1014 push r13
1015
1016 push r14
1017
1018 push r15
1019
1020$L$ord_sqrx_body:
1021
; Loop-carried state: rdx = a[0], r14 = a[1], r15 = a[2], r8 = a[3].
; rbx = count; rsi is repointed at $L$ord ($L$ordK is read as QWORD[32+rsi]).
1022 mov rbx,rdx
1023 mov rdx,QWORD[rsi]
1024 mov r14,QWORD[8+rsi]
1025 mov r15,QWORD[16+rsi]
1026 mov r8,QWORD[24+rsi]
1027 lea rsi,[$L$ord]
1028 jmp NEAR $L$oop_ord_sqrx
1029
1030ALIGN 32
1031$L$oop_ord_sqrx:
; ---- cross products a[0]*a[1..3] ----
; rax keeps a[0]; the DB sequences are hand-encoded "movq xmm1, r14" and
; "movq xmm2, r15" parking a[1] and a[2].
1032 mulx r10,r9,r14
1033 mulx r11,rcx,r15
1034 mov rax,rdx
1035DB 102,73,15,110,206
1036 mulx r12,rbp,r8
1037 mov rdx,r14
1038 add r10,rcx
1039DB 102,73,15,110,215
1040 adc r11,rbp
1041 adc r12,0
1042 xor r13,r13
1043
; ---- a[1]*a[2], a[1]*a[3] (rdx = a[1]) ----
1044 mulx rbp,rcx,r15
1045 adcx r11,rcx
1046 adox r12,rbp
1047
1048 mulx rbp,rcx,r8
1049 mov rdx,r15
1050 adcx r12,rcx
1051 adox r13,rbp
1052 adc r13,0
1053
; ---- a[2]*a[3]; then start doubling (adcx chain) while the squares are
; added on the adox chain. DB = "movq xmm3, r8" parking a[3]. ----
1054 mulx r14,rcx,r8
1055 mov rdx,rax
1056DB 102,73,15,110,216
1057 xor r15,r15
1058 adcx r9,r9
1059 adox r13,rcx
1060 adcx r10,r10
1061 adox r14,r15
1062
1063
; ---- squares a[i]*a[i]; the DB sequences are "movq rdx, xmmN" reloads,
; and the lone DB 0x67 is an address-size prefix byte in front of the last
; one (presumably padding for instruction alignment). ----
1064 mulx rbp,r8,rdx
1065DB 102,72,15,126,202
1066 adcx r11,r11
1067 adox r9,rbp
1068 adcx r12,r12
1069 mulx rax,rcx,rdx
1070DB 102,72,15,126,210
1071 adcx r13,r13
1072 adox r10,rcx
1073 adcx r14,r14
1074 mulx rbp,rcx,rdx
1075DB 0x67
1076DB 102,72,15,126,218
1077 adox r11,rax
1078 adcx r15,r15
1079 adox r12,rcx
1080 adox r13,rbp
1081 mulx rax,rcx,rdx
1082 adox r14,rcx
1083 adox r15,rax
1084
1085
; ---- first reduction: rdx = r8 * $L$ordK, then add rdx * $L$ord ----
; rax is zeroed (and CF/OF cleared) and serves as the zero operand that
; folds the final carry of each reduction step.
1086 mov rdx,r8
1087 mulx rcx,rdx,QWORD[32+rsi]
1088
1089 xor rax,rax
1090 mulx rbp,rcx,QWORD[rsi]
1091 adcx r8,rcx
1092 adox r9,rbp
1093 mulx rbp,rcx,QWORD[8+rsi]
1094 adcx r9,rcx
1095 adox r10,rbp
1096 mulx rbp,rcx,QWORD[16+rsi]
1097 adcx r10,rcx
1098 adox r11,rbp
1099 mulx rbp,rcx,QWORD[24+rsi]
1100 adcx r11,rcx
1101 adox r8,rbp
1102 adcx r8,rax
1103
1104
; ---- second reduction (low limb r9); the roles of the adcx and adox
; chains are swapped relative to the previous step ----
1105 mov rdx,r9
1106 mulx rcx,rdx,QWORD[32+rsi]
1107
1108 mulx rbp,rcx,QWORD[rsi]
1109 adox r9,rcx
1110 adcx r10,rbp
1111 mulx rbp,rcx,QWORD[8+rsi]
1112 adox r10,rcx
1113 adcx r11,rbp
1114 mulx rbp,rcx,QWORD[16+rsi]
1115 adox r11,rcx
1116 adcx r8,rbp
1117 mulx rbp,rcx,QWORD[24+rsi]
1118 adox r8,rcx
1119 adcx r9,rbp
1120 adox r9,rax
1121
1122
; ---- third reduction (low limb r10) ----
1123 mov rdx,r10
1124 mulx rcx,rdx,QWORD[32+rsi]
1125
1126 mulx rbp,rcx,QWORD[rsi]
1127 adcx r10,rcx
1128 adox r11,rbp
1129 mulx rbp,rcx,QWORD[8+rsi]
1130 adcx r11,rcx
1131 adox r8,rbp
1132 mulx rbp,rcx,QWORD[16+rsi]
1133 adcx r8,rcx
1134 adox r9,rbp
1135 mulx rbp,rcx,QWORD[24+rsi]
1136 adcx r9,rcx
1137 adox r10,rbp
1138 adcx r10,rax
1139
1140
; ---- fourth reduction (low limb r11) ----
1141 mov rdx,r11
1142 mulx rcx,rdx,QWORD[32+rsi]
1143
1144 mulx rbp,rcx,QWORD[rsi]
1145 adox r11,rcx
1146 adcx r8,rbp
1147 mulx rbp,rcx,QWORD[8+rsi]
1148 adox r8,rcx
1149 adcx r9,rbp
1150 mulx rbp,rcx,QWORD[16+rsi]
1151 adox r9,rcx
1152 adcx r10,rbp
1153 mulx rbp,rcx,QWORD[24+rsi]
1154 adox r10,rcx
1155 adcx r11,rbp
1156 adox r11,rax
1157
1158
; ---- add the upper half of the square; limb 0 of the sum lands in r12
; (note the operand order of the first add), rax collects the carry.
; Unsubtracted copies go to rdx, r14 and (below) r15, r8. ----
1159 add r12,r8
1160 adc r9,r13
1161 mov rdx,r12
1162 adc r10,r14
1163 adc r11,r15
1164 mov r14,r9
1165 adc rax,0
1166
1167
; ---- trial subtraction of $L$ord ----
1168 sub r12,QWORD[rsi]
1169 mov r15,r10
1170 sbb r9,QWORD[8+rsi]
1171 sbb r10,QWORD[16+rsi]
1172 mov r8,r11
1173 sbb r11,QWORD[24+rsi]
1174 sbb rax,0
1175
; No borrow: take the difference. The result is left in rdx, r14, r15, r8 --
; the registers the loop expects a[0..3] in on entry.
1176 cmovnc rdx,r12
1177 cmovnc r14,r9
1178 cmovnc r15,r10
1179 cmovnc r8,r11
1180
1181 dec rbx
1182 jnz NEAR $L$oop_ord_sqrx
1183
; Store the result and wipe the limb copies left in xmm1-xmm3.
1184 mov QWORD[rdi],rdx
1185 mov QWORD[8+rdi],r14
1186 pxor xmm1,xmm1
1187 mov QWORD[16+rdi],r15
1188 pxor xmm2,xmm2
1189 mov QWORD[24+rdi],r8
1190 pxor xmm3,xmm3
1191
; Reload the six pushed registers and drop the 48 bytes.
1192 mov r15,QWORD[rsp]
1193
1194 mov r14,QWORD[8+rsp]
1195
1196 mov r13,QWORD[16+rsp]
1197
1198 mov r12,QWORD[24+rsp]
1199
1200 mov rbx,QWORD[32+rsp]
1201
1202 mov rbp,QWORD[40+rsp]
1203
1204 lea rsp,[48+rsp]
1205
1206$L$ord_sqrx_epilogue:
1207 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1208 mov rsi,QWORD[16+rsp]
1209 DB 0F3h,0C3h ;repret
1210
1211$L$SEH_end_ecp_nistz256_ord_sqr_montx:
1212
1213
Robert Sloan5cbb5c82018-04-24 11:35:46 -07001214
1215
1216
1217
; ---------------------------------------------------------------------------
; ecp_nistz256_mul_mont
; Win64 entry: rcx = result pointer, rdx = pointer to a, r8 = pointer to b
; (each 4 x 64-bit limbs). After the prologue: rdi = result, rsi = a, rdx = b.
; Thin wrapper: saves rbp, rbx, r12-r15, loads the operands into the
; registers the worker expects and calls either __ecp_nistz256_mul_montq or,
; when both capability bits selected by mask 0x80100 are set in the dword at
; OPENSSL_ia32cap_P+8, __ecp_nistz256_mul_montx. The worker stores the result
; through rdi.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08001218global ecp_nistz256_mul_mont
1219
1220ALIGN 32
1221ecp_nistz256_mul_mont:
1222 mov QWORD[8+rsp],rdi ;WIN64 prologue
1223 mov QWORD[16+rsp],rsi
1224 mov rax,rsp
1225$L$SEH_begin_ecp_nistz256_mul_mont:
1226 mov rdi,rcx
1227 mov rsi,rdx
1228 mov rdx,r8
1229
1230
Robert Sloanab8b8882018-03-26 11:39:51 -07001231
; ecx = masked capability bits; push does not modify ecx, so the compare
; after the pushes still sees this value.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001232 lea rcx,[OPENSSL_ia32cap_P]
1233 mov rcx,QWORD[8+rcx]
1234 and ecx,0x80100
Adam Langleyfad63272015-11-12 12:15:39 -08001235$L$mul_mont:
1236 push rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07001237
Adam Langleyfad63272015-11-12 12:15:39 -08001238 push rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07001239
Adam Langleyfad63272015-11-12 12:15:39 -08001240 push r12
Robert Sloanab8b8882018-03-26 11:39:51 -07001241
Adam Langleyfad63272015-11-12 12:15:39 -08001242 push r13
Robert Sloanab8b8882018-03-26 11:39:51 -07001243
Adam Langleyfad63272015-11-12 12:15:39 -08001244 push r14
Robert Sloanab8b8882018-03-26 11:39:51 -07001245
Adam Langleyfad63272015-11-12 12:15:39 -08001246 push r15
Robert Sloanab8b8882018-03-26 11:39:51 -07001247
1248$L$mul_body:
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001249 cmp ecx,0x80100
1250 je NEAR $L$mul_montx
; mul-based path: rbx = b pointer, rax = b[0], r9..r12 = a[0..3].
Adam Langleyfad63272015-11-12 12:15:39 -08001251 mov rbx,rdx
1252 mov rax,QWORD[rdx]
1253 mov r9,QWORD[rsi]
1254 mov r10,QWORD[8+rsi]
1255 mov r11,QWORD[16+rsi]
1256 mov r12,QWORD[24+rsi]
1257
1258 call __ecp_nistz256_mul_montq
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001259 jmp NEAR $L$mul_mont_done
1260
1261ALIGN 32
1262$L$mul_montx:
; mulx-based path: rbx = b pointer, rdx = b[0], r9..r12 = a[0..3], and rsi
; is biased by -128 (the worker addresses a[] as [128+disp+rsi]).
1263 mov rbx,rdx
1264 mov rdx,QWORD[rdx]
1265 mov r9,QWORD[rsi]
1266 mov r10,QWORD[8+rsi]
1267 mov r11,QWORD[16+rsi]
1268 mov r12,QWORD[24+rsi]
1269 lea rsi,[((-128))+rsi]
1270
1271 call __ecp_nistz256_mul_montx
Adam Langleyfad63272015-11-12 12:15:39 -08001272$L$mul_mont_done:
; Reload the six pushed registers and drop the 48 bytes.
Robert Sloanab8b8882018-03-26 11:39:51 -07001273 mov r15,QWORD[rsp]
1274
1275 mov r14,QWORD[8+rsp]
1276
1277 mov r13,QWORD[16+rsp]
1278
1279 mov r12,QWORD[24+rsp]
1280
1281 mov rbx,QWORD[32+rsp]
1282
1283 mov rbp,QWORD[40+rsp]
1284
1285 lea rsp,[48+rsp]
1286
1287$L$mul_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08001288 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1289 mov rsi,QWORD[16+rsp]
1290 DB 0F3h,0C3h ;repret
Robert Sloanab8b8882018-03-26 11:39:51 -07001291
Adam Langleyfad63272015-11-12 12:15:39 -08001292$L$SEH_end_ecp_nistz256_mul_mont:
1293
1294
; ---------------------------------------------------------------------------
; __ecp_nistz256_mul_montq (internal worker, private register convention)
; In:  rax = b[0], r9..r12 = a[0..3], rsi = pointer to a, rbx = pointer to b,
;      rdi = result pointer.
; Out: four result limbs stored at [rdi]; they are also left in r12, r13,
;      r8, r9 (limbs 0..3).
; Montgomery multiplication modulo $L$poly using mul. Each reduction step
; drops the low accumulator limb t and adds t*2^96 (the shl/shr 32 pair) plus
; t*$L$poly[3] three limbs up -- no multiplication by the other limbs of the
; modulus is needed.
; Overwrites rax, rcx, rdx, rbp, rbx, r8-r15 without saving them; the visible
; caller (ecp_nistz256_mul_mont) pushes the callee-saved ones beforehand.
; ---------------------------------------------------------------------------
1295ALIGN 32
1296__ecp_nistz256_mul_montq:
1297
1298
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001299
; ---- a[0..3] * b[0] -> r12:r11:r10:r9:r8 ----
; Interleaved loads: r14 = $L$poly[1], r15 = $L$poly[3].
Adam Langleyfad63272015-11-12 12:15:39 -08001300 mov rbp,rax
1301 mul r9
1302 mov r14,QWORD[(($L$poly+8))]
1303 mov r8,rax
1304 mov rax,rbp
1305 mov r9,rdx
1306
1307 mul r10
1308 mov r15,QWORD[(($L$poly+24))]
1309 add r9,rax
1310 mov rax,rbp
1311 adc rdx,0
1312 mov r10,rdx
1313
1314 mul r11
1315 add r10,rax
1316 mov rax,rbp
1317 adc rdx,0
1318 mov r11,rdx
1319
1320 mul r12
1321 add r11,rax
1322 mov rax,r8
1323 adc rdx,0
1324 xor r13,r13
1325 mov r12,rdx
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
; ---- first reduction step (t = r8, already copied to rax for the mul) ----
1336 mov rbp,r8
1337 shl r8,32
1338 mul r15
1339 shr rbp,32
1340 add r9,r8
1341 adc r10,rbp
1342 adc r11,rax
1343 mov rax,QWORD[8+rbx]
1344 adc r12,rdx
1345 adc r13,0
1346 xor r8,r8
1347
1348
1349
; ---- accumulate a[0..3] * b[1] (rax = b[1]) ----
1350 mov rbp,rax
1351 mul QWORD[rsi]
1352 add r9,rax
1353 mov rax,rbp
1354 adc rdx,0
1355 mov rcx,rdx
1356
1357 mul QWORD[8+rsi]
1358 add r10,rcx
1359 adc rdx,0
1360 add r10,rax
1361 mov rax,rbp
1362 adc rdx,0
1363 mov rcx,rdx
1364
1365 mul QWORD[16+rsi]
1366 add r11,rcx
1367 adc rdx,0
1368 add r11,rax
1369 mov rax,rbp
1370 adc rdx,0
1371 mov rcx,rdx
1372
1373 mul QWORD[24+rsi]
1374 add r12,rcx
1375 adc rdx,0
1376 add r12,rax
1377 mov rax,r9
1378 adc r13,rdx
1379 adc r8,0
1380
1381
1382
; ---- second reduction step (t = r9) ----
1383 mov rbp,r9
1384 shl r9,32
1385 mul r15
1386 shr rbp,32
1387 add r10,r9
1388 adc r11,rbp
1389 adc r12,rax
1390 mov rax,QWORD[16+rbx]
1391 adc r13,rdx
1392 adc r8,0
1393 xor r9,r9
1394
1395
1396
; ---- accumulate a[0..3] * b[2] ----
1397 mov rbp,rax
1398 mul QWORD[rsi]
1399 add r10,rax
1400 mov rax,rbp
1401 adc rdx,0
1402 mov rcx,rdx
1403
1404 mul QWORD[8+rsi]
1405 add r11,rcx
1406 adc rdx,0
1407 add r11,rax
1408 mov rax,rbp
1409 adc rdx,0
1410 mov rcx,rdx
1411
1412 mul QWORD[16+rsi]
1413 add r12,rcx
1414 adc rdx,0
1415 add r12,rax
1416 mov rax,rbp
1417 adc rdx,0
1418 mov rcx,rdx
1419
1420 mul QWORD[24+rsi]
1421 add r13,rcx
1422 adc rdx,0
1423 add r13,rax
1424 mov rax,r10
1425 adc r8,rdx
1426 adc r9,0
1427
1428
1429
; ---- third reduction step (t = r10) ----
1430 mov rbp,r10
1431 shl r10,32
1432 mul r15
1433 shr rbp,32
1434 add r11,r10
1435 adc r12,rbp
1436 adc r13,rax
1437 mov rax,QWORD[24+rbx]
1438 adc r8,rdx
1439 adc r9,0
1440 xor r10,r10
1441
1442
1443
; ---- accumulate a[0..3] * b[3] ----
1444 mov rbp,rax
1445 mul QWORD[rsi]
1446 add r11,rax
1447 mov rax,rbp
1448 adc rdx,0
1449 mov rcx,rdx
1450
1451 mul QWORD[8+rsi]
1452 add r12,rcx
1453 adc rdx,0
1454 add r12,rax
1455 mov rax,rbp
1456 adc rdx,0
1457 mov rcx,rdx
1458
1459 mul QWORD[16+rsi]
1460 add r13,rcx
1461 adc rdx,0
1462 add r13,rax
1463 mov rax,rbp
1464 adc rdx,0
1465 mov rcx,rdx
1466
1467 mul QWORD[24+rsi]
1468 add r8,rcx
1469 adc rdx,0
1470 add r8,rax
1471 mov rax,r11
1472 adc r9,rdx
1473 adc r10,0
1474
1475
1476
; ---- final reduction step (t = r11); rcx/rbp save limbs 0/1 of the result ----
1477 mov rbp,r11
1478 shl r11,32
1479 mul r15
1480 shr rbp,32
1481 add r12,r11
1482 adc r13,rbp
1483 mov rcx,r12
1484 adc r8,rax
1485 adc r9,rdx
1486 mov rbp,r13
1487 adc r10,0
1488
1489
1490
; ---- branch-free subtraction of $L$poly ----
; The limbs of $L$poly are applied as -1 (immediate), r14, 0 (immediate), r15.
; On borrow the saved copies (rcx, rbp, rbx, rdx) are selected back.
1491 sub r12,-1
1492 mov rbx,r8
1493 sbb r13,r14
1494 sbb r8,0
1495 mov rdx,r9
1496 sbb r9,r15
1497 sbb r10,0
1498
1499 cmovc r12,rcx
1500 cmovc r13,rbp
1501 mov QWORD[rdi],r12
1502 cmovc r8,rbx
1503 mov QWORD[8+rdi],r13
1504 cmovc r9,rdx
1505 mov QWORD[16+rdi],r8
1506 mov QWORD[24+rdi],r9
1507
1508 DB 0F3h,0C3h ;repret
1509
1510
1511
1512
1513
1514
1515
1516
1517
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001518
; ---------------------------------------------------------------------------
; ecp_nistz256_sqr_mont
; Win64 entry: rcx = result pointer, rdx = pointer to a (4 x 64-bit limbs).
; After the prologue: rdi = result, rsi = a.
; Thin wrapper: saves rbp, rbx, r12-r15, loads a[0..3] into the registers the
; worker expects and calls either __ecp_nistz256_sqr_montq or, when both
; capability bits selected by mask 0x80100 are set in the dword at
; OPENSSL_ia32cap_P+8, __ecp_nistz256_sqr_montx. The worker stores the result
; through rdi.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08001519global ecp_nistz256_sqr_mont
1520
1521ALIGN 32
1522ecp_nistz256_sqr_mont:
1523 mov QWORD[8+rsp],rdi ;WIN64 prologue
1524 mov QWORD[16+rsp],rsi
1525 mov rax,rsp
1526$L$SEH_begin_ecp_nistz256_sqr_mont:
1527 mov rdi,rcx
1528 mov rsi,rdx
1529
1530
Robert Sloanab8b8882018-03-26 11:39:51 -07001531
; ecx = masked capability bits; push does not modify ecx, so the compare
; after the pushes still sees this value.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001532 lea rcx,[OPENSSL_ia32cap_P]
1533 mov rcx,QWORD[8+rcx]
1534 and ecx,0x80100
Adam Langleyfad63272015-11-12 12:15:39 -08001535 push rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07001536
Adam Langleyfad63272015-11-12 12:15:39 -08001537 push rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07001538
Adam Langleyfad63272015-11-12 12:15:39 -08001539 push r12
Robert Sloanab8b8882018-03-26 11:39:51 -07001540
Adam Langleyfad63272015-11-12 12:15:39 -08001541 push r13
Robert Sloanab8b8882018-03-26 11:39:51 -07001542
Adam Langleyfad63272015-11-12 12:15:39 -08001543 push r14
Robert Sloanab8b8882018-03-26 11:39:51 -07001544
Adam Langleyfad63272015-11-12 12:15:39 -08001545 push r15
Robert Sloanab8b8882018-03-26 11:39:51 -07001546
1547$L$sqr_body:
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001548 cmp ecx,0x80100
1549 je NEAR $L$sqr_montx
; mul-based path: rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3].
Adam Langleyfad63272015-11-12 12:15:39 -08001550 mov rax,QWORD[rsi]
1551 mov r14,QWORD[8+rsi]
1552 mov r15,QWORD[16+rsi]
1553 mov r8,QWORD[24+rsi]
1554
1555 call __ecp_nistz256_sqr_montq
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001556 jmp NEAR $L$sqr_mont_done
1557
1558ALIGN 32
1559$L$sqr_montx:
; mulx-based path: rdx = a[0], r14 = a[1], r15 = a[2], r8 = a[3]; rsi is
; biased by -128 before the call.
1560 mov rdx,QWORD[rsi]
1561 mov r14,QWORD[8+rsi]
1562 mov r15,QWORD[16+rsi]
1563 mov r8,QWORD[24+rsi]
1564 lea rsi,[((-128))+rsi]
1565
1566 call __ecp_nistz256_sqr_montx
Adam Langleyfad63272015-11-12 12:15:39 -08001567$L$sqr_mont_done:
; Reload the six pushed registers and drop the 48 bytes.
Robert Sloanab8b8882018-03-26 11:39:51 -07001568 mov r15,QWORD[rsp]
1569
1570 mov r14,QWORD[8+rsp]
1571
1572 mov r13,QWORD[16+rsp]
1573
1574 mov r12,QWORD[24+rsp]
1575
1576 mov rbx,QWORD[32+rsp]
1577
1578 mov rbp,QWORD[40+rsp]
1579
1580 lea rsp,[48+rsp]
1581
1582$L$sqr_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08001583 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1584 mov rsi,QWORD[16+rsp]
1585 DB 0F3h,0C3h ;repret
Robert Sloanab8b8882018-03-26 11:39:51 -07001586
Adam Langleyfad63272015-11-12 12:15:39 -08001587$L$SEH_end_ecp_nistz256_sqr_mont:
1588
1589
; ---------------------------------------------------------------------------
; __ecp_nistz256_sqr_montq (internal worker, private register convention)
; In:  rax = a[0], r14 = a[1], r15 = a[2], r8 = a[3], rsi = pointer to a
;      (a[i] is re-read from memory for the squares), rdi = result pointer.
; Out: four result limbs stored at [rdi]; they are also left in r12..r15.
; Montgomery squaring modulo $L$poly: cross products are computed once and
; doubled, the squares a[i]^2 are added, then four reduction steps fold the
; low half away and the upper half is added, followed by a conditional
; subtraction of $L$poly.
; Overwrites rax, rcx, rdx, rbp, rsi, r8-r15 without saving them.
; ---------------------------------------------------------------------------
1590ALIGN 32
1591__ecp_nistz256_sqr_montq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001592
; ---- cross products with a[0] (kept in r13): a[1]*a[0], a[2]*a[0], a[3]*a[0]
Adam Langleyfad63272015-11-12 12:15:39 -08001593 mov r13,rax
1594 mul r14
1595 mov r9,rax
1596 mov rax,r15
1597 mov r10,rdx
1598
1599 mul r13
1600 add r10,rax
1601 mov rax,r8
1602 adc rdx,0
1603 mov r11,rdx
1604
1605 mul r13
1606 add r11,rax
1607 mov rax,r15
1608 adc rdx,0
1609 mov r12,rdx
1610
1611
; ---- a[2]*a[1] and a[3]*a[1] (r14 = a[1]) ----
1612 mul r14
1613 add r11,rax
1614 mov rax,r8
1615 adc rdx,0
1616 mov rbp,rdx
1617
1618 mul r14
1619 add r12,rax
1620 mov rax,r8
1621 adc rdx,0
1622 add r12,rbp
1623 mov r13,rdx
1624 adc r13,0
1625
1626
; ---- a[3]*a[2]; r15 is cleared to receive the top carry of the doubling ----
1627 mul r15
1628 xor r15,r15
1629 add r13,rax
1630 mov rax,QWORD[rsi]
1631 mov r14,rdx
1632 adc r14,0
1633
; ---- double the cross products: r15:r14:...:r9 = 2 * (r14:...:r9) ----
1634 add r9,r9
1635 adc r10,r10
1636 adc r11,r11
1637 adc r12,r12
1638 adc r13,r13
1639 adc r14,r14
1640 adc r15,0
1641
; ---- add the squares a[i]*a[i] (a[i] reloaded from [rsi]) ----
1642 mul rax
1643 mov r8,rax
1644 mov rax,QWORD[8+rsi]
1645 mov rcx,rdx
1646
1647 mul rax
1648 add r9,rcx
1649 adc r10,rax
1650 mov rax,QWORD[16+rsi]
1651 adc rdx,0
1652 mov rcx,rdx
1653
1654 mul rax
1655 add r11,rcx
1656 adc r12,rax
1657 mov rax,QWORD[24+rsi]
1658 adc rdx,0
1659 mov rcx,rdx
1660
1661 mul rax
1662 add r13,rcx
1663 adc r14,rax
1664 mov rax,r8
1665 adc r15,rdx
1666
; From here on rsi = $L$poly[1] and rbp = $L$poly[3] (the input pointer is
; no longer needed).
1667 mov rsi,QWORD[(($L$poly+8))]
1668 mov rbp,QWORD[(($L$poly+24))]
1669
1670
1671
1672
; ---- first reduction step: low limb t = r8 is dropped; t*2^96 (shl/shr 32)
; and t*$L$poly[3] are added. rdx carries the top word into the next step.
1673 mov rcx,r8
1674 shl r8,32
1675 mul rbp
1676 shr rcx,32
1677 add r9,r8
1678 adc r10,rcx
1679 adc r11,rax
1680 mov rax,r9
1681 adc rdx,0
1682
1683
1684
; ---- second reduction step (t = r9) ----
1685 mov rcx,r9
1686 shl r9,32
1687 mov r8,rdx
1688 mul rbp
1689 shr rcx,32
1690 add r10,r9
1691 adc r11,rcx
1692 adc r8,rax
1693 mov rax,r10
1694 adc rdx,0
1695
1696
1697
; ---- third reduction step (t = r10) ----
1698 mov rcx,r10
1699 shl r10,32
1700 mov r9,rdx
1701 mul rbp
1702 shr rcx,32
1703 add r11,r10
1704 adc r8,rcx
1705 adc r9,rax
1706 mov rax,r11
1707 adc rdx,0
1708
1709
1710
; ---- fourth reduction step (t = r11); r11 is then cleared for the carry ----
1711 mov rcx,r11
1712 shl r11,32
1713 mov r10,rdx
1714 mul rbp
1715 shr rcx,32
1716 add r8,r11
1717 adc r9,rcx
1718 adc r10,rax
1719 adc rdx,0
1720 xor r11,r11
1721
1722
1723
; ---- add the reduced low half (rdx:r10:r9:r8) to the upper half of the
; square (r15:r14:r13:r12); r11 = carry. Copies of the sum are saved in
; r8, r9 and (below) r10, rcx.
1724 add r12,r8
1725 adc r13,r9
1726 mov r8,r12
1727 adc r14,r10
1728 adc r15,rdx
1729 mov r9,r13
1730 adc r11,0
1731
; ---- branch-free subtraction of $L$poly (limbs: -1, rsi, 0, rbp) ----
1732 sub r12,-1
1733 mov r10,r14
1734 sbb r13,rsi
1735 sbb r14,0
1736 mov rcx,r15
1737 sbb r15,rbp
1738 sbb r11,0
1739
; On borrow the saved, unsubtracted limbs are selected back.
1740 cmovc r12,r8
1741 cmovc r13,r9
1742 mov QWORD[rdi],r12
1743 cmovc r14,r10
1744 mov QWORD[8+rdi],r13
1745 cmovc r15,rcx
1746 mov QWORD[16+rdi],r14
1747 mov QWORD[24+rdi],r15
1748
1749 DB 0F3h,0C3h ;repret
1750
1751
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001752
; ---------------------------------------------------------------------------
; __ecp_nistz256_mul_montx (internal worker, private register convention)
; In:  rdx = b[0], r9..r12 = a[0..3], rsi = pointer to a minus 128 (accesses
;      add 128 back), rbx = pointer to b, rdi = result pointer.
; Out: four result limbs stored at [rdi]; they are also left in r12, r13,
;      r8, r9 (limbs 0..3).
; Montgomery multiplication modulo $L$poly using mulx/adcx/adox and
; shlx/shrx. r14 holds the shift count 32 during the main body and is
; reloaded with $L$poly[1] for the final subtraction; r15 = $L$poly[3].
; Each reduction step drops the low limb t and adds t*2^96 (shlx/shrx pair)
; plus t*$L$poly[3] three limbs up.
; Overwrites rax, rcx, rdx, rbp, rbx, r8-r15 without saving them.
; ---------------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001753ALIGN 32
1754__ecp_nistz256_mul_montx:
1755
1756
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001757
; ---- a[0..3] * b[0] -> r12:r11:r10:r9:r8 ----
; "xor r13,r13" clears CF, so the first adc below acts as a plain add
; (mulx, mov, shlx and shrx leave the flags untouched in between).
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001758 mulx r9,r8,r9
1759 mulx r10,rcx,r10
1760 mov r14,32
1761 xor r13,r13
1762 mulx r11,rbp,r11
1763 mov r15,QWORD[(($L$poly+24))]
1764 adc r9,rcx
1765 mulx r12,rcx,r12
1766 mov rdx,r8
1767 adc r10,rbp
1768 shlx rbp,r8,r14
1769 adc r11,rcx
1770 shrx rcx,r8,r14
1771 adc r12,0
1772
1773
1774
; ---- first reduction step (t = r8, also in rdx for the mulx) ----
1775 add r9,rbp
1776 adc r10,rcx
1777
1778 mulx rbp,rcx,r15
1779 mov rdx,QWORD[8+rbx]
1780 adc r11,rcx
1781 adc r12,rbp
1782 adc r13,0
1783 xor r8,r8
1784
1785
1786
; ---- accumulate a[0..3] * b[1] on two carry chains (adcx = CF, adox = OF);
; the shlx/shrx of the new low limb r9 are interleaved for the next reduction.
1787 mulx rbp,rcx,QWORD[((0+128))+rsi]
1788 adcx r9,rcx
1789 adox r10,rbp
1790
1791 mulx rbp,rcx,QWORD[((8+128))+rsi]
1792 adcx r10,rcx
1793 adox r11,rbp
1794
1795 mulx rbp,rcx,QWORD[((16+128))+rsi]
1796 adcx r11,rcx
1797 adox r12,rbp
1798
1799 mulx rbp,rcx,QWORD[((24+128))+rsi]
1800 mov rdx,r9
1801 adcx r12,rcx
1802 shlx rcx,r9,r14
1803 adox r13,rbp
1804 shrx rbp,r9,r14
1805
1806 adcx r13,r8
1807 adox r8,r8
1808 adc r8,0
1809
1810
1811
; ---- second reduction step (t = r9) ----
1812 add r10,rcx
1813 adc r11,rbp
1814
1815 mulx rbp,rcx,r15
1816 mov rdx,QWORD[16+rbx]
1817 adc r12,rcx
1818 adc r13,rbp
1819 adc r8,0
1820 xor r9,r9
1821
1822
1823
; ---- accumulate a[0..3] * b[2] ----
1824 mulx rbp,rcx,QWORD[((0+128))+rsi]
1825 adcx r10,rcx
1826 adox r11,rbp
1827
1828 mulx rbp,rcx,QWORD[((8+128))+rsi]
1829 adcx r11,rcx
1830 adox r12,rbp
1831
1832 mulx rbp,rcx,QWORD[((16+128))+rsi]
1833 adcx r12,rcx
1834 adox r13,rbp
1835
1836 mulx rbp,rcx,QWORD[((24+128))+rsi]
1837 mov rdx,r10
1838 adcx r13,rcx
1839 shlx rcx,r10,r14
1840 adox r8,rbp
1841 shrx rbp,r10,r14
1842
1843 adcx r8,r9
1844 adox r9,r9
1845 adc r9,0
1846
1847
1848
; ---- third reduction step (t = r10) ----
1849 add r11,rcx
1850 adc r12,rbp
1851
1852 mulx rbp,rcx,r15
1853 mov rdx,QWORD[24+rbx]
1854 adc r13,rcx
1855 adc r8,rbp
1856 adc r9,0
1857 xor r10,r10
1858
1859
1860
; ---- accumulate a[0..3] * b[3] ----
1861 mulx rbp,rcx,QWORD[((0+128))+rsi]
1862 adcx r11,rcx
1863 adox r12,rbp
1864
1865 mulx rbp,rcx,QWORD[((8+128))+rsi]
1866 adcx r12,rcx
1867 adox r13,rbp
1868
1869 mulx rbp,rcx,QWORD[((16+128))+rsi]
1870 adcx r13,rcx
1871 adox r8,rbp
1872
1873 mulx rbp,rcx,QWORD[((24+128))+rsi]
1874 mov rdx,r11
1875 adcx r8,rcx
1876 shlx rcx,r11,r14
1877 adox r9,rbp
1878 shrx rbp,r11,r14
1879
1880 adcx r9,r10
1881 adox r10,r10
1882 adc r10,0
1883
1884
1885
; ---- final reduction step (t = r11) ----
; Interleaved: rbx/rdx save limbs 0/1 of the result and r14 is reloaded with
; $L$poly[1] (the shift count is no longer needed).
1886 add r12,rcx
1887 adc r13,rbp
1888
1889 mulx rbp,rcx,r15
1890 mov rbx,r12
1891 mov r14,QWORD[(($L$poly+8))]
1892 adc r8,rcx
1893 mov rdx,r13
1894 adc r9,rbp
1895 adc r10,0
1896
1897
1898
; ---- branch-free subtraction of $L$poly (limbs: -1, r14, 0, r15) ----
; "xor eax,eax" clears CF so the leading sbb behaves as a plain sub.
; On borrow the saved copies (rbx, rdx, rcx, rbp) are selected back.
1899 xor eax,eax
1900 mov rcx,r8
1901 sbb r12,-1
1902 sbb r13,r14
1903 sbb r8,0
1904 mov rbp,r9
1905 sbb r9,r15
1906 sbb r10,0
1907
1908 cmovc r12,rbx
1909 cmovc r13,rdx
1910 mov QWORD[rdi],r12
1911 cmovc r8,rcx
1912 mov QWORD[8+rdi],r13
1913 cmovc r9,rbp
1914 mov QWORD[16+rdi],r8
1915 mov QWORD[24+rdi],r9
1916
1917 DB 0F3h,0C3h ;repret
1918
1919
1920
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001921
; ---------------------------------------------------------------------------
; __ecp_nistz256_sqr_montx  (internal helper, MULX/ADCX/ADOX code path)
;
; Four-limb squaring followed by four reduction steps and one conditional
; subtraction of the modulus.
;
; Registers as read by this code:
;   rdx, r14, r15, r8   the four input limbs used for the cross products
;                       (presumably limbs 0..3 of the value at 128+rsi --
;                       confirm in caller)
;   rsi                 input pointer biased by -128; the limbs are re-read
;                       from 128+rsi.. for the diagonal squares
;   rdi                 destination, four qwords
; Result: stored to [rdi]..[24+rdi] and also left in r12,r13,r14,r15.
; On return rsi holds the QWORD at $L$poly+8 and rbp the QWORD at $L$poly+24.
; Overwrites rax,rcx,rdx,rbp,rsi,r8-r15 and the flags.
; ---------------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001922ALIGN 32
1923__ecp_nistz256_sqr_montx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001924
; Cross products with rdx: rdx*r14, rdx*r15, rdx*r8. "xor eax,eax" clears CF
; for the adc chain that merges them into r9..r12.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001925 mulx r10,r9,r14
1926 mulx r11,rcx,r15
1927 xor eax,eax
1928 adc r10,rcx
1929 mulx r12,rbp,r8
1930 mov rdx,r14
1931 adc r11,rbp
1932 adc r12,0
1933 xor r13,r13
1934
1935
; Cross products with r14: r14*r15 and r14*r8.
1936 mulx rbp,rcx,r15
1937 adcx r11,rcx
1938 adox r12,rbp
1939
1940 mulx rbp,rcx,r8
1941 mov rdx,r15
1942 adcx r12,rcx
1943 adox r13,rbp
1944 adc r13,0
1945
1946
; Last cross product r15*r8. From here the adcx chain doubles the cross
; product limbs (rX += rX) while the adox chain adds the diagonal squares.
1947 mulx r14,rcx,r8
1948 mov rdx,QWORD[((0+128))+rsi]
1949 xor r15,r15
1950 adcx r9,r9
1951 adox r13,rcx
1952 adcx r10,r10
1953 adox r14,r15
1954
; Diagonal squares: each "mulx hi,lo,rdx" multiplies rdx by itself.
1955 mulx rbp,r8,rdx
1956 mov rdx,QWORD[((8+128))+rsi]
1957 adcx r11,r11
1958 adox r9,rbp
1959 adcx r12,r12
1960 mulx rax,rcx,rdx
1961 mov rdx,QWORD[((16+128))+rsi]
1962 adcx r13,r13
1963 adox r10,rcx
1964 adcx r14,r14
; Raw 0x67 prefix byte emitted ahead of the next instruction; presumably
; padding chosen by the generator -- confirm in the Perl source.
1965DB 0x67
1966 mulx rbp,rcx,rdx
1967 mov rdx,QWORD[((24+128))+rsi]
1968 adox r11,rax
1969 adcx r15,r15
1970 adox r12,rcx
; rsi stops being the input pointer here: it becomes the shift count 32.
1971 mov rsi,32
1972 adox r13,rbp
1973DB 0x67,0x67
1974 mulx rax,rcx,rdx
1975 mov rdx,QWORD[(($L$poly+24))]
1976 adox r14,rcx
1977 shlx rcx,r8,rsi
1978 adox r15,rax
1979 shrx rax,r8,rsi
; Keep a copy of the $L$poly+24 constant in rbp for the final subtraction.
1980 mov rbp,rdx
1981
1982
; Reduction step 0: fold r8 upward as r8<<32, r8>>32 and r8 * rdx.
1983 add r9,rcx
1984 adc r10,rax
1985
1986 mulx r8,rcx,r8
1987 adc r11,rcx
1988 shlx rcx,r9,rsi
1989 adc r8,0
1990 shrx rax,r9,rsi
1991
1992
; Reduction step 1: fold r9.
1993 add r10,rcx
1994 adc r11,rax
1995
1996 mulx r9,rcx,r9
1997 adc r8,rcx
1998 shlx rcx,r10,rsi
1999 adc r9,0
2000 shrx rax,r10,rsi
2001
2002
; Reduction step 2: fold r10.
2003 add r11,rcx
2004 adc r8,rax
2005
2006 mulx r10,rcx,r10
2007 adc r9,rcx
2008 shlx rcx,r11,rsi
2009 adc r10,0
2010 shrx rax,r11,rsi
2011
2012
; Reduction step 3: fold r11.
2013 add r8,rcx
2014 adc r9,rax
2015
2016 mulx r11,rcx,r11
2017 adc r10,rcx
2018 adc r11,0
2019
; Add the reduced low half (r8..r11) to the high half (r12..r15); rdx
; receives the carry-out. r8..r11 are then reused as backups of the sum.
2020 xor rdx,rdx
2021 add r12,r8
2022 mov rsi,QWORD[(($L$poly+8))]
2023 adc r13,r9
2024 mov r8,r12
2025 adc r14,r10
2026 adc r15,r11
2027 mov r9,r13
2028 adc rdx,0
2029
; Trial subtraction of the modulus limbs (-1, rsi, 0, rbp).
2030 sub r12,-1
2031 mov r10,r14
2032 sbb r13,rsi
2033 sbb r14,0
2034 mov r11,r15
2035 sbb r15,rbp
2036 sbb rdx,0
2037
; On borrow restore the backups; selection is done with cmovc, not a branch.
2038 cmovc r12,r8
2039 cmovc r13,r9
2040 mov QWORD[rdi],r12
2041 cmovc r14,r10
2042 mov QWORD[8+rdi],r13
2043 cmovc r15,r11
2044 mov QWORD[16+rdi],r14
2045 mov QWORD[24+rdi],r15
2046
2047 DB 0F3h,0C3h ;repret
2048
2049
Adam Langleyfad63272015-11-12 12:15:39 -08002050
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002051
; ---------------------------------------------------------------------------
; ecp_nistz256_select_w5  (exported)
;
; Copies one 96-byte entry out of a 16-entry table. Every entry is loaded and
; masked on each call, so the memory access pattern does not depend on the
; index.
;
; Registers as read by this code (Win64 argument registers):
;   rcx  destination, 96 bytes (written with unaligned stores)
;   rdx  table, 16 entries of 96 bytes (read with movdqa, so it must be
;        16-byte aligned)
;   r8d  index; the running counter starts at 1, so index k in 1..16 selects
;        the k-th entry and any other value yields all zeroes
; If bit 5 (value 32) of the dword at OPENSSL_ia32cap_P+8 is set, control is
; transferred to $L$avx2_select_w5 instead.
; xmm6-xmm15 are saved on the stack and restored before returning.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08002052global ecp_nistz256_select_w5
2053
2054ALIGN 32
2055ecp_nistz256_select_w5:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002056
Robert Sloan8f860b12017-08-28 07:37:06 -07002057 lea rax,[OPENSSL_ia32cap_P]
2058 mov rax,QWORD[8+rax]
2059 test eax,32
2060 jnz NEAR $L$avx2_select_w5
Adam Langleyfad63272015-11-12 12:15:39 -08002061 lea rax,[((-136))+rsp]
2062$L$SEH_begin_ecp_nistz256_select_w5:
; Hand-encoded prologue. The bytes decode as:
;   lea    rsp,[rax-0x20]            (rsp drops by 168 in total)
;   movaps [rax-0x20],xmm6
;   movaps [rax-0x10],xmm7
;   movaps [rax+0x00],xmm8  ...  movaps [rax+0x70],xmm15
2063DB 0x48,0x8d,0x60,0xe0
2064DB 0x0f,0x29,0x70,0xe0
2065DB 0x0f,0x29,0x78,0xf0
2066DB 0x44,0x0f,0x29,0x00
2067DB 0x44,0x0f,0x29,0x48,0x10
2068DB 0x44,0x0f,0x29,0x50,0x20
2069DB 0x44,0x0f,0x29,0x58,0x30
2070DB 0x44,0x0f,0x29,0x60,0x40
2071DB 0x44,0x0f,0x29,0x68,0x50
2072DB 0x44,0x0f,0x29,0x70,0x60
2073DB 0x44,0x0f,0x29,0x78,0x70
; xmm0 = increment (all dwords 1), xmm1 = index broadcast to all dwords,
; xmm8 = running counter starting at 1, xmm2..xmm7 = 96-byte accumulator.
2074 movdqa xmm0,XMMWORD[$L$One]
2075 movd xmm1,r8d
2076
2077 pxor xmm2,xmm2
2078 pxor xmm3,xmm3
2079 pxor xmm4,xmm4
2080 pxor xmm5,xmm5
2081 pxor xmm6,xmm6
2082 pxor xmm7,xmm7
2083
2084 movdqa xmm8,xmm0
2085 pshufd xmm1,xmm1,0
2086
; rax = number of table entries left to scan.
2087 mov rax,16
2088$L$select_loop_sse_w5:
2089
; xmm15 = all-ones if the counter equals the index, else all-zeroes.
2090 movdqa xmm15,xmm8
2091 paddd xmm8,xmm0
2092 pcmpeqd xmm15,xmm1
2093
2094 movdqa xmm9,XMMWORD[rdx]
2095 movdqa xmm10,XMMWORD[16+rdx]
2096 movdqa xmm11,XMMWORD[32+rdx]
2097 movdqa xmm12,XMMWORD[48+rdx]
2098 movdqa xmm13,XMMWORD[64+rdx]
2099 movdqa xmm14,XMMWORD[80+rdx]
2100 lea rdx,[96+rdx]
2101
; Mask the entry and OR it into the accumulator.
2102 pand xmm9,xmm15
2103 pand xmm10,xmm15
2104 por xmm2,xmm9
2105 pand xmm11,xmm15
2106 por xmm3,xmm10
2107 pand xmm12,xmm15
2108 por xmm4,xmm11
2109 pand xmm13,xmm15
2110 por xmm5,xmm12
2111 pand xmm14,xmm15
2112 por xmm6,xmm13
2113 por xmm7,xmm14
2114
2115 dec rax
2116 jnz NEAR $L$select_loop_sse_w5
2117
2118 movdqu XMMWORD[rcx],xmm2
2119 movdqu XMMWORD[16+rcx],xmm3
2120 movdqu XMMWORD[32+rcx],xmm4
2121 movdqu XMMWORD[48+rcx],xmm5
2122 movdqu XMMWORD[64+rcx],xmm6
2123 movdqu XMMWORD[80+rcx],xmm7
; Restore xmm6-xmm15 from the slots written by the prologue and release the
; 168-byte frame.
2124 movaps xmm6,XMMWORD[rsp]
2125 movaps xmm7,XMMWORD[16+rsp]
2126 movaps xmm8,XMMWORD[32+rsp]
2127 movaps xmm9,XMMWORD[48+rsp]
2128 movaps xmm10,XMMWORD[64+rsp]
2129 movaps xmm11,XMMWORD[80+rsp]
2130 movaps xmm12,XMMWORD[96+rsp]
2131 movaps xmm13,XMMWORD[112+rsp]
2132 movaps xmm14,XMMWORD[128+rsp]
2133 movaps xmm15,XMMWORD[144+rsp]
2134 lea rsp,[168+rsp]
Adam Langleyfad63272015-11-12 12:15:39 -08002135 DB 0F3h,0C3h ;repret
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002136
Robert Sloanab8b8882018-03-26 11:39:51 -07002137$L$SEH_end_ecp_nistz256_select_w5:
Adam Langleyfad63272015-11-12 12:15:39 -08002138
2139
2140
2141
; ---------------------------------------------------------------------------
; ecp_nistz256_select_w7  (exported)
;
; Copies one 64-byte entry out of a 64-entry table. Every entry is loaded and
; masked on each call, so the memory access pattern does not depend on the
; index.
;
; Registers as read by this code (Win64 argument registers):
;   rcx  destination, 64 bytes (written with unaligned stores)
;   rdx  table, 64 entries of 64 bytes (read with movdqa, so it must be
;        16-byte aligned)
;   r8d  index; the running counter starts at 1, so index k in 1..64 selects
;        the k-th entry and any other value yields all zeroes
; If bit 5 (value 32) of the dword at OPENSSL_ia32cap_P+8 is set, control is
; transferred to $L$avx2_select_w7 instead.
; xmm6-xmm15 are saved on the stack and restored before returning.
; ---------------------------------------------------------------------------
2142global ecp_nistz256_select_w7
2143
2144ALIGN 32
2145ecp_nistz256_select_w7:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002146
Robert Sloan8f860b12017-08-28 07:37:06 -07002147 lea rax,[OPENSSL_ia32cap_P]
2148 mov rax,QWORD[8+rax]
2149 test eax,32
2150 jnz NEAR $L$avx2_select_w7
Adam Langleyfad63272015-11-12 12:15:39 -08002151 lea rax,[((-136))+rsp]
2152$L$SEH_begin_ecp_nistz256_select_w7:
; Hand-encoded prologue. The bytes decode as:
;   lea    rsp,[rax-0x20]            (rsp drops by 168 in total)
;   movaps [rax-0x20],xmm6
;   movaps [rax-0x10],xmm7
;   movaps [rax+0x00],xmm8  ...  movaps [rax+0x70],xmm15
2153DB 0x48,0x8d,0x60,0xe0
2154DB 0x0f,0x29,0x70,0xe0
2155DB 0x0f,0x29,0x78,0xf0
2156DB 0x44,0x0f,0x29,0x00
2157DB 0x44,0x0f,0x29,0x48,0x10
2158DB 0x44,0x0f,0x29,0x50,0x20
2159DB 0x44,0x0f,0x29,0x58,0x30
2160DB 0x44,0x0f,0x29,0x60,0x40
2161DB 0x44,0x0f,0x29,0x68,0x50
2162DB 0x44,0x0f,0x29,0x70,0x60
2163DB 0x44,0x0f,0x29,0x78,0x70
; xmm8 = running counter starting at 1, xmm0 = increment (all dwords 1),
; xmm1 = index broadcast to all dwords, xmm2..xmm5 = 64-byte accumulator.
2164 movdqa xmm8,XMMWORD[$L$One]
2165 movd xmm1,r8d
2166
2167 pxor xmm2,xmm2
2168 pxor xmm3,xmm3
2169 pxor xmm4,xmm4
2170 pxor xmm5,xmm5
2171
2172 movdqa xmm0,xmm8
2173 pshufd xmm1,xmm1,0
; rax = number of table entries left to scan.
2174 mov rax,64
2175
2176$L$select_loop_sse_w7:
; xmm15 = all-ones if the counter equals the index, else all-zeroes.
2177 movdqa xmm15,xmm8
2178 paddd xmm8,xmm0
2179 movdqa xmm9,XMMWORD[rdx]
2180 movdqa xmm10,XMMWORD[16+rdx]
2181 pcmpeqd xmm15,xmm1
2182 movdqa xmm11,XMMWORD[32+rdx]
2183 movdqa xmm12,XMMWORD[48+rdx]
2184 lea rdx,[64+rdx]
2185
; Mask the entry and OR it into the accumulator; the prefetch touches the
; table 255 bytes past the already advanced pointer.
2186 pand xmm9,xmm15
2187 pand xmm10,xmm15
2188 por xmm2,xmm9
2189 pand xmm11,xmm15
2190 por xmm3,xmm10
2191 pand xmm12,xmm15
2192 por xmm4,xmm11
2193 prefetcht0 [255+rdx]
2194 por xmm5,xmm12
2195
2196 dec rax
2197 jnz NEAR $L$select_loop_sse_w7
2198
2199 movdqu XMMWORD[rcx],xmm2
2200 movdqu XMMWORD[16+rcx],xmm3
2201 movdqu XMMWORD[32+rcx],xmm4
2202 movdqu XMMWORD[48+rcx],xmm5
; Restore xmm6-xmm15 from the slots written by the prologue and release the
; 168-byte frame.
2203 movaps xmm6,XMMWORD[rsp]
2204 movaps xmm7,XMMWORD[16+rsp]
2205 movaps xmm8,XMMWORD[32+rsp]
2206 movaps xmm9,XMMWORD[48+rsp]
2207 movaps xmm10,XMMWORD[64+rsp]
2208 movaps xmm11,XMMWORD[80+rsp]
2209 movaps xmm12,XMMWORD[96+rsp]
2210 movaps xmm13,XMMWORD[112+rsp]
2211 movaps xmm14,XMMWORD[128+rsp]
2212 movaps xmm15,XMMWORD[144+rsp]
2213 lea rsp,[168+rsp]
Adam Langleyfad63272015-11-12 12:15:39 -08002214 DB 0F3h,0C3h ;repret
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002215
Robert Sloanab8b8882018-03-26 11:39:51 -07002216$L$SEH_end_ecp_nistz256_select_w7:
Adam Langleyfad63272015-11-12 12:15:39 -08002217
Robert Sloan8f860b12017-08-28 07:37:06 -07002218
2219
2220
; ---------------------------------------------------------------------------
; ecp_nistz256_avx2_select_w5  (also entered through $L$avx2_select_w5)
;
; 256-bit-vector version of the 16-entry, 96-byte-per-entry table copy. Two
; entries are examined per iteration and every entry is loaded on each call.
;
; Registers as read by this code:
;   rcx  destination, 96 bytes (written with unaligned stores)
;   rdx  table, 16 entries of 96 bytes (read with vmovdqa on ymm registers,
;        so it must be 32-byte aligned)
;   r8d  index; counters start at 1 and 2, so index k in 1..16 selects the
;        k-th entry and any other value yields all zeroes
; r11 keeps the entry value of rsp for the epilogue. xmm6-xmm15 are saved on
; the stack and restored before returning.
; ---------------------------------------------------------------------------
2221ALIGN 32
2222ecp_nistz256_avx2_select_w5:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002223
Robert Sloan8f860b12017-08-28 07:37:06 -07002224$L$avx2_select_w5:
2225 vzeroupper
2226 lea rax,[((-136))+rsp]
Robert Sloanab8b8882018-03-26 11:39:51 -07002227 mov r11,rsp
Robert Sloan8f860b12017-08-28 07:37:06 -07002228$L$SEH_begin_ecp_nistz256_avx2_select_w5:
; Hand-encoded prologue. The bytes decode as:
;   lea     rsp,[rax-0x20]           (rsp drops by 168 in total)
;   vmovaps [rax-0x20],xmm6
;   vmovaps [rax-0x10],xmm7
;   vmovaps [rax+0x00],xmm8  ...  vmovaps [rax+0x70],xmm15
2229DB 0x48,0x8d,0x60,0xe0
2230DB 0xc5,0xf8,0x29,0x70,0xe0
2231DB 0xc5,0xf8,0x29,0x78,0xf0
2232DB 0xc5,0x78,0x29,0x40,0x00
2233DB 0xc5,0x78,0x29,0x48,0x10
2234DB 0xc5,0x78,0x29,0x50,0x20
2235DB 0xc5,0x78,0x29,0x58,0x30
2236DB 0xc5,0x78,0x29,0x60,0x40
2237DB 0xc5,0x78,0x29,0x68,0x50
2238DB 0xc5,0x78,0x29,0x70,0x60
2239DB 0xc5,0x78,0x29,0x78,0x70
; ymm0 = counter step (all dwords 2); ymm2..ymm4 = 96-byte accumulator.
2240 vmovdqa ymm0,YMMWORD[$L$Two]
2241
2242 vpxor ymm2,ymm2,ymm2
2243 vpxor ymm3,ymm3,ymm3
2244 vpxor ymm4,ymm4,ymm4
2245
; ymm5 counts 1,3,5,... for the first entry of each pair and ymm10 counts
; 2,4,6,... for the second.
2246 vmovdqa ymm5,YMMWORD[$L$One]
2247 vmovdqa ymm10,YMMWORD[$L$Two]
2248
; Broadcast the index to all eight dwords: ymm2 is all-zero, so vpermd
; picks dword 0 of ymm1 for every lane.
2249 vmovd xmm1,r8d
2250 vpermd ymm1,ymm2,ymm1
2251
; rax = number of entry pairs left to scan (8 pairs = 16 entries).
2252 mov rax,8
2253$L$select_loop_avx2_w5:
2254
2255 vmovdqa ymm6,YMMWORD[rdx]
2256 vmovdqa ymm7,YMMWORD[32+rdx]
2257 vmovdqa ymm8,YMMWORD[64+rdx]
2258
2259 vmovdqa ymm11,YMMWORD[96+rdx]
2260 vmovdqa ymm12,YMMWORD[128+rdx]
2261 vmovdqa ymm13,YMMWORD[160+rdx]
2262
; ymm9 / ymm14 = all-ones masks when the respective counter equals the index.
2263 vpcmpeqd ymm9,ymm5,ymm1
2264 vpcmpeqd ymm14,ymm10,ymm1
2265
2266 vpaddd ymm5,ymm5,ymm0
2267 vpaddd ymm10,ymm10,ymm0
2268 lea rdx,[192+rdx]
2269
2270 vpand ymm6,ymm6,ymm9
2271 vpand ymm7,ymm7,ymm9
2272 vpand ymm8,ymm8,ymm9
2273 vpand ymm11,ymm11,ymm14
2274 vpand ymm12,ymm12,ymm14
2275 vpand ymm13,ymm13,ymm14
2276
; Fold the masked entries into the accumulator with XOR.
2277 vpxor ymm2,ymm2,ymm6
2278 vpxor ymm3,ymm3,ymm7
2279 vpxor ymm4,ymm4,ymm8
2280 vpxor ymm2,ymm2,ymm11
2281 vpxor ymm3,ymm3,ymm12
2282 vpxor ymm4,ymm4,ymm13
2283
2284 dec rax
2285 jnz NEAR $L$select_loop_avx2_w5
2286
2287 vmovdqu YMMWORD[rcx],ymm2
2288 vmovdqu YMMWORD[32+rcx],ymm3
2289 vmovdqu YMMWORD[64+rcx],ymm4
; Clear the upper ymm halves before the legacy-SSE restores below.
2290 vzeroupper
2291 movaps xmm6,XMMWORD[rsp]
2292 movaps xmm7,XMMWORD[16+rsp]
2293 movaps xmm8,XMMWORD[32+rsp]
2294 movaps xmm9,XMMWORD[48+rsp]
2295 movaps xmm10,XMMWORD[64+rsp]
2296 movaps xmm11,XMMWORD[80+rsp]
2297 movaps xmm12,XMMWORD[96+rsp]
2298 movaps xmm13,XMMWORD[112+rsp]
2299 movaps xmm14,XMMWORD[128+rsp]
2300 movaps xmm15,XMMWORD[144+rsp]
; Restore rsp from the copy taken on entry.
Robert Sloanab8b8882018-03-26 11:39:51 -07002301 lea rsp,[r11]
Robert Sloan8f860b12017-08-28 07:37:06 -07002302 DB 0F3h,0C3h ;repret
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002303
Robert Sloanab8b8882018-03-26 11:39:51 -07002304$L$SEH_end_ecp_nistz256_avx2_select_w5:
Robert Sloan8f860b12017-08-28 07:37:06 -07002305
2306
2307
2308
; ---------------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7  (exported; also entered via $L$avx2_select_w7)
;
; 256-bit-vector version of the 64-entry, 64-byte-per-entry table copy. The
; loop examines three entries per iteration for 21 iterations (63 entries);
; the 64th entry is handled by the straight-line tail after the loop. Every
; entry is loaded on each call.
;
; Registers as read by this code:
;   rcx  destination, 64 bytes (written with unaligned stores)
;   rdx  table, 64 entries of 64 bytes (read with vmovdqa on ymm registers,
;        so it must be 32-byte aligned)
;   r8d  index; counters start at 1, 2 and 3, so index k in 1..64 selects
;        the k-th entry and any other value yields all zeroes
; r11 keeps the entry value of rsp for the epilogue. xmm6-xmm15 are saved on
; the stack and restored before returning.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08002309global ecp_nistz256_avx2_select_w7
2310
2311ALIGN 32
2312ecp_nistz256_avx2_select_w7:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002313
Robert Sloan8f860b12017-08-28 07:37:06 -07002314$L$avx2_select_w7:
2315 vzeroupper
Robert Sloanab8b8882018-03-26 11:39:51 -07002316 mov r11,rsp
Robert Sloan8f860b12017-08-28 07:37:06 -07002317 lea rax,[((-136))+rsp]
Adam Langleyfad63272015-11-12 12:15:39 -08002318$L$SEH_begin_ecp_nistz256_avx2_select_w7:
; Hand-encoded prologue. The bytes decode as:
;   lea     rsp,[rax-0x20]           (rsp drops by 168 in total)
;   vmovaps [rax-0x20],xmm6
;   vmovaps [rax-0x10],xmm7
;   vmovaps [rax+0x00],xmm8  ...  vmovaps [rax+0x70],xmm15
Robert Sloan8f860b12017-08-28 07:37:06 -07002319DB 0x48,0x8d,0x60,0xe0
2320DB 0xc5,0xf8,0x29,0x70,0xe0
2321DB 0xc5,0xf8,0x29,0x78,0xf0
2322DB 0xc5,0x78,0x29,0x40,0x00
2323DB 0xc5,0x78,0x29,0x48,0x10
2324DB 0xc5,0x78,0x29,0x50,0x20
2325DB 0xc5,0x78,0x29,0x58,0x30
2326DB 0xc5,0x78,0x29,0x60,0x40
2327DB 0xc5,0x78,0x29,0x68,0x50
2328DB 0xc5,0x78,0x29,0x70,0x60
2329DB 0xc5,0x78,0x29,0x78,0x70
; ymm0 = counter step (all dwords 3); ymm2,ymm3 = 64-byte accumulator.
2330 vmovdqa ymm0,YMMWORD[$L$Three]
2331
2332 vpxor ymm2,ymm2,ymm2
2333 vpxor ymm3,ymm3,ymm3
2334
; Three counters, one per entry of each triple: ymm4 = 1,4,7,...
; ymm8 = 2,5,8,...  ymm12 = 3,6,9,...
2335 vmovdqa ymm4,YMMWORD[$L$One]
2336 vmovdqa ymm8,YMMWORD[$L$Two]
2337 vmovdqa ymm12,YMMWORD[$L$Three]
2338
; Broadcast the index to all eight dwords: ymm2 is all-zero, so vpermd
; picks dword 0 of ymm1 for every lane.
2339 vmovd xmm1,r8d
2340 vpermd ymm1,ymm2,ymm1
Adam Langleyfad63272015-11-12 12:15:39 -08002341
2342
; rax = number of entry triples left to scan (21 triples = 63 entries).
Robert Sloan8f860b12017-08-28 07:37:06 -07002343 mov rax,21
2344$L$select_loop_avx2_w7:
2345
2346 vmovdqa ymm5,YMMWORD[rdx]
2347 vmovdqa ymm6,YMMWORD[32+rdx]
2348
2349 vmovdqa ymm9,YMMWORD[64+rdx]
2350 vmovdqa ymm10,YMMWORD[96+rdx]
2351
2352 vmovdqa ymm13,YMMWORD[128+rdx]
2353 vmovdqa ymm14,YMMWORD[160+rdx]
2354
; One all-ones/all-zeroes mask per entry of the triple.
2355 vpcmpeqd ymm7,ymm4,ymm1
2356 vpcmpeqd ymm11,ymm8,ymm1
2357 vpcmpeqd ymm15,ymm12,ymm1
2358
2359 vpaddd ymm4,ymm4,ymm0
2360 vpaddd ymm8,ymm8,ymm0
2361 vpaddd ymm12,ymm12,ymm0
2362 lea rdx,[192+rdx]
2363
2364 vpand ymm5,ymm5,ymm7
2365 vpand ymm6,ymm6,ymm7
2366 vpand ymm9,ymm9,ymm11
2367 vpand ymm10,ymm10,ymm11
2368 vpand ymm13,ymm13,ymm15
2369 vpand ymm14,ymm14,ymm15
2370
; Fold the masked entries into the accumulator with XOR.
2371 vpxor ymm2,ymm2,ymm5
2372 vpxor ymm3,ymm3,ymm6
2373 vpxor ymm2,ymm2,ymm9
2374 vpxor ymm3,ymm3,ymm10
2375 vpxor ymm2,ymm2,ymm13
2376 vpxor ymm3,ymm3,ymm14
2377
2378 dec rax
2379 jnz NEAR $L$select_loop_avx2_w7
2380
2381
; Tail: the 64th entry. After 21 steps of 3, ymm4 holds 64 in every dword.
2382 vmovdqa ymm5,YMMWORD[rdx]
2383 vmovdqa ymm6,YMMWORD[32+rdx]
2384
2385 vpcmpeqd ymm7,ymm4,ymm1
2386
2387 vpand ymm5,ymm5,ymm7
2388 vpand ymm6,ymm6,ymm7
2389
2390 vpxor ymm2,ymm2,ymm5
2391 vpxor ymm3,ymm3,ymm6
2392
2393 vmovdqu YMMWORD[rcx],ymm2
2394 vmovdqu YMMWORD[32+rcx],ymm3
; Clear the upper ymm halves before the legacy-SSE restores below.
2395 vzeroupper
2396 movaps xmm6,XMMWORD[rsp]
2397 movaps xmm7,XMMWORD[16+rsp]
2398 movaps xmm8,XMMWORD[32+rsp]
2399 movaps xmm9,XMMWORD[48+rsp]
2400 movaps xmm10,XMMWORD[64+rsp]
2401 movaps xmm11,XMMWORD[80+rsp]
2402 movaps xmm12,XMMWORD[96+rsp]
2403 movaps xmm13,XMMWORD[112+rsp]
2404 movaps xmm14,XMMWORD[128+rsp]
2405 movaps xmm15,XMMWORD[144+rsp]
; Restore rsp from the copy taken on entry.
Robert Sloanab8b8882018-03-26 11:39:51 -07002406 lea rsp,[r11]
Robert Sloan8f860b12017-08-28 07:37:06 -07002407 DB 0F3h,0C3h ;repret
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002408
Robert Sloanab8b8882018-03-26 11:39:51 -07002409$L$SEH_end_ecp_nistz256_avx2_select_w7:
Robert Sloan8f860b12017-08-28 07:37:06 -07002410
Adam Langleyfad63272015-11-12 12:15:39 -08002411
; ---------------------------------------------------------------------------
; __ecp_nistz256_add_toq  (internal helper)
;
; Adds the four qwords at [rbx] to the value held in r12,r13,r8,r9 and then
; conditionally subtracts the modulus limbs (-1, r14, 0, r15).
;
; In:   r12,r13,r8,r9  first operand, least significant limb in r12
;       rbx            pointer to the second operand (four qwords)
;       r14, r15       modulus limbs 1 and 3 (callers in this file load them
;                      from $L$poly+8 and $L$poly+24)
;       rdi            destination, four qwords
; Out:  result stored at [rdi] and left in r12,r13,r8,r9.
; Overwrites rax,rbp,rcx,r10,r11 and the flags.
; ---------------------------------------------------------------------------
2412ALIGN 32
2413__ecp_nistz256_add_toq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002414
; r11 collects the carry out of the 256-bit addition.
Steven Valdez909b19f2016-11-21 15:35:44 -05002415 xor r11,r11
Adam Langleyfad63272015-11-12 12:15:39 -08002416 add r12,QWORD[rbx]
2417 adc r13,QWORD[8+rbx]
2418 mov rax,r12
2419 adc r8,QWORD[16+rbx]
2420 adc r9,QWORD[24+rbx]
2421 mov rbp,r13
Steven Valdez909b19f2016-11-21 15:35:44 -05002422 adc r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08002423
; Trial subtraction of the modulus; rax,rbp,rcx,r10 keep the raw sum.
2424 sub r12,-1
2425 mov rcx,r8
2426 sbb r13,r14
2427 sbb r8,0
2428 mov r10,r9
2429 sbb r9,r15
Steven Valdez909b19f2016-11-21 15:35:44 -05002430 sbb r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08002431
; Borrow means the raw sum was already below the modulus: keep it.
Steven Valdez909b19f2016-11-21 15:35:44 -05002432 cmovc r12,rax
2433 cmovc r13,rbp
Adam Langleyfad63272015-11-12 12:15:39 -08002434 mov QWORD[rdi],r12
Steven Valdez909b19f2016-11-21 15:35:44 -05002435 cmovc r8,rcx
Adam Langleyfad63272015-11-12 12:15:39 -08002436 mov QWORD[8+rdi],r13
Steven Valdez909b19f2016-11-21 15:35:44 -05002437 cmovc r9,r10
Adam Langleyfad63272015-11-12 12:15:39 -08002438 mov QWORD[16+rdi],r8
2439 mov QWORD[24+rdi],r9
2440
2441 DB 0F3h,0C3h ;repret
2442
2443
2444
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002445
; ---------------------------------------------------------------------------
; __ecp_nistz256_sub_fromq  (internal helper)
;
; Subtracts the four qwords at [rbx] from the value held in r12,r13,r8,r9
; and adds the modulus limbs (-1, r14, 0, r15) back if the subtraction
; borrowed.
;
; In:   r12,r13,r8,r9  minuend, least significant limb in r12
;       rbx            pointer to the subtrahend (four qwords)
;       r14, r15       modulus limbs 1 and 3 (callers in this file load them
;                      from $L$poly+8 and $L$poly+24)
;       rdi            destination, four qwords
; Out:  result stored at [rdi] and left in r12,r13,r8,r9.
; Overwrites rax,rbp,rcx,r10,r11 and the flags.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08002446ALIGN 32
2447__ecp_nistz256_sub_fromq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002448
Adam Langleyfad63272015-11-12 12:15:39 -08002449 sub r12,QWORD[rbx]
2450 sbb r13,QWORD[8+rbx]
2451 mov rax,r12
2452 sbb r8,QWORD[16+rbx]
2453 sbb r9,QWORD[24+rbx]
2454 mov rbp,r13
; r11 = 0 if there was no borrow, all-ones if there was.
2455 sbb r11,r11
2456
; Unconditionally form difference + modulus; rax,rbp,rcx,r10 keep the raw
; difference.
2457 add r12,-1
2458 mov rcx,r8
2459 adc r13,r14
2460 adc r8,0
2461 mov r10,r9
2462 adc r9,r15
2463 test r11,r11
2464
; ZF set (no borrow): fall back to the raw difference.
2465 cmovz r12,rax
2466 cmovz r13,rbp
2467 mov QWORD[rdi],r12
2468 cmovz r8,rcx
2469 mov QWORD[8+rdi],r13
2470 cmovz r9,r10
2471 mov QWORD[16+rdi],r8
2472 mov QWORD[24+rdi],r9
2473
2474 DB 0F3h,0C3h ;repret
2475
2476
2477
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002478
; ---------------------------------------------------------------------------
; __ecp_nistz256_subq  (internal helper)
;
; Register-only subtraction: (rax,rbp,rcx,r10) minus (r12,r13,r8,r9), with
; the modulus limbs (-1, r14, 0, r15) added back if the subtraction borrowed.
; Nothing is written to memory.
;
; In:   rax,rbp,rcx,r10  minuend, least significant limb in rax
;       r12,r13,r8,r9    subtrahend, least significant limb in r12
;       r14, r15         modulus limbs 1 and 3 (callers in this file load
;                        them from $L$poly+8 and $L$poly+24)
; Out:  result in r12,r13,r8,r9.
; Overwrites rax,rbp,rcx,r10,r11 and the flags.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08002479ALIGN 32
2480__ecp_nistz256_subq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002481
; The raw difference is copied into r12,r13,r8,r9 as it is produced.
Adam Langleyfad63272015-11-12 12:15:39 -08002482 sub rax,r12
2483 sbb rbp,r13
2484 mov r12,rax
2485 sbb rcx,r8
2486 sbb r10,r9
2487 mov r13,rbp
; r11 = 0 if there was no borrow, all-ones if there was.
2488 sbb r11,r11
2489
; rax,rbp,rcx,r10 become difference + modulus.
2490 add rax,-1
2491 mov r8,rcx
2492 adc rbp,r14
2493 adc rcx,0
2494 mov r9,r10
2495 adc r10,r15
2496 test r11,r11
2497
; ZF clear (borrow occurred): take the corrected value.
2498 cmovnz r12,rax
2499 cmovnz r13,rbp
2500 cmovnz r8,rcx
2501 cmovnz r9,r10
2502
2503 DB 0F3h,0C3h ;repret
2504
2505
2506
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002507
; ---------------------------------------------------------------------------
; __ecp_nistz256_mul_by_2q  (internal helper)
;
; Doubles the value held in r12,r13,r8,r9 and then conditionally subtracts
; the modulus limbs (-1, r14, 0, r15).
;
; In:   r12,r13,r8,r9  operand, least significant limb in r12
;       r14, r15       modulus limbs 1 and 3 (callers in this file load them
;                      from $L$poly+8 and $L$poly+24)
;       rdi            destination, four qwords
; Out:  result stored at [rdi] and left in r12,r13,r8,r9.
; Overwrites rax,rbp,rcx,r10,r11 and the flags.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08002508ALIGN 32
2509__ecp_nistz256_mul_by_2q:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002510
; r11 collects the bit shifted out of the top limb.
Steven Valdez909b19f2016-11-21 15:35:44 -05002511 xor r11,r11
Adam Langleyfad63272015-11-12 12:15:39 -08002512 add r12,r12
2513 adc r13,r13
2514 mov rax,r12
2515 adc r8,r8
2516 adc r9,r9
2517 mov rbp,r13
Steven Valdez909b19f2016-11-21 15:35:44 -05002518 adc r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08002519
; Trial subtraction of the modulus; rax,rbp,rcx,r10 keep the raw double.
2520 sub r12,-1
2521 mov rcx,r8
2522 sbb r13,r14
2523 sbb r8,0
2524 mov r10,r9
2525 sbb r9,r15
Steven Valdez909b19f2016-11-21 15:35:44 -05002526 sbb r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08002527
; Borrow means the raw double was already below the modulus: keep it.
Steven Valdez909b19f2016-11-21 15:35:44 -05002528 cmovc r12,rax
2529 cmovc r13,rbp
Adam Langleyfad63272015-11-12 12:15:39 -08002530 mov QWORD[rdi],r12
Steven Valdez909b19f2016-11-21 15:35:44 -05002531 cmovc r8,rcx
Adam Langleyfad63272015-11-12 12:15:39 -08002532 mov QWORD[8+rdi],r13
Steven Valdez909b19f2016-11-21 15:35:44 -05002533 cmovc r9,r10
Adam Langleyfad63272015-11-12 12:15:39 -08002534 mov QWORD[16+rdi],r8
2535 mov QWORD[24+rdi],r9
2536
2537 DB 0F3h,0C3h ;repret
2538
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002539
; ---------------------------------------------------------------------------
; ecp_nistz256_point_double  (exported, Win64 entry sequence)
;
; Reads three 32-byte field elements at rdx+0, rdx+32 and rdx+64 (presumably
; the X, Y and Z coordinates of a point -- confirm against the C prototype)
; and writes three 32-byte results at rcx+0, rcx+32 and rcx+64.
;
; In:   rcx = output pointer, rdx = input pointer. They are copied to rdi and
;       rsi after the caller's rdi/rsi are saved at [8+rsp] and [16+rsp].
; If both bits selected by mask 0x80100 are set in the dword at
; OPENSSL_ia32cap_P+8 (presumably the BMI2 and ADX feature bits -- confirm),
; control goes to $L$point_doublex, which is outside this block.
;
; Frame: 32*5+8 bytes below six pushed registers. Scratch slots used here:
;   [rsp+0], [rsp+32], [rsp+64], [rsp+128]  temporaries
;   [rsp+96]                                copy of the first input element
; xmm0, xmm1, xmm2 carry the output pointers rcx+0, rcx+32, rcx+64 across
; helper calls (the DB sequences are hand-encoded movq instructions).
;
; $L$point_double_shortcutq is also a jump target for the point-addition
; routine in this file, which adjusts rsp before jumping here.
;
; NOTE(review): the comments below assume __ecp_nistz256_mul_montq returns
; its result in r12,r13,r8,r9 and stores it at [rdi]; that helper is not
; visible from this block -- confirm.
; ---------------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08002540global ecp_nistz256_point_double
2541
2542ALIGN 32
2543ecp_nistz256_point_double:
2544 mov QWORD[8+rsp],rdi ;WIN64 prologue
2545 mov QWORD[16+rsp],rsi
2546 mov rax,rsp
2547$L$SEH_begin_ecp_nistz256_point_double:
2548 mov rdi,rcx
2549 mov rsi,rdx
2550
2551
Robert Sloanab8b8882018-03-26 11:39:51 -07002552
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002553 lea rcx,[OPENSSL_ia32cap_P]
2554 mov rcx,QWORD[8+rcx]
2555 and ecx,0x80100
2556 cmp ecx,0x80100
2557 je NEAR $L$point_doublex
Adam Langleyfad63272015-11-12 12:15:39 -08002558 push rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07002559
Adam Langleyfad63272015-11-12 12:15:39 -08002560 push rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07002561
Adam Langleyfad63272015-11-12 12:15:39 -08002562 push r12
Robert Sloanab8b8882018-03-26 11:39:51 -07002563
Adam Langleyfad63272015-11-12 12:15:39 -08002564 push r13
Robert Sloanab8b8882018-03-26 11:39:51 -07002565
Adam Langleyfad63272015-11-12 12:15:39 -08002566 push r14
Robert Sloanab8b8882018-03-26 11:39:51 -07002567
Adam Langleyfad63272015-11-12 12:15:39 -08002568 push r15
Robert Sloanab8b8882018-03-26 11:39:51 -07002569
Adam Langleyfad63272015-11-12 12:15:39 -08002570 sub rsp,32*5+8
2571
Robert Sloanab8b8882018-03-26 11:39:51 -07002572$L$point_doubleq_body:
2573
David Benjamin4969cc92016-04-22 15:02:23 -04002574$L$point_double_shortcutq:
; Copy in[0..31] to [rsp+96], load in[32..63] into r12,r13,r8,r9 and the
; modulus limbs into r14/r15. rbx keeps the input pointer.
Adam Langleyfad63272015-11-12 12:15:39 -08002575 movdqu xmm0,XMMWORD[rsi]
2576 mov rbx,rsi
2577 movdqu xmm1,XMMWORD[16+rsi]
2578 mov r12,QWORD[((32+0))+rsi]
2579 mov r13,QWORD[((32+8))+rsi]
2580 mov r8,QWORD[((32+16))+rsi]
2581 mov r9,QWORD[((32+24))+rsi]
2582 mov r14,QWORD[(($L$poly+8))]
2583 mov r15,QWORD[(($L$poly+24))]
2584 movdqa XMMWORD[96+rsp],xmm0
2585 movdqa XMMWORD[(96+16)+rsp],xmm1
2586 lea r10,[32+rdi]
2587 lea r11,[64+rdi]
; Encoded: movq xmm0,rdi / movq xmm1,r10 / movq xmm2,r11
; (output pointers out+0, out+32, out+64).
2588DB 102,72,15,110,199
2589DB 102,73,15,110,202
2590DB 102,73,15,110,211
2591
; [rsp+0] = 2 * in[32..63]
2592 lea rdi,[rsp]
2593 call __ecp_nistz256_mul_by_2q
2594
; [rsp+64] = square of in[64..95]
2595 mov rax,QWORD[((64+0))+rsi]
2596 mov r14,QWORD[((64+8))+rsi]
2597 mov r15,QWORD[((64+16))+rsi]
2598 mov r8,QWORD[((64+24))+rsi]
2599 lea rsi,[((64-0))+rsi]
2600 lea rdi,[64+rsp]
2601 call __ecp_nistz256_sqr_montq
2602
; [rsp+0] = square of [rsp+0]
2603 mov rax,QWORD[((0+0))+rsp]
2604 mov r14,QWORD[((8+0))+rsp]
2605 lea rsi,[((0+0))+rsp]
2606 mov r15,QWORD[((16+0))+rsp]
2607 mov r8,QWORD[((24+0))+rsp]
2608 lea rdi,[rsp]
2609 call __ecp_nistz256_sqr_montq
2610
; out[64..95] = 2 * (in[32..63] * in[64..95]); rdi is taken from xmm2 and
; the doubling is applied to the product left in registers by the multiply.
2611 mov rax,QWORD[32+rbx]
2612 mov r9,QWORD[((64+0))+rbx]
2613 mov r10,QWORD[((64+8))+rbx]
2614 mov r11,QWORD[((64+16))+rbx]
2615 mov r12,QWORD[((64+24))+rbx]
2616 lea rsi,[((64-0))+rbx]
2617 lea rbx,[32+rbx]
; Encoded: movq rdi,xmm2
2618DB 102,72,15,126,215
2619 call __ecp_nistz256_mul_montq
2620 call __ecp_nistz256_mul_by_2q
2621
; [rsp+32] = [rsp+96] + [rsp+64]
2622 mov r12,QWORD[((96+0))+rsp]
2623 mov r13,QWORD[((96+8))+rsp]
2624 lea rbx,[64+rsp]
2625 mov r8,QWORD[((96+16))+rsp]
2626 mov r9,QWORD[((96+24))+rsp]
2627 lea rdi,[32+rsp]
2628 call __ecp_nistz256_add_toq
2629
; [rsp+64] = [rsp+96] - [rsp+64]
2630 mov r12,QWORD[((96+0))+rsp]
2631 mov r13,QWORD[((96+8))+rsp]
2632 lea rbx,[64+rsp]
2633 mov r8,QWORD[((96+16))+rsp]
2634 mov r9,QWORD[((96+24))+rsp]
2635 lea rdi,[64+rsp]
2636 call __ecp_nistz256_sub_fromq
2637
; Square [rsp+0] with rdi = out+32; the value is then halved below.
2638 mov rax,QWORD[((0+0))+rsp]
2639 mov r14,QWORD[((8+0))+rsp]
2640 lea rsi,[((0+0))+rsp]
2641 mov r15,QWORD[((16+0))+rsp]
2642 mov r8,QWORD[((24+0))+rsp]
; Encoded: movq rdi,xmm1
2643DB 102,72,15,126,207
2644 call __ecp_nistz256_sqr_montq
; Halve the square held in r12..r15: add the modulus limbs (-1, rsi, 0, rbp)
; when the value is odd, then shift the 257-bit result right by one.
; NOTE(review): rsi/rbp are taken to hold the modulus limbs as left behind by
; __ecp_nistz256_sqr_montq, whose start is not visible here -- confirm.
2645 xor r9,r9
2646 mov rax,r12
2647 add r12,-1
2648 mov r10,r13
2649 adc r13,rsi
2650 mov rcx,r14
2651 adc r14,0
2652 mov r8,r15
2653 adc r15,rbp
2654 adc r9,0
2655 xor rsi,rsi
2656 test rax,1
2657
; Even input (ZF set): keep the original limbs and a zero carry.
2658 cmovz r12,rax
2659 cmovz r13,r10
2660 cmovz r14,rcx
2661 cmovz r15,r8
2662 cmovz r9,rsi
2663
; Right shift by one across the limbs; r9 supplies the incoming top bit.
; The result is stored at [rdi] = out[32..63].
2664 mov rax,r13
2665 shr r12,1
2666 shl rax,63
2667 mov r10,r14
2668 shr r13,1
2669 or r12,rax
2670 shl r10,63
2671 mov rcx,r15
2672 shr r14,1
2673 or r13,r10
2674 shl rcx,63
2675 mov QWORD[rdi],r12
2676 shr r15,1
2677 mov QWORD[8+rdi],r13
2678 shl r9,63
2679 or r14,rcx
2680 or r15,r9
2681 mov QWORD[16+rdi],r14
2682 mov QWORD[24+rdi],r15
; [rsp+32] = [rsp+32] * [rsp+64]
2683 mov rax,QWORD[64+rsp]
2684 lea rbx,[64+rsp]
2685 mov r9,QWORD[((0+32))+rsp]
2686 mov r10,QWORD[((8+32))+rsp]
2687 lea rsi,[((0+32))+rsp]
2688 mov r11,QWORD[((16+32))+rsp]
2689 mov r12,QWORD[((24+32))+rsp]
2690 lea rdi,[32+rsp]
2691 call __ecp_nistz256_mul_montq
2692
; [rsp+128] = 2 * product (operates on the registers left by the multiply)
2693 lea rdi,[128+rsp]
2694 call __ecp_nistz256_mul_by_2q
2695
; [rsp+32] = doubled value + [rsp+32], i.e. three times the product
2696 lea rbx,[32+rsp]
2697 lea rdi,[32+rsp]
2698 call __ecp_nistz256_add_toq
2699
; [rsp+0] = [rsp+0] * [rsp+96]
2700 mov rax,QWORD[96+rsp]
2701 lea rbx,[96+rsp]
2702 mov r9,QWORD[((0+0))+rsp]
2703 mov r10,QWORD[((8+0))+rsp]
2704 lea rsi,[((0+0))+rsp]
2705 mov r11,QWORD[((16+0))+rsp]
2706 mov r12,QWORD[((24+0))+rsp]
2707 lea rdi,[rsp]
2708 call __ecp_nistz256_mul_montq
2709
; [rsp+128] = 2 * [rsp+0]
2710 lea rdi,[128+rsp]
2711 call __ecp_nistz256_mul_by_2q
2712
; out[0..31] = square of [rsp+32] (rdi comes from xmm0)
2713 mov rax,QWORD[((0+32))+rsp]
2714 mov r14,QWORD[((8+32))+rsp]
2715 lea rsi,[((0+32))+rsp]
2716 mov r15,QWORD[((16+32))+rsp]
2717 mov r8,QWORD[((24+32))+rsp]
; Encoded: movq rdi,xmm0
2718DB 102,72,15,126,199
2719 call __ecp_nistz256_sqr_montq
2720
; The square is in r12,r13,r14,r15. Move limbs 2 and 3 into r8/r9 and put
; the modulus limbs back into r14/r15 (from rsi/rbp) so that the add/sub
; helpers can be used. Then out[0..31] = square - [rsp+128].
2721 lea rbx,[128+rsp]
2722 mov r8,r14
2723 mov r9,r15
2724 mov r14,rsi
2725 mov r15,rbp
2726 call __ecp_nistz256_sub_fromq
2727
; r12,r13,r8,r9 = [rsp+0] - out[0..31]. The helper works in registers only;
; the difference is written to [rsp+0] by the stores that follow.
2728 mov rax,QWORD[((0+0))+rsp]
2729 mov rbp,QWORD[((0+8))+rsp]
2730 mov rcx,QWORD[((0+16))+rsp]
2731 mov r10,QWORD[((0+24))+rsp]
2732 lea rdi,[rsp]
2733 call __ecp_nistz256_subq
2734
; Set up [rsp+0] * [rsp+32]. "xor ecx,ecx" sets ZF and none of the mov/lea
; instructions that follow change the flags, so both cmovz below always
; perform the move.
2735 mov rax,QWORD[32+rsp]
2736 lea rbx,[32+rsp]
2737 mov r14,r12
2738 xor ecx,ecx
2739 mov QWORD[((0+0))+rsp],r12
2740 mov r10,r13
2741 mov QWORD[((0+8))+rsp],r13
2742 cmovz r11,r8
2743 mov QWORD[((0+16))+rsp],r8
2744 lea rsi,[((0-0))+rsp]
2745 cmovz r12,r9
2746 mov QWORD[((0+24))+rsp],r9
2747 mov r9,r14
2748 lea rdi,[rsp]
2749 call __ecp_nistz256_mul_montq
2750
; out[32..63] = product - out[32..63]
; Encoded: movq rbx,xmm1 / movq rdi,xmm1
2751DB 102,72,15,126,203
2752DB 102,72,15,126,207
2753 call __ecp_nistz256_sub_fromq
2754
; Epilogue: rsi = address just above the six pushed registers
; (160+8 bytes of locals + 48 bytes of pushes); reload them and reset rsp.
Robert Sloanab8b8882018-03-26 11:39:51 -07002755 lea rsi,[((160+56))+rsp]
2756
2757 mov r15,QWORD[((-48))+rsi]
2758
2759 mov r14,QWORD[((-40))+rsi]
2760
2761 mov r13,QWORD[((-32))+rsi]
2762
2763 mov r12,QWORD[((-24))+rsi]
2764
2765 mov rbx,QWORD[((-16))+rsi]
2766
2767 mov rbp,QWORD[((-8))+rsi]
2768
2769 lea rsp,[rsi]
2770
2771$L$point_doubleq_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08002772 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2773 mov rsi,QWORD[16+rsp]
2774 DB 0F3h,0C3h ;repret
Robert Sloanab8b8882018-03-26 11:39:51 -07002775
Adam Langleyfad63272015-11-12 12:15:39 -08002776$L$SEH_end_ecp_nistz256_point_double:
2777global ecp_nistz256_point_add
2778
2779ALIGN 32
2780ecp_nistz256_point_add:
2781 mov QWORD[8+rsp],rdi ;WIN64 prologue
2782 mov QWORD[16+rsp],rsi
2783 mov rax,rsp
2784$L$SEH_begin_ecp_nistz256_point_add:
2785 mov rdi,rcx
2786 mov rsi,rdx
2787 mov rdx,r8
2788
2789
Robert Sloanab8b8882018-03-26 11:39:51 -07002790
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002791 lea rcx,[OPENSSL_ia32cap_P]
2792 mov rcx,QWORD[8+rcx]
2793 and ecx,0x80100
2794 cmp ecx,0x80100
2795 je NEAR $L$point_addx
Adam Langleyfad63272015-11-12 12:15:39 -08002796 push rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07002797
Adam Langleyfad63272015-11-12 12:15:39 -08002798 push rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07002799
Adam Langleyfad63272015-11-12 12:15:39 -08002800 push r12
Robert Sloanab8b8882018-03-26 11:39:51 -07002801
Adam Langleyfad63272015-11-12 12:15:39 -08002802 push r13
Robert Sloanab8b8882018-03-26 11:39:51 -07002803
Adam Langleyfad63272015-11-12 12:15:39 -08002804 push r14
Robert Sloanab8b8882018-03-26 11:39:51 -07002805
Adam Langleyfad63272015-11-12 12:15:39 -08002806 push r15
Robert Sloanab8b8882018-03-26 11:39:51 -07002807
Adam Langleyfad63272015-11-12 12:15:39 -08002808 sub rsp,32*18+8
2809
Robert Sloanab8b8882018-03-26 11:39:51 -07002810$L$point_addq_body:
2811
Adam Langleyfad63272015-11-12 12:15:39 -08002812 movdqu xmm0,XMMWORD[rsi]
2813 movdqu xmm1,XMMWORD[16+rsi]
2814 movdqu xmm2,XMMWORD[32+rsi]
2815 movdqu xmm3,XMMWORD[48+rsi]
2816 movdqu xmm4,XMMWORD[64+rsi]
2817 movdqu xmm5,XMMWORD[80+rsi]
2818 mov rbx,rsi
2819 mov rsi,rdx
2820 movdqa XMMWORD[384+rsp],xmm0
2821 movdqa XMMWORD[(384+16)+rsp],xmm1
Adam Langleyfad63272015-11-12 12:15:39 -08002822 movdqa XMMWORD[416+rsp],xmm2
2823 movdqa XMMWORD[(416+16)+rsp],xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08002824 movdqa XMMWORD[448+rsp],xmm4
2825 movdqa XMMWORD[(448+16)+rsp],xmm5
Steven Valdez909b19f2016-11-21 15:35:44 -05002826 por xmm5,xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08002827
2828 movdqu xmm0,XMMWORD[rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05002829 pshufd xmm3,xmm5,0xb1
Adam Langleyfad63272015-11-12 12:15:39 -08002830 movdqu xmm1,XMMWORD[16+rsi]
2831 movdqu xmm2,XMMWORD[32+rsi]
2832 por xmm5,xmm3
2833 movdqu xmm3,XMMWORD[48+rsi]
2834 mov rax,QWORD[((64+0))+rsi]
2835 mov r14,QWORD[((64+8))+rsi]
2836 mov r15,QWORD[((64+16))+rsi]
2837 mov r8,QWORD[((64+24))+rsi]
2838 movdqa XMMWORD[480+rsp],xmm0
2839 pshufd xmm4,xmm5,0x1e
2840 movdqa XMMWORD[(480+16)+rsp],xmm1
Steven Valdez909b19f2016-11-21 15:35:44 -05002841 movdqu xmm0,XMMWORD[64+rsi]
2842 movdqu xmm1,XMMWORD[80+rsi]
Adam Langleyfad63272015-11-12 12:15:39 -08002843 movdqa XMMWORD[512+rsp],xmm2
2844 movdqa XMMWORD[(512+16)+rsp],xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08002845 por xmm5,xmm4
2846 pxor xmm4,xmm4
Steven Valdez909b19f2016-11-21 15:35:44 -05002847 por xmm1,xmm0
2848DB 102,72,15,110,199
Adam Langleyfad63272015-11-12 12:15:39 -08002849
2850 lea rsi,[((64-0))+rsi]
2851 mov QWORD[((544+0))+rsp],rax
2852 mov QWORD[((544+8))+rsp],r14
2853 mov QWORD[((544+16))+rsp],r15
2854 mov QWORD[((544+24))+rsp],r8
2855 lea rdi,[96+rsp]
2856 call __ecp_nistz256_sqr_montq
2857
2858 pcmpeqd xmm5,xmm4
Steven Valdez909b19f2016-11-21 15:35:44 -05002859 pshufd xmm4,xmm1,0xb1
2860 por xmm4,xmm1
Adam Langleyfad63272015-11-12 12:15:39 -08002861 pshufd xmm5,xmm5,0
2862 pshufd xmm3,xmm4,0x1e
2863 por xmm4,xmm3
2864 pxor xmm3,xmm3
2865 pcmpeqd xmm4,xmm3
2866 pshufd xmm4,xmm4,0
2867 mov rax,QWORD[((64+0))+rbx]
2868 mov r14,QWORD[((64+8))+rbx]
2869 mov r15,QWORD[((64+16))+rbx]
2870 mov r8,QWORD[((64+24))+rbx]
David Benjamin4969cc92016-04-22 15:02:23 -04002871DB 102,72,15,110,203
Adam Langleyfad63272015-11-12 12:15:39 -08002872
2873 lea rsi,[((64-0))+rbx]
2874 lea rdi,[32+rsp]
2875 call __ecp_nistz256_sqr_montq
2876
2877 mov rax,QWORD[544+rsp]
2878 lea rbx,[544+rsp]
2879 mov r9,QWORD[((0+96))+rsp]
2880 mov r10,QWORD[((8+96))+rsp]
2881 lea rsi,[((0+96))+rsp]
2882 mov r11,QWORD[((16+96))+rsp]
2883 mov r12,QWORD[((24+96))+rsp]
2884 lea rdi,[224+rsp]
2885 call __ecp_nistz256_mul_montq
2886
2887 mov rax,QWORD[448+rsp]
2888 lea rbx,[448+rsp]
2889 mov r9,QWORD[((0+32))+rsp]
2890 mov r10,QWORD[((8+32))+rsp]
2891 lea rsi,[((0+32))+rsp]
2892 mov r11,QWORD[((16+32))+rsp]
2893 mov r12,QWORD[((24+32))+rsp]
2894 lea rdi,[256+rsp]
2895 call __ecp_nistz256_mul_montq
2896
2897 mov rax,QWORD[416+rsp]
2898 lea rbx,[416+rsp]
2899 mov r9,QWORD[((0+224))+rsp]
2900 mov r10,QWORD[((8+224))+rsp]
2901 lea rsi,[((0+224))+rsp]
2902 mov r11,QWORD[((16+224))+rsp]
2903 mov r12,QWORD[((24+224))+rsp]
2904 lea rdi,[224+rsp]
2905 call __ecp_nistz256_mul_montq
2906
2907 mov rax,QWORD[512+rsp]
2908 lea rbx,[512+rsp]
2909 mov r9,QWORD[((0+256))+rsp]
2910 mov r10,QWORD[((8+256))+rsp]
2911 lea rsi,[((0+256))+rsp]
2912 mov r11,QWORD[((16+256))+rsp]
2913 mov r12,QWORD[((24+256))+rsp]
2914 lea rdi,[256+rsp]
2915 call __ecp_nistz256_mul_montq
2916
2917 lea rbx,[224+rsp]
2918 lea rdi,[64+rsp]
2919 call __ecp_nistz256_sub_fromq
2920
2921 or r12,r13
2922 movdqa xmm2,xmm4
2923 or r12,r8
2924 or r12,r9
2925 por xmm2,xmm5
2926DB 102,73,15,110,220
2927
2928 mov rax,QWORD[384+rsp]
2929 lea rbx,[384+rsp]
2930 mov r9,QWORD[((0+96))+rsp]
2931 mov r10,QWORD[((8+96))+rsp]
2932 lea rsi,[((0+96))+rsp]
2933 mov r11,QWORD[((16+96))+rsp]
2934 mov r12,QWORD[((24+96))+rsp]
2935 lea rdi,[160+rsp]
2936 call __ecp_nistz256_mul_montq
2937
2938 mov rax,QWORD[480+rsp]
2939 lea rbx,[480+rsp]
2940 mov r9,QWORD[((0+32))+rsp]
2941 mov r10,QWORD[((8+32))+rsp]
2942 lea rsi,[((0+32))+rsp]
2943 mov r11,QWORD[((16+32))+rsp]
2944 mov r12,QWORD[((24+32))+rsp]
2945 lea rdi,[192+rsp]
2946 call __ecp_nistz256_mul_montq
2947
2948 lea rbx,[160+rsp]
2949 lea rdi,[rsp]
2950 call __ecp_nistz256_sub_fromq
2951
2952 or r12,r13
2953 or r12,r8
2954 or r12,r9
2955
Srinivas Paladugudd42a612019-08-09 19:30:39 +00002956DB 102,73,15,126,208
2957DB 102,73,15,126,217
Pete Bentley0c61efe2019-08-13 09:32:23 +01002958 or r12,r8
2959DB 0x3e
Srinivas Paladugudd42a612019-08-09 19:30:39 +00002960 jnz NEAR $L$add_proceedq
Pete Bentley0c61efe2019-08-13 09:32:23 +01002961
2962
2963
Adam Langleyfad63272015-11-12 12:15:39 -08002964 test r9,r9
David Benjamin4969cc92016-04-22 15:02:23 -04002965 jz NEAR $L$add_doubleq
Adam Langleyfad63272015-11-12 12:15:39 -08002966
Pete Bentley0c61efe2019-08-13 09:32:23 +01002967
2968
2969
2970
2971
Adam Langleyfad63272015-11-12 12:15:39 -08002972DB 102,72,15,126,199
2973 pxor xmm0,xmm0
2974 movdqu XMMWORD[rdi],xmm0
2975 movdqu XMMWORD[16+rdi],xmm0
2976 movdqu XMMWORD[32+rdi],xmm0
2977 movdqu XMMWORD[48+rdi],xmm0
2978 movdqu XMMWORD[64+rdi],xmm0
2979 movdqu XMMWORD[80+rdi],xmm0
2980 jmp NEAR $L$add_doneq
2981
2982ALIGN 32
David Benjamin4969cc92016-04-22 15:02:23 -04002983$L$add_doubleq:
2984DB 102,72,15,126,206
2985DB 102,72,15,126,199
2986 add rsp,416
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002987
David Benjamin4969cc92016-04-22 15:02:23 -04002988 jmp NEAR $L$point_double_shortcutq
2989
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002990
David Benjamin4969cc92016-04-22 15:02:23 -04002991ALIGN 32
Adam Langleyfad63272015-11-12 12:15:39 -08002992$L$add_proceedq:
2993 mov rax,QWORD[((0+64))+rsp]
2994 mov r14,QWORD[((8+64))+rsp]
2995 lea rsi,[((0+64))+rsp]
2996 mov r15,QWORD[((16+64))+rsp]
2997 mov r8,QWORD[((24+64))+rsp]
2998 lea rdi,[96+rsp]
2999 call __ecp_nistz256_sqr_montq
3000
3001 mov rax,QWORD[448+rsp]
3002 lea rbx,[448+rsp]
3003 mov r9,QWORD[((0+0))+rsp]
3004 mov r10,QWORD[((8+0))+rsp]
3005 lea rsi,[((0+0))+rsp]
3006 mov r11,QWORD[((16+0))+rsp]
3007 mov r12,QWORD[((24+0))+rsp]
3008 lea rdi,[352+rsp]
3009 call __ecp_nistz256_mul_montq
3010
3011 mov rax,QWORD[((0+0))+rsp]
3012 mov r14,QWORD[((8+0))+rsp]
3013 lea rsi,[((0+0))+rsp]
3014 mov r15,QWORD[((16+0))+rsp]
3015 mov r8,QWORD[((24+0))+rsp]
3016 lea rdi,[32+rsp]
3017 call __ecp_nistz256_sqr_montq
3018
3019 mov rax,QWORD[544+rsp]
3020 lea rbx,[544+rsp]
3021 mov r9,QWORD[((0+352))+rsp]
3022 mov r10,QWORD[((8+352))+rsp]
3023 lea rsi,[((0+352))+rsp]
3024 mov r11,QWORD[((16+352))+rsp]
3025 mov r12,QWORD[((24+352))+rsp]
3026 lea rdi,[352+rsp]
3027 call __ecp_nistz256_mul_montq
3028
3029 mov rax,QWORD[rsp]
3030 lea rbx,[rsp]
3031 mov r9,QWORD[((0+32))+rsp]
3032 mov r10,QWORD[((8+32))+rsp]
3033 lea rsi,[((0+32))+rsp]
3034 mov r11,QWORD[((16+32))+rsp]
3035 mov r12,QWORD[((24+32))+rsp]
3036 lea rdi,[128+rsp]
3037 call __ecp_nistz256_mul_montq
3038
3039 mov rax,QWORD[160+rsp]
3040 lea rbx,[160+rsp]
3041 mov r9,QWORD[((0+32))+rsp]
3042 mov r10,QWORD[((8+32))+rsp]
3043 lea rsi,[((0+32))+rsp]
3044 mov r11,QWORD[((16+32))+rsp]
3045 mov r12,QWORD[((24+32))+rsp]
3046 lea rdi,[192+rsp]
3047 call __ecp_nistz256_mul_montq
3048
3049
3050
3051
Steven Valdez909b19f2016-11-21 15:35:44 -05003052 xor r11,r11
Adam Langleyfad63272015-11-12 12:15:39 -08003053 add r12,r12
3054 lea rsi,[96+rsp]
3055 adc r13,r13
3056 mov rax,r12
3057 adc r8,r8
3058 adc r9,r9
3059 mov rbp,r13
Steven Valdez909b19f2016-11-21 15:35:44 -05003060 adc r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08003061
3062 sub r12,-1
3063 mov rcx,r8
3064 sbb r13,r14
3065 sbb r8,0
3066 mov r10,r9
3067 sbb r9,r15
Steven Valdez909b19f2016-11-21 15:35:44 -05003068 sbb r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08003069
Steven Valdez909b19f2016-11-21 15:35:44 -05003070 cmovc r12,rax
Adam Langleyfad63272015-11-12 12:15:39 -08003071 mov rax,QWORD[rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05003072 cmovc r13,rbp
Adam Langleyfad63272015-11-12 12:15:39 -08003073 mov rbp,QWORD[8+rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05003074 cmovc r8,rcx
Adam Langleyfad63272015-11-12 12:15:39 -08003075 mov rcx,QWORD[16+rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05003076 cmovc r9,r10
Adam Langleyfad63272015-11-12 12:15:39 -08003077 mov r10,QWORD[24+rsi]
3078
3079 call __ecp_nistz256_subq
3080
3081 lea rbx,[128+rsp]
3082 lea rdi,[288+rsp]
3083 call __ecp_nistz256_sub_fromq
3084
3085 mov rax,QWORD[((192+0))+rsp]
3086 mov rbp,QWORD[((192+8))+rsp]
3087 mov rcx,QWORD[((192+16))+rsp]
3088 mov r10,QWORD[((192+24))+rsp]
3089 lea rdi,[320+rsp]
3090
3091 call __ecp_nistz256_subq
3092
3093 mov QWORD[rdi],r12
3094 mov QWORD[8+rdi],r13
3095 mov QWORD[16+rdi],r8
3096 mov QWORD[24+rdi],r9
3097 mov rax,QWORD[128+rsp]
3098 lea rbx,[128+rsp]
3099 mov r9,QWORD[((0+224))+rsp]
3100 mov r10,QWORD[((8+224))+rsp]
3101 lea rsi,[((0+224))+rsp]
3102 mov r11,QWORD[((16+224))+rsp]
3103 mov r12,QWORD[((24+224))+rsp]
3104 lea rdi,[256+rsp]
3105 call __ecp_nistz256_mul_montq
3106
3107 mov rax,QWORD[320+rsp]
3108 lea rbx,[320+rsp]
3109 mov r9,QWORD[((0+64))+rsp]
3110 mov r10,QWORD[((8+64))+rsp]
3111 lea rsi,[((0+64))+rsp]
3112 mov r11,QWORD[((16+64))+rsp]
3113 mov r12,QWORD[((24+64))+rsp]
3114 lea rdi,[320+rsp]
3115 call __ecp_nistz256_mul_montq
3116
3117 lea rbx,[256+rsp]
3118 lea rdi,[320+rsp]
3119 call __ecp_nistz256_sub_fromq
3120
3121DB 102,72,15,126,199
3122
3123 movdqa xmm0,xmm5
3124 movdqa xmm1,xmm5
3125 pandn xmm0,XMMWORD[352+rsp]
3126 movdqa xmm2,xmm5
3127 pandn xmm1,XMMWORD[((352+16))+rsp]
3128 movdqa xmm3,xmm5
3129 pand xmm2,XMMWORD[544+rsp]
3130 pand xmm3,XMMWORD[((544+16))+rsp]
3131 por xmm2,xmm0
3132 por xmm3,xmm1
3133
3134 movdqa xmm0,xmm4
3135 movdqa xmm1,xmm4
3136 pandn xmm0,xmm2
3137 movdqa xmm2,xmm4
3138 pandn xmm1,xmm3
3139 movdqa xmm3,xmm4
3140 pand xmm2,XMMWORD[448+rsp]
3141 pand xmm3,XMMWORD[((448+16))+rsp]
3142 por xmm2,xmm0
3143 por xmm3,xmm1
3144 movdqu XMMWORD[64+rdi],xmm2
3145 movdqu XMMWORD[80+rdi],xmm3
3146
3147 movdqa xmm0,xmm5
3148 movdqa xmm1,xmm5
3149 pandn xmm0,XMMWORD[288+rsp]
3150 movdqa xmm2,xmm5
3151 pandn xmm1,XMMWORD[((288+16))+rsp]
3152 movdqa xmm3,xmm5
3153 pand xmm2,XMMWORD[480+rsp]
3154 pand xmm3,XMMWORD[((480+16))+rsp]
3155 por xmm2,xmm0
3156 por xmm3,xmm1
3157
3158 movdqa xmm0,xmm4
3159 movdqa xmm1,xmm4
3160 pandn xmm0,xmm2
3161 movdqa xmm2,xmm4
3162 pandn xmm1,xmm3
3163 movdqa xmm3,xmm4
3164 pand xmm2,XMMWORD[384+rsp]
3165 pand xmm3,XMMWORD[((384+16))+rsp]
3166 por xmm2,xmm0
3167 por xmm3,xmm1
3168 movdqu XMMWORD[rdi],xmm2
3169 movdqu XMMWORD[16+rdi],xmm3
3170
3171 movdqa xmm0,xmm5
3172 movdqa xmm1,xmm5
3173 pandn xmm0,XMMWORD[320+rsp]
3174 movdqa xmm2,xmm5
3175 pandn xmm1,XMMWORD[((320+16))+rsp]
3176 movdqa xmm3,xmm5
3177 pand xmm2,XMMWORD[512+rsp]
3178 pand xmm3,XMMWORD[((512+16))+rsp]
3179 por xmm2,xmm0
3180 por xmm3,xmm1
3181
3182 movdqa xmm0,xmm4
3183 movdqa xmm1,xmm4
3184 pandn xmm0,xmm2
3185 movdqa xmm2,xmm4
3186 pandn xmm1,xmm3
3187 movdqa xmm3,xmm4
3188 pand xmm2,XMMWORD[416+rsp]
3189 pand xmm3,XMMWORD[((416+16))+rsp]
3190 por xmm2,xmm0
3191 por xmm3,xmm1
3192 movdqu XMMWORD[32+rdi],xmm2
3193 movdqu XMMWORD[48+rdi],xmm3
3194
3195$L$add_doneq:
Robert Sloanab8b8882018-03-26 11:39:51 -07003196 lea rsi,[((576+56))+rsp]
3197
3198 mov r15,QWORD[((-48))+rsi]
3199
3200 mov r14,QWORD[((-40))+rsi]
3201
3202 mov r13,QWORD[((-32))+rsi]
3203
3204 mov r12,QWORD[((-24))+rsi]
3205
3206 mov rbx,QWORD[((-16))+rsi]
3207
3208 mov rbp,QWORD[((-8))+rsi]
3209
3210 lea rsp,[rsi]
3211
3212$L$point_addq_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08003213 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3214 mov rsi,QWORD[16+rsp]
3215 DB 0F3h,0C3h ;repret
Robert Sloanab8b8882018-03-26 11:39:51 -07003216
Adam Langleyfad63272015-11-12 12:15:39 -08003217$L$SEH_end_ecp_nistz256_point_add:
; ----------------------------------------------------------------------
; ecp_nistz256_point_add_affine -- Win64 entry point, non-"x" code path.
;
; The three register arguments arrive in rcx, rdx, r8 and are copied to
; rdi, rsi, rdx:
;   rdi = output pointer; 96 bytes are written at the very end
;   rsi = first input;  96 bytes are read (three 32-byte fields)
;   rdx = second input; 64 bytes are read (two 32-byte fields)
; NOTE(review): the three fields are presumably X, Y, Z of a Jacobian point
; and the two fields X, Y of an affine point -- confirm against the C caller.
;
; Frame: six pushes, then "sub rsp,32*15+8".  Offsets from the new rsp:
;   320..415  copy of the first input
;   416..479  copy of the second input
;     0..319  32-byte temporaries
; Registers that stay live across the helper calls:
;   xmm0 = saved output pointer
;   xmm5 = all-ones when bytes 64..95 of the first input are all zero
;   xmm4 = all-ones when all 64 bytes of the second input are zero
; NOTE(review): this relies on the q-helpers leaving xmm0/xmm3/xmm4/xmm5
; untouched; the helpers are outside this chunk -- confirm.
;
; Annotations of the form "[d] = op([a],[b])" below give rsp-relative
; offsets and assume the q-helpers take operand a via rsi plus limb
; registers, operand b via rbx/rax, and store 32 bytes at rdi.  That
; contract is inferred from the call setup only -- TODO confirm.
;
; NOTE(review): the decimal prefixes and author/date strings fused onto the
; lines of this block look like blame-view residue, not NASM -- confirm
; against the generated source.
; ----------------------------------------------------------------------
3218global ecp_nistz256_point_add_affine
3219
3220ALIGN 32
3221ecp_nistz256_point_add_affine:
; Park rdi/rsi in the slots at [rsp+8] and [rsp+16]; the epilogue reloads
; them from the same slots.
3222 mov QWORD[8+rsp],rdi ;WIN64 prologue
3223 mov QWORD[16+rsp],rsi
3224 mov rax,rsp
3225$L$SEH_begin_ecp_nistz256_point_add_affine:
; Shuffle the incoming argument registers into rdi, rsi, rdx.
3226 mov rdi,rcx
3227 mov rsi,rdx
3228 mov rdx,r8
3229
3230
Robert Sloanab8b8882018-03-26 11:39:51 -07003231
; Dispatch: when bits 8 and 19 (mask 0x80100) of the value at
; OPENSSL_ia32cap_P+8 are both set, use $L$point_add_affinex instead.
; NOTE(review): presumably the BMI2 and ADX feature flags -- confirm.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003232 lea rcx,[OPENSSL_ia32cap_P]
3233 mov rcx,QWORD[8+rcx]
3234 and ecx,0x80100
3235 cmp ecx,0x80100
3236 je NEAR $L$point_add_affinex
Adam Langleyfad63272015-11-12 12:15:39 -08003237 push rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07003238
Adam Langleyfad63272015-11-12 12:15:39 -08003239 push rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07003240
Adam Langleyfad63272015-11-12 12:15:39 -08003241 push r12
Robert Sloanab8b8882018-03-26 11:39:51 -07003242
Adam Langleyfad63272015-11-12 12:15:39 -08003243 push r13
Robert Sloanab8b8882018-03-26 11:39:51 -07003244
Adam Langleyfad63272015-11-12 12:15:39 -08003245 push r14
Robert Sloanab8b8882018-03-26 11:39:51 -07003246
Adam Langleyfad63272015-11-12 12:15:39 -08003247 push r15
Robert Sloanab8b8882018-03-26 11:39:51 -07003248
; 488 bytes of locals.  NOTE(review): with the usual rsp%16==8 at entry the
; six pushes plus this adjustment leave rsp 16-byte aligned, which the
; movdqa stack accesses below need -- confirm.
Adam Langleyfad63272015-11-12 12:15:39 -08003249 sub rsp,32*15+8
3250
Robert Sloanab8b8882018-03-26 11:39:51 -07003251$L$add_affineq_body:
3252
; Copy the first input (via rsi) to [320..415]; rbx takes over the second
; input pointer.  rax,r14,r15,r8 get the four limbs of the field at
; offset 64, ready for the first sqr_montq call.
Adam Langleyfad63272015-11-12 12:15:39 -08003253 movdqu xmm0,XMMWORD[rsi]
3254 mov rbx,rdx
3255 movdqu xmm1,XMMWORD[16+rsi]
3256 movdqu xmm2,XMMWORD[32+rsi]
3257 movdqu xmm3,XMMWORD[48+rsi]
3258 movdqu xmm4,XMMWORD[64+rsi]
3259 movdqu xmm5,XMMWORD[80+rsi]
3260 mov rax,QWORD[((64+0))+rsi]
3261 mov r14,QWORD[((64+8))+rsi]
3262 mov r15,QWORD[((64+16))+rsi]
3263 mov r8,QWORD[((64+24))+rsi]
3264 movdqa XMMWORD[320+rsp],xmm0
3265 movdqa XMMWORD[(320+16)+rsp],xmm1
Adam Langleyfad63272015-11-12 12:15:39 -08003266 movdqa XMMWORD[352+rsp],xmm2
3267 movdqa XMMWORD[(352+16)+rsp],xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08003268 movdqa XMMWORD[384+rsp],xmm4
3269 movdqa XMMWORD[(384+16)+rsp],xmm5
; Start OR-folding bytes 64..95 of the first input into xmm5.  The fold is
; interleaved with copying the second input (via rbx) to [416..479] and
; OR-ing its 64 bytes together into xmm3.
Steven Valdez909b19f2016-11-21 15:35:44 -05003270 por xmm5,xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08003271
3272 movdqu xmm0,XMMWORD[rbx]
Steven Valdez909b19f2016-11-21 15:35:44 -05003273 pshufd xmm3,xmm5,0xb1
Adam Langleyfad63272015-11-12 12:15:39 -08003274 movdqu xmm1,XMMWORD[16+rbx]
3275 movdqu xmm2,XMMWORD[32+rbx]
3276 por xmm5,xmm3
3277 movdqu xmm3,XMMWORD[48+rbx]
3278 movdqa XMMWORD[416+rsp],xmm0
3279 pshufd xmm4,xmm5,0x1e
3280 movdqa XMMWORD[(416+16)+rsp],xmm1
3281 por xmm1,xmm0
; The bytes 66 48 0F 6E C7 encode "movq xmm0,rdi": stash the output pointer.
3282DB 102,72,15,110,199
3283 movdqa XMMWORD[448+rsp],xmm2
3284 movdqa XMMWORD[(448+16)+rsp],xmm3
3285 por xmm3,xmm2
3286 por xmm5,xmm4
3287 pxor xmm4,xmm4
3288 por xmm3,xmm1
3289
; [32] = sqr_montq(first input, field at offset 64)
3290 lea rsi,[((64-0))+rsi]
3291 lea rdi,[32+rsp]
3292 call __ecp_nistz256_sqr_montq
3293
; Finish both masks: dword 0 of each folded value is compared with zero and
; broadcast.  Interleaved with that, the values in r12..r15 (taken to be the
; sqr_montq result) are moved to r9..r12 as operand a of the next multiply,
; and rax gets the first limb of the second input.
3294 pcmpeqd xmm5,xmm4
3295 pshufd xmm4,xmm3,0xb1
3296 mov rax,QWORD[rbx]
3297
3298 mov r9,r12
3299 por xmm4,xmm3
3300 pshufd xmm5,xmm5,0
3301 pshufd xmm3,xmm4,0x1e
3302 mov r10,r13
3303 por xmm4,xmm3
3304 pxor xmm3,xmm3
3305 mov r11,r14
3306 pcmpeqd xmm4,xmm3
3307 pshufd xmm4,xmm4,0
3308
; [0] = mul_montq([32], second input field 0)
3309 lea rsi,[((32-0))+rsp]
3310 mov r12,r15
3311 lea rdi,[rsp]
3312 call __ecp_nistz256_mul_montq
3313
; [64] = (value left in registers by the multiply) - [320]
3314 lea rbx,[320+rsp]
3315 lea rdi,[64+rsp]
3316 call __ecp_nistz256_sub_fromq
3317
; [32] = mul_montq([32], [384])
3318 mov rax,QWORD[384+rsp]
3319 lea rbx,[384+rsp]
3320 mov r9,QWORD[((0+32))+rsp]
3321 mov r10,QWORD[((8+32))+rsp]
3322 lea rsi,[((0+32))+rsp]
3323 mov r11,QWORD[((16+32))+rsp]
3324 mov r12,QWORD[((24+32))+rsp]
3325 lea rdi,[32+rsp]
3326 call __ecp_nistz256_mul_montq
3327
; [288] = mul_montq([64], [384]); selected into output bytes 64..95 below
3328 mov rax,QWORD[384+rsp]
3329 lea rbx,[384+rsp]
3330 mov r9,QWORD[((0+64))+rsp]
3331 mov r10,QWORD[((8+64))+rsp]
3332 lea rsi,[((0+64))+rsp]
3333 mov r11,QWORD[((16+64))+rsp]
3334 mov r12,QWORD[((24+64))+rsp]
3335 lea rdi,[288+rsp]
3336 call __ecp_nistz256_mul_montq
3337
; [32] = mul_montq([32], [448])
3338 mov rax,QWORD[448+rsp]
3339 lea rbx,[448+rsp]
3340 mov r9,QWORD[((0+32))+rsp]
3341 mov r10,QWORD[((8+32))+rsp]
3342 lea rsi,[((0+32))+rsp]
3343 mov r11,QWORD[((16+32))+rsp]
3344 mov r12,QWORD[((24+32))+rsp]
3345 lea rdi,[32+rsp]
3346 call __ecp_nistz256_mul_montq
3347
; [96] = (value left in registers by the multiply) - [352]
3348 lea rbx,[352+rsp]
3349 lea rdi,[96+rsp]
3350 call __ecp_nistz256_sub_fromq
3351
; [128] = sqr_montq([64])
3352 mov rax,QWORD[((0+64))+rsp]
3353 mov r14,QWORD[((8+64))+rsp]
3354 lea rsi,[((0+64))+rsp]
3355 mov r15,QWORD[((16+64))+rsp]
3356 mov r8,QWORD[((24+64))+rsp]
3357 lea rdi,[128+rsp]
3358 call __ecp_nistz256_sqr_montq
3359
; [192] = sqr_montq([96])
3360 mov rax,QWORD[((0+96))+rsp]
3361 mov r14,QWORD[((8+96))+rsp]
3362 lea rsi,[((0+96))+rsp]
3363 mov r15,QWORD[((16+96))+rsp]
3364 mov r8,QWORD[((24+96))+rsp]
3365 lea rdi,[192+rsp]
3366 call __ecp_nistz256_sqr_montq
3367
; [160] = mul_montq([64], [128])
3368 mov rax,QWORD[128+rsp]
3369 lea rbx,[128+rsp]
3370 mov r9,QWORD[((0+64))+rsp]
3371 mov r10,QWORD[((8+64))+rsp]
3372 lea rsi,[((0+64))+rsp]
3373 mov r11,QWORD[((16+64))+rsp]
3374 mov r12,QWORD[((24+64))+rsp]
3375 lea rdi,[160+rsp]
3376 call __ecp_nistz256_mul_montq
3377
; [0] = mul_montq([128], [320])
3378 mov rax,QWORD[320+rsp]
3379 lea rbx,[320+rsp]
3380 mov r9,QWORD[((0+128))+rsp]
3381 mov r10,QWORD[((8+128))+rsp]
3382 lea rsi,[((0+128))+rsp]
3383 mov r11,QWORD[((16+128))+rsp]
3384 mov r12,QWORD[((24+128))+rsp]
3385 lea rdi,[rsp]
3386 call __ecp_nistz256_mul_montq
3387
3388
3389
3390
; Inline doubling of r12,r13,r8,r9 with the carry-out in r11, followed by a
; trial subtraction of the limbs (-1, r14, 0, r15).  On borrow the cmovc
; instructions restore the unreduced sum from rax,rbp,rcx,r10.  The loads of
; [192] into rax,rbp,rcx,r10 for the following subq call are interleaved;
; mov does not disturb the flags the cmovc instructions test.
; NOTE(review): assumes r14/r15 hold limbs 1 and 3 of the modulus on return
; from mul_montq -- confirm.
Steven Valdez909b19f2016-11-21 15:35:44 -05003391 xor r11,r11
Adam Langleyfad63272015-11-12 12:15:39 -08003392 add r12,r12
3393 lea rsi,[192+rsp]
3394 adc r13,r13
3395 mov rax,r12
3396 adc r8,r8
3397 adc r9,r9
3398 mov rbp,r13
Steven Valdez909b19f2016-11-21 15:35:44 -05003399 adc r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08003400
3401 sub r12,-1
3402 mov rcx,r8
3403 sbb r13,r14
3404 sbb r8,0
3405 mov r10,r9
3406 sbb r9,r15
Steven Valdez909b19f2016-11-21 15:35:44 -05003407 sbb r11,0
Adam Langleyfad63272015-11-12 12:15:39 -08003408
Steven Valdez909b19f2016-11-21 15:35:44 -05003409 cmovc r12,rax
Adam Langleyfad63272015-11-12 12:15:39 -08003410 mov rax,QWORD[rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05003411 cmovc r13,rbp
Adam Langleyfad63272015-11-12 12:15:39 -08003412 mov rbp,QWORD[8+rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05003413 cmovc r8,rcx
Adam Langleyfad63272015-11-12 12:15:39 -08003414 mov rcx,QWORD[16+rsi]
Steven Valdez909b19f2016-11-21 15:35:44 -05003415 cmovc r9,r10
Adam Langleyfad63272015-11-12 12:15:39 -08003416 mov r10,QWORD[24+rsi]
3417
; registers = [192] - (doubled value); presumably left in r12,r13,r8,r9
3418 call __ecp_nistz256_subq
3419
; [224] = registers - [160]; selected into output bytes 0..31 below
3420 lea rbx,[160+rsp]
3421 lea rdi,[224+rsp]
3422 call __ecp_nistz256_sub_fromq
3423
; registers = [0] - registers; the result is then stored to [64] by hand
3424 mov rax,QWORD[((0+0))+rsp]
3425 mov rbp,QWORD[((0+8))+rsp]
3426 mov rcx,QWORD[((0+16))+rsp]
3427 mov r10,QWORD[((0+24))+rsp]
3428 lea rdi,[64+rsp]
3429
3430 call __ecp_nistz256_subq
3431
3432 mov QWORD[rdi],r12
3433 mov QWORD[8+rdi],r13
3434 mov QWORD[16+rdi],r8
3435 mov QWORD[24+rdi],r9
; [32] = mul_montq([160], [352])
3436 mov rax,QWORD[352+rsp]
3437 lea rbx,[352+rsp]
3438 mov r9,QWORD[((0+160))+rsp]
3439 mov r10,QWORD[((8+160))+rsp]
3440 lea rsi,[((0+160))+rsp]
3441 mov r11,QWORD[((16+160))+rsp]
3442 mov r12,QWORD[((24+160))+rsp]
3443 lea rdi,[32+rsp]
3444 call __ecp_nistz256_mul_montq
3445
; [64] = mul_montq([64], [96])
3446 mov rax,QWORD[96+rsp]
3447 lea rbx,[96+rsp]
3448 mov r9,QWORD[((0+64))+rsp]
3449 mov r10,QWORD[((8+64))+rsp]
3450 lea rsi,[((0+64))+rsp]
3451 mov r11,QWORD[((16+64))+rsp]
3452 mov r12,QWORD[((24+64))+rsp]
3453 lea rdi,[64+rsp]
3454 call __ecp_nistz256_mul_montq
3455
; [256] = registers - [32]; selected into output bytes 32..63 below
3456 lea rbx,[32+rsp]
3457 lea rdi,[256+rsp]
3458 call __ecp_nistz256_sub_fromq
3459
; The bytes 66 48 0F 7E C7 encode "movq rdi,xmm0": recover the output pointer.
3460DB 102,72,15,126,199
3461
; Branch-free selection of the three output fields using the two masks:
;   out[64..95] = xmm4 ? [384] : (xmm5 ? $L$ONE_mont : [288])
;   out[ 0..31] = xmm4 ? [320] : (xmm5 ? [416]       : [224])
;   out[32..63] = xmm4 ? [352] : (xmm5 ? [448]       : [256])
; Each select is (mask AND a) OR (NOT mask AND b) via pand/pandn/por.
3462 movdqa xmm0,xmm5
3463 movdqa xmm1,xmm5
3464 pandn xmm0,XMMWORD[288+rsp]
3465 movdqa xmm2,xmm5
3466 pandn xmm1,XMMWORD[((288+16))+rsp]
3467 movdqa xmm3,xmm5
3468 pand xmm2,XMMWORD[$L$ONE_mont]
3469 pand xmm3,XMMWORD[(($L$ONE_mont+16))]
3470 por xmm2,xmm0
3471 por xmm3,xmm1
3472
3473 movdqa xmm0,xmm4
3474 movdqa xmm1,xmm4
3475 pandn xmm0,xmm2
3476 movdqa xmm2,xmm4
3477 pandn xmm1,xmm3
3478 movdqa xmm3,xmm4
3479 pand xmm2,XMMWORD[384+rsp]
3480 pand xmm3,XMMWORD[((384+16))+rsp]
3481 por xmm2,xmm0
3482 por xmm3,xmm1
3483 movdqu XMMWORD[64+rdi],xmm2
3484 movdqu XMMWORD[80+rdi],xmm3
3485
3486 movdqa xmm0,xmm5
3487 movdqa xmm1,xmm5
3488 pandn xmm0,XMMWORD[224+rsp]
3489 movdqa xmm2,xmm5
3490 pandn xmm1,XMMWORD[((224+16))+rsp]
3491 movdqa xmm3,xmm5
3492 pand xmm2,XMMWORD[416+rsp]
3493 pand xmm3,XMMWORD[((416+16))+rsp]
3494 por xmm2,xmm0
3495 por xmm3,xmm1
3496
3497 movdqa xmm0,xmm4
3498 movdqa xmm1,xmm4
3499 pandn xmm0,xmm2
3500 movdqa xmm2,xmm4
3501 pandn xmm1,xmm3
3502 movdqa xmm3,xmm4
3503 pand xmm2,XMMWORD[320+rsp]
3504 pand xmm3,XMMWORD[((320+16))+rsp]
3505 por xmm2,xmm0
3506 por xmm3,xmm1
3507 movdqu XMMWORD[rdi],xmm2
3508 movdqu XMMWORD[16+rdi],xmm3
3509
3510 movdqa xmm0,xmm5
3511 movdqa xmm1,xmm5
3512 pandn xmm0,XMMWORD[256+rsp]
3513 movdqa xmm2,xmm5
3514 pandn xmm1,XMMWORD[((256+16))+rsp]
3515 movdqa xmm3,xmm5
3516 pand xmm2,XMMWORD[448+rsp]
3517 pand xmm3,XMMWORD[((448+16))+rsp]
3518 por xmm2,xmm0
3519 por xmm3,xmm1
3520
3521 movdqa xmm0,xmm4
3522 movdqa xmm1,xmm4
3523 pandn xmm0,xmm2
3524 movdqa xmm2,xmm4
3525 pandn xmm1,xmm3
3526 movdqa xmm3,xmm4
3527 pand xmm2,XMMWORD[352+rsp]
3528 pand xmm3,XMMWORD[((352+16))+rsp]
3529 por xmm2,xmm0
3530 por xmm3,xmm1
3531 movdqu XMMWORD[32+rdi],xmm2
3532 movdqu XMMWORD[48+rdi],xmm3
3533
; Epilogue: rsi = rsp+480+56 is the stack pointer value from before the six
; pushes (488 bytes of locals + 48 bytes of pushes = 536).  The saved
; registers are reloaded at negative offsets from it, then rsp is reset.
Robert Sloanab8b8882018-03-26 11:39:51 -07003534 lea rsi,[((480+56))+rsp]
3535
3536 mov r15,QWORD[((-48))+rsi]
3537
3538 mov r14,QWORD[((-40))+rsi]
3539
3540 mov r13,QWORD[((-32))+rsi]
3541
3542 mov r12,QWORD[((-24))+rsi]
3543
3544 mov rbx,QWORD[((-16))+rsi]
3545
3546 mov rbp,QWORD[((-8))+rsi]
3547
3548 lea rsp,[rsi]
3549
3550$L$add_affineq_epilogue:
; Reload rdi/rsi from the slots written in the prologue, then return.
3551 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3552 mov rsi,QWORD[16+rsp]
3553 DB 0F3h,0C3h ;repret
3554
3555$L$SEH_end_ecp_nistz256_point_add_affine:
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003556
; ----------------------------------------------------------------------
; __ecp_nistz256_add_tox -- 256-bit add followed by one conditional subtract.
;
; In:   r12,r13,r8,r9 = first operand, least-significant limb in r12
;       rbx           = address of the second operand (four qwords)
;       r14,r15       = limbs 1 and 3 of the value subtracted afterwards;
;                       limbs 0 and 2 are the immediates -1 and 0.
;                       ecp_nistz256_point_doublex loads r14/r15 from
;                       $L$poly+8 and $L$poly+24, which makes the
;                       subtracted value the $L$poly constant.
;       rdi           = address the four-qword result is stored to
; Out:  r12,r13,r8,r9 = result, also stored at [rdi]
; Writes: rax, rbp, rcx, r10, r11, flags
;
; NOTE(review): the decimal prefixes and author/date strings fused onto the
; lines of this block look like blame-view residue, not NASM -- confirm
; against the generated source.
; ----------------------------------------------------------------------
3557ALIGN 32
3558__ecp_nistz256_add_tox:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003559
; xor clears r11 and CF, so the first adc acts as a plain add.  The carry
; out of the top limb ends up in r11.  rax/rbp keep a copy of the raw sum.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003560 xor r11,r11
3561 adc r12,QWORD[rbx]
3562 adc r13,QWORD[8+rbx]
3563 mov rax,r12
3564 adc r8,QWORD[16+rbx]
3565 adc r9,QWORD[24+rbx]
3566 mov rbp,r13
3567 adc r11,0
3568
; Trial subtraction of (-1, r14, 0, r15).  "xor r10,r10" is here to clear CF
; before the sbb chain; r10 itself is overwritten by "mov r10,r9" below.
; The final "sbb r11,0" folds the earlier carry-out into the borrow.
3569 xor r10,r10
3570 sbb r12,-1
3571 mov rcx,r8
3572 sbb r13,r14
3573 sbb r8,0
3574 mov r10,r9
3575 sbb r9,r15
3576 sbb r11,0
3577
; CF set means the trial subtraction borrowed: restore the raw sum.
; mov does not change flags, so the stores can be interleaved.
3578 cmovc r12,rax
3579 cmovc r13,rbp
3580 mov QWORD[rdi],r12
3581 cmovc r8,rcx
3582 mov QWORD[8+rdi],r13
3583 cmovc r9,r10
3584 mov QWORD[16+rdi],r8
3585 mov QWORD[24+rdi],r9
3586
3587 DB 0F3h,0C3h ;repret
3588
3589
3590
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003591
; ----------------------------------------------------------------------
; __ecp_nistz256_sub_fromx -- 256-bit subtract of a memory operand from the
; register operand, followed by one conditional add-back.
;
; In:   r12,r13,r8,r9 = minuend, least-significant limb in r12
;       rbx           = address of the subtrahend (four qwords)
;       r14,r15       = limbs 1 and 3 of the value added back on borrow;
;                       limbs 0 and 2 are the immediates -1 and 0.
;                       ecp_nistz256_point_doublex loads r14/r15 from
;                       $L$poly+8 and $L$poly+24.
;       rdi           = address the four-qword result is stored to
; Out:  r12,r13,r8,r9 = result, also stored at [rdi]
; Writes: rax, rbp, rcx, r10, r11, flags
;
; NOTE(review): the decimal prefixes and author/date strings fused onto the
; lines of this block look like blame-view residue, not NASM -- confirm
; against the generated source.
; ----------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003592ALIGN 32
3593__ecp_nistz256_sub_fromx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003594
; xor clears r11 and CF, so the first sbb acts as a plain sub.  After
; "sbb r11,0" r11 is zero when there was no borrow and all-ones otherwise.
; rax/rbp keep a copy of the raw difference.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003595 xor r11,r11
3596 sbb r12,QWORD[rbx]
3597 sbb r13,QWORD[8+rbx]
3598 mov rax,r12
3599 sbb r8,QWORD[16+rbx]
3600 sbb r9,QWORD[24+rbx]
3601 mov rbp,r13
3602 sbb r11,0
3603
; Trial add of (-1, r14, 0, r15).  "xor r10,r10" is here to clear CF before
; the adc chain; r10 itself is overwritten by "mov r10,r9" below.  The
; carry out of the last adc is not used.
3604 xor r10,r10
3605 adc r12,-1
3606 mov rcx,r8
3607 adc r13,r14
3608 adc r8,0
3609 mov r10,r9
3610 adc r9,r15
3611
; bt copies bit 0 of r11 (the borrow indicator) into CF.  With no borrow
; the raw difference is restored; with a borrow the added-back value stays.
3612 bt r11,0
3613 cmovnc r12,rax
3614 cmovnc r13,rbp
3615 mov QWORD[rdi],r12
3616 cmovnc r8,rcx
3617 mov QWORD[8+rdi],r13
3618 cmovnc r9,r10
3619 mov QWORD[16+rdi],r8
3620 mov QWORD[24+rdi],r9
3621
3622 DB 0F3h,0C3h ;repret
3623
3624
3625
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003626
; ----------------------------------------------------------------------
; __ecp_nistz256_subx -- register-only 256-bit subtract with one
; conditional add-back.  Nothing is stored to memory here.
;
; In:   rax,rbp,rcx,r10 = minuend, least-significant limb in rax
;       r12,r13,r8,r9   = subtrahend, least-significant limb in r12
;       r14,r15         = limbs 1 and 3 of the value added back on borrow;
;                         limbs 0 and 2 are the immediates -1 and 0
; Out:  r12,r13,r8,r9   = result
; Writes: rax, rbp, rcx, r10, r11, flags
;
; NOTE(review): the decimal prefixes and author/date strings fused onto the
; lines of this block look like blame-view residue, not NASM -- confirm
; against the generated source.
; ----------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003627ALIGN 32
3628__ecp_nistz256_subx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003629
; xor clears r11 and CF.  The raw difference is computed in
; rax,rbp,rcx,r10; r12/r13 receive a copy of its low two limbs.  After
; "sbb r11,0" r11 is zero when there was no borrow and all-ones otherwise.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003630 xor r11,r11
3631 sbb rax,r12
3632 sbb rbp,r13
3633 mov r12,rax
3634 sbb rcx,r8
3635 sbb r10,r9
3636 mov r13,rbp
3637 sbb r11,0
3638
; Trial add of (-1, r14, 0, r15) into rax,rbp,rcx,r10.  r8/r9 receive the
; raw high limbs first.  "xor r9,r9" is here to clear CF; r9 itself is
; overwritten by "mov r9,r10".
3639 xor r9,r9
3640 adc rax,-1
3641 mov r8,rcx
3642 adc rbp,r14
3643 adc rcx,0
3644 mov r9,r10
3645 adc r10,r15
3646
; CF = bit 0 of r11.  On borrow, take the added-back value; otherwise
; r12,r13,r8,r9 keep the raw difference.
3647 bt r11,0
3648 cmovc r12,rax
3649 cmovc r13,rbp
3650 cmovc r8,rcx
3651 cmovc r9,r10
3652
3653 DB 0F3h,0C3h ;repret
3654
3655
3656
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003657
; ----------------------------------------------------------------------
; __ecp_nistz256_mul_by_2x -- doubles a 256-bit register operand, then
; performs one conditional subtract.
;
; In:   r12,r13,r8,r9 = operand, least-significant limb in r12
;       r14,r15       = limbs 1 and 3 of the value subtracted afterwards;
;                       limbs 0 and 2 are the immediates -1 and 0.
;                       ecp_nistz256_point_doublex loads r14/r15 from
;                       $L$poly+8 and $L$poly+24.
;       rdi           = address the four-qword result is stored to
; Out:  r12,r13,r8,r9 = result, also stored at [rdi]
; Writes: rax, rbp, rcx, r10, r11, flags
;
; NOTE(review): the decimal prefixes and author/date strings fused onto the
; lines of this block look like blame-view residue, not NASM -- confirm
; against the generated source.
; ----------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003658ALIGN 32
3659__ecp_nistz256_mul_by_2x:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003660
; xor clears r11 and CF; "adc reg,reg" then doubles each limb while
; propagating the carry upward.  The bit shifted out of the top limb ends
; up in r11.  rax/rbp keep a copy of the raw doubled value.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003661 xor r11,r11
3662 adc r12,r12
3663 adc r13,r13
3664 mov rax,r12
3665 adc r8,r8
3666 adc r9,r9
3667 mov rbp,r13
3668 adc r11,0
3669
; Trial subtraction of (-1, r14, 0, r15).  "xor r10,r10" is here to clear CF
; before the sbb chain; r10 itself is overwritten by "mov r10,r9" below.
3670 xor r10,r10
3671 sbb r12,-1
3672 mov rcx,r8
3673 sbb r13,r14
3674 sbb r8,0
3675 mov r10,r9
3676 sbb r9,r15
3677 sbb r11,0
3678
; CF set means the trial subtraction borrowed: restore the raw doubled value.
3679 cmovc r12,rax
3680 cmovc r13,rbp
3681 mov QWORD[rdi],r12
3682 cmovc r8,rcx
3683 mov QWORD[8+rdi],r13
3684 cmovc r9,r10
3685 mov QWORD[16+rdi],r8
3686 mov QWORD[24+rdi],r9
3687
3688 DB 0F3h,0C3h ;repret
3689
3690
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003691
; ----------------------------------------------------------------------
; ecp_nistz256_point_doublex -- "x" code path of point doubling.
;
; The two register arguments arrive in rcx, rdx and are copied to rdi, rsi:
;   rdi = output pointer (fields at +0, +32 and +64 are written)
;   rsi = input pointer  (96 bytes are read)
; NOTE(review): the three 32-byte fields are presumably the X, Y, Z
; coordinates of a Jacobian point -- confirm against the C caller.
;
; Labels:
;   $L$point_doublex          second entry, placed after the argument moves
;                             (its users are outside this chunk)
;   $L$point_double_shortcutx entered from $L$add_doublex in
;                             ecp_nistz256_point_addx.  That code reloads
;                             rsi and rdi from xmm1 and xmm0 and adds 416 to
;                             rsp, shrinking its 32*18+8 frame to the 32*5+8
;                             frame used here.
;
; Frame: six pushes, then "sub rsp,32*5+8".  Offsets from the new rsp:
;   96..127  copy of input bytes 0..31
;    0..95, 128..159  32-byte temporaries
; Values parked in SSE registers across the helper calls:
;   xmm0 = output pointer, xmm1 = output+32, xmm2 = output+64
;
; Annotations of the form "[d] = op(...)" give rsp-relative offsets.
; add_tox, sub_fromx, subx and mul_by_2x are defined just above in this
; file.  sqr_montx and mul_montx are outside this chunk; their operand
; contract (rdx = first limb, rsi = operand address biased by -128,
; rbx = second operand address, rdi = destination, result also left in
; registers) is inferred from the call setup only -- TODO confirm.
;
; NOTE(review): the decimal prefixes and author/date strings fused onto the
; lines of this block look like blame-view residue, not NASM -- confirm
; against the generated source.
; ----------------------------------------------------------------------
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003692ALIGN 32
3693ecp_nistz256_point_doublex:
; Park rdi/rsi in the slots at [rsp+8] and [rsp+16]; the epilogue reloads
; them from the same slots.
3694 mov QWORD[8+rsp],rdi ;WIN64 prologue
3695 mov QWORD[16+rsp],rsi
3696 mov rax,rsp
3697$L$SEH_begin_ecp_nistz256_point_doublex:
3698 mov rdi,rcx
3699 mov rsi,rdx
3700
3701
3702
3703$L$point_doublex:
3704 push rbp
3705
3706 push rbx
3707
3708 push r12
3709
3710 push r13
3711
3712 push r14
3713
3714 push r15
3715
3716 sub rsp,32*5+8
3717
3718$L$point_doublex_body:
3719
3720$L$point_double_shortcutx:
; Copy input bytes 0..31 to [96], load input bytes 32..63 into
; r12,r13,r8,r9, and load r14/r15 with limbs 1 and 3 of $L$poly for the
; add/sub helpers.  rbx keeps the input pointer.
3721 movdqu xmm0,XMMWORD[rsi]
3722 mov rbx,rsi
3723 movdqu xmm1,XMMWORD[16+rsi]
3724 mov r12,QWORD[((32+0))+rsi]
3725 mov r13,QWORD[((32+8))+rsi]
3726 mov r8,QWORD[((32+16))+rsi]
3727 mov r9,QWORD[((32+24))+rsi]
3728 mov r14,QWORD[(($L$poly+8))]
3729 mov r15,QWORD[(($L$poly+24))]
3730 movdqa XMMWORD[96+rsp],xmm0
3731 movdqa XMMWORD[(96+16)+rsp],xmm1
3732 lea r10,[32+rdi]
3733 lea r11,[64+rdi]
; The three byte sequences encode "movq xmm0,rdi", "movq xmm1,r10" and
; "movq xmm2,r11": park output, output+32 and output+64.
3734DB 102,72,15,110,199
3735DB 102,73,15,110,202
3736DB 102,73,15,110,211
3737
; [0] = 2 * (input bytes 32..63)
3738 lea rdi,[rsp]
3739 call __ecp_nistz256_mul_by_2x
3740
; [64] = sqr_montx(input bytes 64..95)
3741 mov rdx,QWORD[((64+0))+rsi]
3742 mov r14,QWORD[((64+8))+rsi]
3743 mov r15,QWORD[((64+16))+rsi]
3744 mov r8,QWORD[((64+24))+rsi]
3745 lea rsi,[((64-128))+rsi]
3746 lea rdi,[64+rsp]
3747 call __ecp_nistz256_sqr_montx
3748
; [0] = sqr_montx([0])
3749 mov rdx,QWORD[((0+0))+rsp]
3750 mov r14,QWORD[((8+0))+rsp]
3751 lea rsi,[((-128+0))+rsp]
3752 mov r15,QWORD[((16+0))+rsp]
3753 mov r8,QWORD[((24+0))+rsp]
3754 lea rdi,[rsp]
3755 call __ecp_nistz256_sqr_montx
3756
; Multiply input bytes 64..95 by input bytes 32..63.  rdi is reloaded from
; xmm2 (bytes 66 48 0F 7E D7 encode "movq rdi,xmm2"), so the destination is
; output+64.  The back-to-back mul_by_2x call doubles the value left in
; r12,r13,r8,r9 and stores it at [rdi].
; NOTE(review): assumes mul_montx preserves rdi and leaves r14/r15 holding
; the modulus limbs -- confirm.
3757 mov rdx,QWORD[32+rbx]
3758 mov r9,QWORD[((64+0))+rbx]
3759 mov r10,QWORD[((64+8))+rbx]
3760 mov r11,QWORD[((64+16))+rbx]
3761 mov r12,QWORD[((64+24))+rbx]
3762 lea rsi,[((64-128))+rbx]
3763 lea rbx,[32+rbx]
3764DB 102,72,15,126,215
3765 call __ecp_nistz256_mul_montx
3766 call __ecp_nistz256_mul_by_2x
3767
; [32] = [96] + [64]
3768 mov r12,QWORD[((96+0))+rsp]
3769 mov r13,QWORD[((96+8))+rsp]
3770 lea rbx,[64+rsp]
3771 mov r8,QWORD[((96+16))+rsp]
3772 mov r9,QWORD[((96+24))+rsp]
3773 lea rdi,[32+rsp]
3774 call __ecp_nistz256_add_tox
3775
; [64] = [96] - [64]
3776 mov r12,QWORD[((96+0))+rsp]
3777 mov r13,QWORD[((96+8))+rsp]
3778 lea rbx,[64+rsp]
3779 mov r8,QWORD[((96+16))+rsp]
3780 mov r9,QWORD[((96+24))+rsp]
3781 lea rdi,[64+rsp]
3782 call __ecp_nistz256_sub_fromx
3783
; sqr_montx([0]) with rdi = output+32 (bytes 66 48 0F 7E CF encode
; "movq rdi,xmm1").
3784 mov rdx,QWORD[((0+0))+rsp]
3785 mov r14,QWORD[((8+0))+rsp]
3786 lea rsi,[((-128+0))+rsp]
3787 mov r15,QWORD[((16+0))+rsp]
3788 mov r8,QWORD[((24+0))+rsp]
3789DB 102,72,15,126,207
3790 call __ecp_nistz256_sqr_montx
; Halve the value in r12..r15.  First form value + (-1, rsi, 0, rbp) with
; the carry-out in r9.  Then "test rax,1" checks the low bit of the original
; value: when it is clear, the cmovz instructions restore the original limbs
; and zero the carry.  The five-limb result is then shifted right by one bit
; and stored to [rdi].
; NOTE(review): assumes rsi/rbp hold limbs 1 and 3 of the modulus on return
; from sqr_montx -- confirm.
3791 xor r9,r9
3792 mov rax,r12
3793 add r12,-1
3794 mov r10,r13
3795 adc r13,rsi
3796 mov rcx,r14
3797 adc r14,0
3798 mov r8,r15
3799 adc r15,rbp
3800 adc r9,0
3801 xor rsi,rsi
3802 test rax,1
3803
3804 cmovz r12,rax
3805 cmovz r13,r10
3806 cmovz r14,rcx
3807 cmovz r15,r8
3808 cmovz r9,rsi
3809
3810 mov rax,r13
3811 shr r12,1
3812 shl rax,63
3813 mov r10,r14
3814 shr r13,1
3815 or r12,rax
3816 shl r10,63
3817 mov rcx,r15
3818 shr r14,1
3819 or r13,r10
3820 shl rcx,63
3821 mov QWORD[rdi],r12
3822 shr r15,1
3823 mov QWORD[8+rdi],r13
3824 shl r9,63
3825 or r14,rcx
3826 or r15,r9
3827 mov QWORD[16+rdi],r14
3828 mov QWORD[24+rdi],r15
; [32] = mul_montx([32], [64])
3829 mov rdx,QWORD[64+rsp]
3830 lea rbx,[64+rsp]
3831 mov r9,QWORD[((0+32))+rsp]
3832 mov r10,QWORD[((8+32))+rsp]
3833 lea rsi,[((-128+32))+rsp]
3834 mov r11,QWORD[((16+32))+rsp]
3835 mov r12,QWORD[((24+32))+rsp]
3836 lea rdi,[32+rsp]
3837 call __ecp_nistz256_mul_montx
3838
; [128] = 2 * (value left in registers by the multiply)
3839 lea rdi,[128+rsp]
3840 call __ecp_nistz256_mul_by_2x
3841
; [32] = (doubled value still in registers) + [32]
3842 lea rbx,[32+rsp]
3843 lea rdi,[32+rsp]
3844 call __ecp_nistz256_add_tox
3845
; [0] = mul_montx([0], [96])
3846 mov rdx,QWORD[96+rsp]
3847 lea rbx,[96+rsp]
3848 mov r9,QWORD[((0+0))+rsp]
3849 mov r10,QWORD[((8+0))+rsp]
3850 lea rsi,[((-128+0))+rsp]
3851 mov r11,QWORD[((16+0))+rsp]
3852 mov r12,QWORD[((24+0))+rsp]
3853 lea rdi,[rsp]
3854 call __ecp_nistz256_mul_montx
3855
; [128] = 2 * (value left in registers by the multiply)
3856 lea rdi,[128+rsp]
3857 call __ecp_nistz256_mul_by_2x
3858
; sqr_montx([32]) with rdi = output (bytes 66 48 0F 7E C7 encode
; "movq rdi,xmm0").
3859 mov rdx,QWORD[((0+32))+rsp]
3860 mov r14,QWORD[((8+32))+rsp]
3861 lea rsi,[((-128+32))+rsp]
3862 mov r15,QWORD[((16+32))+rsp]
3863 mov r8,QWORD[((24+32))+rsp]
3864DB 102,72,15,126,199
3865 call __ecp_nistz256_sqr_montx
3866
; Rearrange registers for sub_fromx: the high limbs move from r14,r15 to
; r8,r9, and rsi,rbp are copied into r14,r15 as the add-back limbs.
; Then [rdi] = registers - [128].
3867 lea rbx,[128+rsp]
3868 mov r8,r14
3869 mov r9,r15
3870 mov r14,rsi
3871 mov r15,rbp
3872 call __ecp_nistz256_sub_fromx
3873
; registers = [0] - registers.  subx stores nothing; the result is written
; to [0] by the explicit stores below.
3874 mov rax,QWORD[((0+0))+rsp]
3875 mov rbp,QWORD[((0+8))+rsp]
3876 mov rcx,QWORD[((0+16))+rsp]
3877 mov r10,QWORD[((0+24))+rsp]
3878 lea rdi,[rsp]
3879 call __ecp_nistz256_subx
3880
; Store the subx result to [0] and move the same four limbs into r9..r12 as
; the multiply operand.  "xor ecx,ecx" sets ZF and the mov/lea instructions
; in between leave flags alone, so both cmovz instructions always move.
; [0] = mul_montx([0], [32])
3881 mov rdx,QWORD[32+rsp]
3882 lea rbx,[32+rsp]
3883 mov r14,r12
3884 xor ecx,ecx
3885 mov QWORD[((0+0))+rsp],r12
3886 mov r10,r13
3887 mov QWORD[((0+8))+rsp],r13
3888 cmovz r11,r8
3889 mov QWORD[((0+16))+rsp],r8
3890 lea rsi,[((0-128))+rsp]
3891 cmovz r12,r9
3892 mov QWORD[((0+24))+rsp],r9
3893 mov r9,r14
3894 lea rdi,[rsp]
3895 call __ecp_nistz256_mul_montx
3896
; Both rbx and rdi are reloaded from xmm1 (the byte sequences encode
; "movq rbx,xmm1" and "movq rdi,xmm1"), so this computes
; [output+32] = registers - [output+32].
3897DB 102,72,15,126,203
3898DB 102,72,15,126,207
3899 call __ecp_nistz256_sub_fromx
3900
; Epilogue: rsi = rsp+160+56 is the stack pointer value from before the six
; pushes (168 bytes of locals + 48 bytes of pushes = 216).  The saved
; registers are reloaded at negative offsets from it, then rsp is reset.
3901 lea rsi,[((160+56))+rsp]
3902
3903 mov r15,QWORD[((-48))+rsi]
3904
3905 mov r14,QWORD[((-40))+rsi]
3906
3907 mov r13,QWORD[((-32))+rsi]
3908
3909 mov r12,QWORD[((-24))+rsi]
3910
3911 mov rbx,QWORD[((-16))+rsi]
3912
3913 mov rbp,QWORD[((-8))+rsi]
3914
3915 lea rsp,[rsi]
3916
3917$L$point_doublex_epilogue:
; Reload rdi/rsi from the slots written in the prologue, then return.
3918 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3919 mov rsi,QWORD[16+rsp]
3920 DB 0F3h,0C3h ;repret
3921
3922$L$SEH_end_ecp_nistz256_point_doublex:
3923
3924ALIGN 32
3925ecp_nistz256_point_addx:
3926 mov QWORD[8+rsp],rdi ;WIN64 prologue
3927 mov QWORD[16+rsp],rsi
3928 mov rax,rsp
3929$L$SEH_begin_ecp_nistz256_point_addx:
3930 mov rdi,rcx
3931 mov rsi,rdx
3932 mov rdx,r8
3933
3934
3935
3936$L$point_addx:
3937 push rbp
3938
3939 push rbx
3940
3941 push r12
3942
3943 push r13
3944
3945 push r14
3946
3947 push r15
3948
3949 sub rsp,32*18+8
3950
3951$L$point_addx_body:
3952
3953 movdqu xmm0,XMMWORD[rsi]
3954 movdqu xmm1,XMMWORD[16+rsi]
3955 movdqu xmm2,XMMWORD[32+rsi]
3956 movdqu xmm3,XMMWORD[48+rsi]
3957 movdqu xmm4,XMMWORD[64+rsi]
3958 movdqu xmm5,XMMWORD[80+rsi]
3959 mov rbx,rsi
3960 mov rsi,rdx
3961 movdqa XMMWORD[384+rsp],xmm0
3962 movdqa XMMWORD[(384+16)+rsp],xmm1
3963 movdqa XMMWORD[416+rsp],xmm2
3964 movdqa XMMWORD[(416+16)+rsp],xmm3
3965 movdqa XMMWORD[448+rsp],xmm4
3966 movdqa XMMWORD[(448+16)+rsp],xmm5
3967 por xmm5,xmm4
3968
3969 movdqu xmm0,XMMWORD[rsi]
3970 pshufd xmm3,xmm5,0xb1
3971 movdqu xmm1,XMMWORD[16+rsi]
3972 movdqu xmm2,XMMWORD[32+rsi]
3973 por xmm5,xmm3
3974 movdqu xmm3,XMMWORD[48+rsi]
3975 mov rdx,QWORD[((64+0))+rsi]
3976 mov r14,QWORD[((64+8))+rsi]
3977 mov r15,QWORD[((64+16))+rsi]
3978 mov r8,QWORD[((64+24))+rsi]
3979 movdqa XMMWORD[480+rsp],xmm0
3980 pshufd xmm4,xmm5,0x1e
3981 movdqa XMMWORD[(480+16)+rsp],xmm1
3982 movdqu xmm0,XMMWORD[64+rsi]
3983 movdqu xmm1,XMMWORD[80+rsi]
3984 movdqa XMMWORD[512+rsp],xmm2
3985 movdqa XMMWORD[(512+16)+rsp],xmm3
3986 por xmm5,xmm4
3987 pxor xmm4,xmm4
3988 por xmm1,xmm0
3989DB 102,72,15,110,199
3990
3991 lea rsi,[((64-128))+rsi]
3992 mov QWORD[((544+0))+rsp],rdx
3993 mov QWORD[((544+8))+rsp],r14
3994 mov QWORD[((544+16))+rsp],r15
3995 mov QWORD[((544+24))+rsp],r8
3996 lea rdi,[96+rsp]
3997 call __ecp_nistz256_sqr_montx
3998
3999 pcmpeqd xmm5,xmm4
4000 pshufd xmm4,xmm1,0xb1
4001 por xmm4,xmm1
4002 pshufd xmm5,xmm5,0
4003 pshufd xmm3,xmm4,0x1e
4004 por xmm4,xmm3
4005 pxor xmm3,xmm3
4006 pcmpeqd xmm4,xmm3
4007 pshufd xmm4,xmm4,0
4008 mov rdx,QWORD[((64+0))+rbx]
4009 mov r14,QWORD[((64+8))+rbx]
4010 mov r15,QWORD[((64+16))+rbx]
4011 mov r8,QWORD[((64+24))+rbx]
4012DB 102,72,15,110,203
4013
4014 lea rsi,[((64-128))+rbx]
4015 lea rdi,[32+rsp]
4016 call __ecp_nistz256_sqr_montx
4017
4018 mov rdx,QWORD[544+rsp]
4019 lea rbx,[544+rsp]
4020 mov r9,QWORD[((0+96))+rsp]
4021 mov r10,QWORD[((8+96))+rsp]
4022 lea rsi,[((-128+96))+rsp]
4023 mov r11,QWORD[((16+96))+rsp]
4024 mov r12,QWORD[((24+96))+rsp]
4025 lea rdi,[224+rsp]
4026 call __ecp_nistz256_mul_montx
4027
4028 mov rdx,QWORD[448+rsp]
4029 lea rbx,[448+rsp]
4030 mov r9,QWORD[((0+32))+rsp]
4031 mov r10,QWORD[((8+32))+rsp]
4032 lea rsi,[((-128+32))+rsp]
4033 mov r11,QWORD[((16+32))+rsp]
4034 mov r12,QWORD[((24+32))+rsp]
4035 lea rdi,[256+rsp]
4036 call __ecp_nistz256_mul_montx
4037
4038 mov rdx,QWORD[416+rsp]
4039 lea rbx,[416+rsp]
4040 mov r9,QWORD[((0+224))+rsp]
4041 mov r10,QWORD[((8+224))+rsp]
4042 lea rsi,[((-128+224))+rsp]
4043 mov r11,QWORD[((16+224))+rsp]
4044 mov r12,QWORD[((24+224))+rsp]
4045 lea rdi,[224+rsp]
4046 call __ecp_nistz256_mul_montx
4047
4048 mov rdx,QWORD[512+rsp]
4049 lea rbx,[512+rsp]
4050 mov r9,QWORD[((0+256))+rsp]
4051 mov r10,QWORD[((8+256))+rsp]
4052 lea rsi,[((-128+256))+rsp]
4053 mov r11,QWORD[((16+256))+rsp]
4054 mov r12,QWORD[((24+256))+rsp]
4055 lea rdi,[256+rsp]
4056 call __ecp_nistz256_mul_montx
4057
4058 lea rbx,[224+rsp]
4059 lea rdi,[64+rsp]
4060 call __ecp_nistz256_sub_fromx
4061
4062 or r12,r13
4063 movdqa xmm2,xmm4
4064 or r12,r8
4065 or r12,r9
4066 por xmm2,xmm5
4067DB 102,73,15,110,220
4068
4069 mov rdx,QWORD[384+rsp]
4070 lea rbx,[384+rsp]
4071 mov r9,QWORD[((0+96))+rsp]
4072 mov r10,QWORD[((8+96))+rsp]
4073 lea rsi,[((-128+96))+rsp]
4074 mov r11,QWORD[((16+96))+rsp]
4075 mov r12,QWORD[((24+96))+rsp]
4076 lea rdi,[160+rsp]
4077 call __ecp_nistz256_mul_montx
4078
4079 mov rdx,QWORD[480+rsp]
4080 lea rbx,[480+rsp]
4081 mov r9,QWORD[((0+32))+rsp]
4082 mov r10,QWORD[((8+32))+rsp]
4083 lea rsi,[((-128+32))+rsp]
4084 mov r11,QWORD[((16+32))+rsp]
4085 mov r12,QWORD[((24+32))+rsp]
4086 lea rdi,[192+rsp]
4087 call __ecp_nistz256_mul_montx
4088
4089 lea rbx,[160+rsp]
4090 lea rdi,[rsp]
4091 call __ecp_nistz256_sub_fromx
4092
4093 or r12,r13
4094 or r12,r8
4095 or r12,r9
4096
Srinivas Paladugudd42a612019-08-09 19:30:39 +00004097DB 102,73,15,126,208
4098DB 102,73,15,126,217
Pete Bentley0c61efe2019-08-13 09:32:23 +01004099 or r12,r8
4100DB 0x3e
Srinivas Paladugudd42a612019-08-09 19:30:39 +00004101 jnz NEAR $L$add_proceedx
Pete Bentley0c61efe2019-08-13 09:32:23 +01004102
4103
4104
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01004105 test r9,r9
4106 jz NEAR $L$add_doublex
4107
Pete Bentley0c61efe2019-08-13 09:32:23 +01004108
4109
4110
4111
4112
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01004113DB 102,72,15,126,199
4114 pxor xmm0,xmm0
4115 movdqu XMMWORD[rdi],xmm0
4116 movdqu XMMWORD[16+rdi],xmm0
4117 movdqu XMMWORD[32+rdi],xmm0
4118 movdqu XMMWORD[48+rdi],xmm0
4119 movdqu XMMWORD[64+rdi],xmm0
4120 movdqu XMMWORD[80+rdi],xmm0
4121 jmp NEAR $L$add_donex
4122
4123ALIGN 32
4124$L$add_doublex:
4125DB 102,72,15,126,206
4126DB 102,72,15,126,199
4127 add rsp,416
Robert Sloan4c22c5f2019-03-01 15:53:37 -08004128
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01004129 jmp NEAR $L$point_double_shortcutx
4130
Robert Sloan4c22c5f2019-03-01 15:53:37 -08004131
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01004132ALIGN 32
4133$L$add_proceedx:
4134 mov rdx,QWORD[((0+64))+rsp]
4135 mov r14,QWORD[((8+64))+rsp]
4136 lea rsi,[((-128+64))+rsp]
4137 mov r15,QWORD[((16+64))+rsp]
4138 mov r8,QWORD[((24+64))+rsp]
4139 lea rdi,[96+rsp]
4140 call __ecp_nistz256_sqr_montx
4141
4142 mov rdx,QWORD[448+rsp]
4143 lea rbx,[448+rsp]
4144 mov r9,QWORD[((0+0))+rsp]
4145 mov r10,QWORD[((8+0))+rsp]
4146 lea rsi,[((-128+0))+rsp]
4147 mov r11,QWORD[((16+0))+rsp]
4148 mov r12,QWORD[((24+0))+rsp]
4149 lea rdi,[352+rsp]
4150 call __ecp_nistz256_mul_montx
4151
4152 mov rdx,QWORD[((0+0))+rsp]
4153 mov r14,QWORD[((8+0))+rsp]
4154 lea rsi,[((-128+0))+rsp]
4155 mov r15,QWORD[((16+0))+rsp]
4156 mov r8,QWORD[((24+0))+rsp]
4157 lea rdi,[32+rsp]
4158 call __ecp_nistz256_sqr_montx
4159
4160 mov rdx,QWORD[544+rsp]
4161 lea rbx,[544+rsp]
4162 mov r9,QWORD[((0+352))+rsp]
4163 mov r10,QWORD[((8+352))+rsp]
4164 lea rsi,[((-128+352))+rsp]
4165 mov r11,QWORD[((16+352))+rsp]
4166 mov r12,QWORD[((24+352))+rsp]
4167 lea rdi,[352+rsp]
4168 call __ecp_nistz256_mul_montx
4169
4170 mov rdx,QWORD[rsp]
4171 lea rbx,[rsp]
4172 mov r9,QWORD[((0+32))+rsp]
4173 mov r10,QWORD[((8+32))+rsp]
4174 lea rsi,[((-128+32))+rsp]
4175 mov r11,QWORD[((16+32))+rsp]
4176 mov r12,QWORD[((24+32))+rsp]
4177 lea rdi,[128+rsp]
4178 call __ecp_nistz256_mul_montx
4179
4180 mov rdx,QWORD[160+rsp]
4181 lea rbx,[160+rsp]
4182 mov r9,QWORD[((0+32))+rsp]
4183 mov r10,QWORD[((8+32))+rsp]
4184 lea rsi,[((-128+32))+rsp]
4185 mov r11,QWORD[((16+32))+rsp]
4186 mov r12,QWORD[((24+32))+rsp]
4187 lea rdi,[192+rsp]
4188 call __ecp_nistz256_mul_montx
4189
4190
4191
4192
4193 xor r11,r11
4194 add r12,r12
4195 lea rsi,[96+rsp]
4196 adc r13,r13
4197 mov rax,r12
4198 adc r8,r8
4199 adc r9,r9
4200 mov rbp,r13
4201 adc r11,0
4202
4203 sub r12,-1
4204 mov rcx,r8
4205 sbb r13,r14
4206 sbb r8,0
4207 mov r10,r9
4208 sbb r9,r15
4209 sbb r11,0
4210
4211 cmovc r12,rax
4212 mov rax,QWORD[rsi]
4213 cmovc r13,rbp
4214 mov rbp,QWORD[8+rsi]
4215 cmovc r8,rcx
4216 mov rcx,QWORD[16+rsi]
4217 cmovc r9,r10
4218 mov r10,QWORD[24+rsi]
4219
4220 call __ecp_nistz256_subx
4221
4222 lea rbx,[128+rsp]
4223 lea rdi,[288+rsp]
4224 call __ecp_nistz256_sub_fromx
4225
4226 mov rax,QWORD[((192+0))+rsp]
4227 mov rbp,QWORD[((192+8))+rsp]
4228 mov rcx,QWORD[((192+16))+rsp]
4229 mov r10,QWORD[((192+24))+rsp]
4230 lea rdi,[320+rsp]
4231
4232 call __ecp_nistz256_subx
4233
4234 mov QWORD[rdi],r12
4235 mov QWORD[8+rdi],r13
4236 mov QWORD[16+rdi],r8
4237 mov QWORD[24+rdi],r9
4238 mov rdx,QWORD[128+rsp]
4239 lea rbx,[128+rsp]
4240 mov r9,QWORD[((0+224))+rsp]
4241 mov r10,QWORD[((8+224))+rsp]
4242 lea rsi,[((-128+224))+rsp]
4243 mov r11,QWORD[((16+224))+rsp]
4244 mov r12,QWORD[((24+224))+rsp]
4245 lea rdi,[256+rsp]
4246 call __ecp_nistz256_mul_montx
4247
4248 mov rdx,QWORD[320+rsp]
4249 lea rbx,[320+rsp]
4250 mov r9,QWORD[((0+64))+rsp]
4251 mov r10,QWORD[((8+64))+rsp]
4252 lea rsi,[((-128+64))+rsp]
4253 mov r11,QWORD[((16+64))+rsp]
4254 mov r12,QWORD[((24+64))+rsp]
4255 lea rdi,[320+rsp]
4256 call __ecp_nistz256_mul_montx
4257
4258 lea rbx,[256+rsp]
4259 lea rdi,[320+rsp]
4260 call __ecp_nistz256_sub_fromx
4261
4262DB 102,72,15,126,199
4263
4264 movdqa xmm0,xmm5
4265 movdqa xmm1,xmm5
4266 pandn xmm0,XMMWORD[352+rsp]
4267 movdqa xmm2,xmm5
4268 pandn xmm1,XMMWORD[((352+16))+rsp]
4269 movdqa xmm3,xmm5
4270 pand xmm2,XMMWORD[544+rsp]
4271 pand xmm3,XMMWORD[((544+16))+rsp]
4272 por xmm2,xmm0
4273 por xmm3,xmm1
4274
4275 movdqa xmm0,xmm4
4276 movdqa xmm1,xmm4
4277 pandn xmm0,xmm2
4278 movdqa xmm2,xmm4
4279 pandn xmm1,xmm3
4280 movdqa xmm3,xmm4
4281 pand xmm2,XMMWORD[448+rsp]
4282 pand xmm3,XMMWORD[((448+16))+rsp]
4283 por xmm2,xmm0
4284 por xmm3,xmm1
4285 movdqu XMMWORD[64+rdi],xmm2
4286 movdqu XMMWORD[80+rdi],xmm3
4287
4288 movdqa xmm0,xmm5
4289 movdqa xmm1,xmm5
4290 pandn xmm0,XMMWORD[288+rsp]
4291 movdqa xmm2,xmm5
4292 pandn xmm1,XMMWORD[((288+16))+rsp]
4293 movdqa xmm3,xmm5
4294 pand xmm2,XMMWORD[480+rsp]
4295 pand xmm3,XMMWORD[((480+16))+rsp]
4296 por xmm2,xmm0
4297 por xmm3,xmm1
4298
4299 movdqa xmm0,xmm4
4300 movdqa xmm1,xmm4
4301 pandn xmm0,xmm2
4302 movdqa xmm2,xmm4
4303 pandn xmm1,xmm3
4304 movdqa xmm3,xmm4
4305 pand xmm2,XMMWORD[384+rsp]
4306 pand xmm3,XMMWORD[((384+16))+rsp]
4307 por xmm2,xmm0
4308 por xmm3,xmm1
4309 movdqu XMMWORD[rdi],xmm2
4310 movdqu XMMWORD[16+rdi],xmm3
4311
4312 movdqa xmm0,xmm5
4313 movdqa xmm1,xmm5
4314 pandn xmm0,XMMWORD[320+rsp]
4315 movdqa xmm2,xmm5
4316 pandn xmm1,XMMWORD[((320+16))+rsp]
4317 movdqa xmm3,xmm5
4318 pand xmm2,XMMWORD[512+rsp]
4319 pand xmm3,XMMWORD[((512+16))+rsp]
4320 por xmm2,xmm0
4321 por xmm3,xmm1
4322
4323 movdqa xmm0,xmm4
4324 movdqa xmm1,xmm4
4325 pandn xmm0,xmm2
4326 movdqa xmm2,xmm4
4327 pandn xmm1,xmm3
4328 movdqa xmm3,xmm4
4329 pand xmm2,XMMWORD[416+rsp]
4330 pand xmm3,XMMWORD[((416+16))+rsp]
4331 por xmm2,xmm0
4332 por xmm3,xmm1
4333 movdqu XMMWORD[32+rdi],xmm2
4334 movdqu XMMWORD[48+rdi],xmm3
4335
4336$L$add_donex:
4337 lea rsi,[((576+56))+rsp]
4338
4339 mov r15,QWORD[((-48))+rsi]
4340
4341 mov r14,QWORD[((-40))+rsi]
4342
4343 mov r13,QWORD[((-32))+rsi]
4344
4345 mov r12,QWORD[((-24))+rsi]
4346
4347 mov rbx,QWORD[((-16))+rsi]
4348
4349 mov rbp,QWORD[((-8))+rsi]
4350
4351 lea rsp,[rsi]
4352
4353$L$point_addx_epilogue:
4354 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4355 mov rsi,QWORD[16+rsp]
4356 DB 0F3h,0C3h ;repret
4357
4358$L$SEH_end_ecp_nistz256_point_addx:
4359
ALIGN 32
;-----------------------------------------------------------------------
; ecp_nistz256_point_add_affinex
;
; ABI:   Microsoft x64 entry. Arguments arrive in rcx, rdx, r8 and are
;        copied to rdi, rsi, rdx, so the body works with
;        rdi = out, rsi = in1, rdx = in2 (moved to rbx).
; In:    in1 - 96 bytes are read: three 32-byte fields at +0, +32, +64.
;        in2 - 64 bytes are read: two 32-byte fields at +0, +32.
; Out:   96 bytes are written to out (+0 .. +95).
; Saved: rbp, rbx, r12-r15 are pushed and reloaded; rdi and rsi are
;        parked in the home slots [8+rsp] / [16+rsp] of the entry frame
;        and reloaded in the epilogue.
;
; Frame: 32*15+8 bytes below the six pushes, used as fifteen 32-byte
; slots (the extra 8 keeps rsp 16-byte aligned for the movdqa stores,
; given an ABI-conforming caller):
;
;   rsp+0   U2      rsp+128 Hsqr    rsp+256 res_y   rsp+384 in1_z
;   rsp+32  S2      rsp+160 Hcub    rsp+288 res_z   rsp+416 in2_x
;   rsp+64  H       rsp+192 Rsqr    rsp+320 in1_x   rsp+448 in2_y
;   rsp+96  R       rsp+224 res_x   rsp+352 in1_y
;
; NOTE(review): the slot names are reviewer labels derived from the data
; flow below. The arithmetic done by the __ecp_nistz256_*x helpers is
; defined outside this chunk; the "=" annotations assume they behave as
; their names suggest (field multiply / square / subtract) - confirm
; against the helper definitions.
;
; Helper call shape as used here:
;   mul_montx : rdx = first qword of operand b, rbx = &b,
;               r9..r12 = operand a, rsi = &a - 128, rdi = &result
;   sqr_montx : rdx,r14,r15,r8 = operand, rsi = &operand - 128, rdi = &result
;   sub_fromx : rbx = &subtrahend, rdi = &result (minuend in registers)
;   subx      : rax,rbp,rcx,r10 = one operand, the other in registers
; The code relies on the helpers leaving xmm0, xmm3, xmm4 and xmm5
; intact (they are read after the calls).
;-----------------------------------------------------------------------
ecp_nistz256_point_add_affinex:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_ecp_nistz256_point_add_affinex:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8

$L$point_add_affinex:
    push rbp
    push rbx
    push r12
    push r13
    push r14
    push r15
    sub rsp,32*15+8

$L$add_affinex_body:

    ; Copy in1 (three fields) to the in1_x / in1_y / in1_z slots and load
    ; the third field into rdx,r14,r15,r8 for the first squaring.
    movdqu xmm0,XMMWORD[rsi]
    mov rbx,rdx                     ; rbx = in2
    movdqu xmm1,XMMWORD[16+rsi]
    movdqu xmm2,XMMWORD[32+rsi]
    movdqu xmm3,XMMWORD[48+rsi]
    movdqu xmm4,XMMWORD[64+rsi]
    movdqu xmm5,XMMWORD[80+rsi]
    mov rdx,QWORD[((64+0))+rsi]
    mov r14,QWORD[((64+8))+rsi]
    mov r15,QWORD[((64+16))+rsi]
    mov r8,QWORD[((64+24))+rsi]
    movdqa XMMWORD[320+rsp],xmm0
    movdqa XMMWORD[(320+16)+rsp],xmm1
    movdqa XMMWORD[352+rsp],xmm2
    movdqa XMMWORD[(352+16)+rsp],xmm3
    movdqa XMMWORD[384+rsp],xmm4
    movdqa XMMWORD[(384+16)+rsp],xmm5
    por xmm5,xmm4                   ; start OR-folding the third field of in1

    ; Copy in2 (two fields) to the in2_x / in2_y slots while OR-folding
    ; both inputs; the folds are finished after the squaring call.
    movdqu xmm0,XMMWORD[rbx]
    pshufd xmm3,xmm5,0xb1
    movdqu xmm1,XMMWORD[16+rbx]
    movdqu xmm2,XMMWORD[32+rbx]
    por xmm5,xmm3
    movdqu xmm3,XMMWORD[48+rbx]
    movdqa XMMWORD[416+rsp],xmm0
    pshufd xmm4,xmm5,0x1e
    movdqa XMMWORD[(416+16)+rsp],xmm1
    por xmm1,xmm0
DB 102,72,15,110,199               ; movq xmm0,rdi - park the out pointer
    movdqa XMMWORD[448+rsp],xmm2
    movdqa XMMWORD[(448+16)+rsp],xmm3
    por xmm3,xmm2
    por xmm5,xmm4                   ; dword 0 of xmm5 = OR of all of in1+64..95
    pxor xmm4,xmm4
    por xmm3,xmm1                   ; xmm3 = OR of the four 16-byte halves of in2

    ; S2 slot = sqr(in1_z)
    lea rsi,[((64-128))+rsi]
    lea rdi,[32+rsp]
    call __ecp_nistz256_sqr_montx

    ; xmm5 := all-ones iff the 32 bytes at in1+64 are all zero.
    ; xmm4 := all-ones iff the 64 bytes of in2 are all zero.
    ; (Reviewer reading: "in1 is infinity" / "in2 is infinity" masks.)
    ; Interleaved: move the squaring result (r12..r15 as left by the
    ; call) into r9..r12 as operand a of the next multiplication.
    pcmpeqd xmm5,xmm4
    pshufd xmm4,xmm3,0xb1
    mov rdx,QWORD[rbx]

    mov r9,r12
    por xmm4,xmm3
    pshufd xmm5,xmm5,0
    pshufd xmm3,xmm4,0x1e
    mov r10,r13
    por xmm4,xmm3
    pxor xmm3,xmm3
    mov r11,r14
    pcmpeqd xmm4,xmm3
    pshufd xmm4,xmm4,0

    ; U2 = S2 slot * in2_x   (rbx still points at in2)
    lea rsi,[((32-128))+rsp]
    mov r12,r15
    lea rdi,[rsp]
    call __ecp_nistz256_mul_montx

    ; H = U2 - in1_x
    lea rbx,[320+rsp]
    lea rdi,[64+rsp]
    call __ecp_nistz256_sub_fromx

    ; S2 = S2 * in1_z
    mov rdx,QWORD[384+rsp]
    lea rbx,[384+rsp]
    mov r9,QWORD[((0+32))+rsp]
    mov r10,QWORD[((8+32))+rsp]
    lea rsi,[((-128+32))+rsp]
    mov r11,QWORD[((16+32))+rsp]
    mov r12,QWORD[((24+32))+rsp]
    lea rdi,[32+rsp]
    call __ecp_nistz256_mul_montx

    ; res_z = H * in1_z
    mov rdx,QWORD[384+rsp]
    lea rbx,[384+rsp]
    mov r9,QWORD[((0+64))+rsp]
    mov r10,QWORD[((8+64))+rsp]
    lea rsi,[((-128+64))+rsp]
    mov r11,QWORD[((16+64))+rsp]
    mov r12,QWORD[((24+64))+rsp]
    lea rdi,[288+rsp]
    call __ecp_nistz256_mul_montx

    ; S2 = S2 * in2_y
    mov rdx,QWORD[448+rsp]
    lea rbx,[448+rsp]
    mov r9,QWORD[((0+32))+rsp]
    mov r10,QWORD[((8+32))+rsp]
    lea rsi,[((-128+32))+rsp]
    mov r11,QWORD[((16+32))+rsp]
    mov r12,QWORD[((24+32))+rsp]
    lea rdi,[32+rsp]
    call __ecp_nistz256_mul_montx

    ; R = S2 - in1_y
    lea rbx,[352+rsp]
    lea rdi,[96+rsp]
    call __ecp_nistz256_sub_fromx

    ; Hsqr = sqr(H)
    mov rdx,QWORD[((0+64))+rsp]
    mov r14,QWORD[((8+64))+rsp]
    lea rsi,[((-128+64))+rsp]
    mov r15,QWORD[((16+64))+rsp]
    mov r8,QWORD[((24+64))+rsp]
    lea rdi,[128+rsp]
    call __ecp_nistz256_sqr_montx

    ; Rsqr = sqr(R)
    mov rdx,QWORD[((0+96))+rsp]
    mov r14,QWORD[((8+96))+rsp]
    lea rsi,[((-128+96))+rsp]
    mov r15,QWORD[((16+96))+rsp]
    mov r8,QWORD[((24+96))+rsp]
    lea rdi,[192+rsp]
    call __ecp_nistz256_sqr_montx

    ; Hcub = Hsqr * H
    mov rdx,QWORD[128+rsp]
    lea rbx,[128+rsp]
    mov r9,QWORD[((0+64))+rsp]
    mov r10,QWORD[((8+64))+rsp]
    lea rsi,[((-128+64))+rsp]
    mov r11,QWORD[((16+64))+rsp]
    mov r12,QWORD[((24+64))+rsp]
    lea rdi,[160+rsp]
    call __ecp_nistz256_mul_montx

    ; U2 = in1_x * Hsqr
    mov rdx,QWORD[320+rsp]
    lea rbx,[320+rsp]
    mov r9,QWORD[((0+128))+rsp]
    mov r10,QWORD[((8+128))+rsp]
    lea rsi,[((-128+128))+rsp]
    mov r11,QWORD[((16+128))+rsp]
    mov r12,QWORD[((24+128))+rsp]
    lea rdi,[rsp]
    call __ecp_nistz256_mul_montx

    ; Double r12,r13,r8,r9 (presumably the product just returned) with
    ; the carry caught in r11, then subtract (-1, r14, 0, r15).
    ; r14/r15 are not set in this function - presumably the helper left
    ; words 1 and 3 of $L$poly in them; confirm in the helper.
    xor r11,r11
    add r12,r12
    lea rsi,[192+rsp]               ; rsi = &Rsqr
    adc r13,r13
    mov rax,r12
    adc r8,r8
    adc r9,r9
    mov rbp,r13
    adc r11,0

    sub r12,-1
    mov rcx,r8
    sbb r13,r14
    sbb r8,0
    mov r10,r9
    sbb r9,r15
    sbb r11,0

    ; On borrow keep the un-subtracted copy; interleaved with loading
    ; Rsqr into rax,rbp,rcx,r10 for the subtraction that follows.
    cmovc r12,rax
    mov rax,QWORD[rsi]
    cmovc r13,rbp
    mov rbp,QWORD[8+rsi]
    cmovc r8,rcx
    mov rcx,QWORD[16+rsi]
    cmovc r9,r10
    mov r10,QWORD[24+rsi]

    ; registers = Rsqr - 2*U2
    call __ecp_nistz256_subx

    ; res_x = (Rsqr - 2*U2) - Hcub
    lea rbx,[160+rsp]
    lea rdi,[224+rsp]
    call __ecp_nistz256_sub_fromx

    ; H slot = U2 - res_x   (subx leaves the result in r12,r13,r8,r9;
    ; it is stored explicitly below)
    mov rax,QWORD[((0+0))+rsp]
    mov rbp,QWORD[((0+8))+rsp]
    mov rcx,QWORD[((0+16))+rsp]
    mov r10,QWORD[((0+24))+rsp]
    lea rdi,[64+rsp]

    call __ecp_nistz256_subx

    mov QWORD[rdi],r12
    mov QWORD[8+rdi],r13
    mov QWORD[16+rdi],r8
    mov QWORD[24+rdi],r9

    ; S2 = in1_y * Hcub
    mov rdx,QWORD[352+rsp]
    lea rbx,[352+rsp]
    mov r9,QWORD[((0+160))+rsp]
    mov r10,QWORD[((8+160))+rsp]
    lea rsi,[((-128+160))+rsp]
    mov r11,QWORD[((16+160))+rsp]
    mov r12,QWORD[((24+160))+rsp]
    lea rdi,[32+rsp]
    call __ecp_nistz256_mul_montx

    ; H slot = R * (U2 - res_x)
    mov rdx,QWORD[96+rsp]
    lea rbx,[96+rsp]
    mov r9,QWORD[((0+64))+rsp]
    mov r10,QWORD[((8+64))+rsp]
    lea rsi,[((-128+64))+rsp]
    mov r11,QWORD[((16+64))+rsp]
    mov r12,QWORD[((24+64))+rsp]
    lea rdi,[64+rsp]
    call __ecp_nistz256_mul_montx

    ; res_y = R*(U2 - res_x) - S2
    lea rbx,[32+rsp]
    lea rdi,[256+rsp]
    call __ecp_nistz256_sub_fromx

DB 102,72,15,126,199               ; movq rdi,xmm0 - recover the out pointer

    ; Branch-free output selection, 16 bytes at a time:
    ;   t   = xmm5 ? alt : computed
    ;   out = xmm4 ? in1 field : t
    ; out+64: computed = res_z, alt = $L$ONE_mont, in1 field = in1_z
    movdqa xmm0,xmm5
    movdqa xmm1,xmm5
    pandn xmm0,XMMWORD[288+rsp]
    movdqa xmm2,xmm5
    pandn xmm1,XMMWORD[((288+16))+rsp]
    movdqa xmm3,xmm5
    pand xmm2,XMMWORD[$L$ONE_mont]
    pand xmm3,XMMWORD[(($L$ONE_mont+16))]
    por xmm2,xmm0
    por xmm3,xmm1

    movdqa xmm0,xmm4
    movdqa xmm1,xmm4
    pandn xmm0,xmm2
    movdqa xmm2,xmm4
    pandn xmm1,xmm3
    movdqa xmm3,xmm4
    pand xmm2,XMMWORD[384+rsp]
    pand xmm3,XMMWORD[((384+16))+rsp]
    por xmm2,xmm0
    por xmm3,xmm1
    movdqu XMMWORD[64+rdi],xmm2
    movdqu XMMWORD[80+rdi],xmm3

    ; out+0: computed = res_x, alt = in2_x, in1 field = in1_x
    movdqa xmm0,xmm5
    movdqa xmm1,xmm5
    pandn xmm0,XMMWORD[224+rsp]
    movdqa xmm2,xmm5
    pandn xmm1,XMMWORD[((224+16))+rsp]
    movdqa xmm3,xmm5
    pand xmm2,XMMWORD[416+rsp]
    pand xmm3,XMMWORD[((416+16))+rsp]
    por xmm2,xmm0
    por xmm3,xmm1

    movdqa xmm0,xmm4
    movdqa xmm1,xmm4
    pandn xmm0,xmm2
    movdqa xmm2,xmm4
    pandn xmm1,xmm3
    movdqa xmm3,xmm4
    pand xmm2,XMMWORD[320+rsp]
    pand xmm3,XMMWORD[((320+16))+rsp]
    por xmm2,xmm0
    por xmm3,xmm1
    movdqu XMMWORD[rdi],xmm2
    movdqu XMMWORD[16+rdi],xmm3

    ; out+32: computed = res_y, alt = in2_y, in1 field = in1_y
    movdqa xmm0,xmm5
    movdqa xmm1,xmm5
    pandn xmm0,XMMWORD[256+rsp]
    movdqa xmm2,xmm5
    pandn xmm1,XMMWORD[((256+16))+rsp]
    movdqa xmm3,xmm5
    pand xmm2,XMMWORD[448+rsp]
    pand xmm3,XMMWORD[((448+16))+rsp]
    por xmm2,xmm0
    por xmm3,xmm1

    movdqa xmm0,xmm4
    movdqa xmm1,xmm4
    pandn xmm0,xmm2
    movdqa xmm2,xmm4
    pandn xmm1,xmm3
    movdqa xmm3,xmm4
    pand xmm2,XMMWORD[352+rsp]
    pand xmm3,XMMWORD[((352+16))+rsp]
    por xmm2,xmm0
    por xmm3,xmm1
    movdqu XMMWORD[32+rdi],xmm2
    movdqu XMMWORD[48+rdi],xmm3

    ; rsi = rsp value at function entry (frame 480+8, six pushes 48);
    ; reload the pushed registers relative to it, then drop the frame.
    lea rsi,[((480+56))+rsp]
    mov r15,QWORD[((-48))+rsi]
    mov r14,QWORD[((-40))+rsi]
    mov r13,QWORD[((-32))+rsi]
    mov r12,QWORD[((-24))+rsi]
    mov rbx,QWORD[((-16))+rsi]
    mov rbp,QWORD[((-8))+rsi]
    lea rsp,[rsi]

$L$add_affinex_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_ecp_nistz256_point_add_affinex:
EXTERN __imp_RtlVirtualUnwind


ALIGN 16
;-----------------------------------------------------------------------
; short_handler
;
; Exception handler referenced from $L$SEH_info_ecp_nistz256_neg in
; .xdata. It uses r8 and r9 from its caller (per the Win64
; language-specific-handler convention these are the CONTEXT record and
; the DISPATCHER_CONTEXT) and finishes in $L$common_seh_tail, which
; lives inside full_handler.
;
; NOTE(review): the field names in the comments below come from the
; documented Win64 CONTEXT / DISPATCHER_CONTEXT layouts; the numeric
; offsets are what the code actually uses - confirm against winnt.h.
;
; Logic:
;   Rip <  ImageBase + HandlerData[0] : rax = context Rax, go to tail
;   Rip >= ImageBase + HandlerData[1] : rax = context Rsp, go to tail
;   otherwise                         : two qwords sit on the stack;
;       step rax 16 bytes past them and write them back to the context
;       as R12 / R13, then go to tail.
; The tail expects rax = stack pointer value at function entry.
;-----------------------------------------------------------------------
short_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64                      ; room for the outgoing call made in the tail

    mov rax,QWORD[120+r8]           ; context->Rax
    mov rbx,QWORD[248+r8]           ; context->Rip

    mov rsi,QWORD[8+r9]             ; disp->ImageBase
    mov r11,QWORD[56+r9]            ; disp->HandlerData

    mov r10d,DWORD[r11]             ; HandlerData[0] (RVA)
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail      ; Rip below first label

    mov rax,QWORD[152+r8]           ; context->Rsp

    mov r10d,DWORD[4+r11]           ; HandlerData[1] (RVA)
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail     ; Rip at or past second label

    lea rax,[16+rax]                ; skip the two saved qwords

    mov r12,QWORD[((-8))+rax]
    mov r13,QWORD[((-16))+rax]
    mov QWORD[216+r8],r12           ; context->R12
    mov QWORD[224+r8],r13           ; context->R13

    jmp NEAR $L$common_seh_tail



ALIGN 16
;-----------------------------------------------------------------------
; full_handler
;
; Exception handler referenced from the "DB 9,0,0,0" records in .xdata
; whose handler data is { body RVA, epilogue RVA, frame size, 0 }.
; It uses r8 and r9 from its caller (per the Win64
; language-specific-handler convention: CONTEXT record and
; DISPATCHER_CONTEXT).
;
; NOTE(review): the field names in the comments below come from the
; documented Win64 CONTEXT / DISPATCHER_CONTEXT layouts; the numeric
; offsets are what the code actually uses - confirm against winnt.h.
;
; Logic:
;   Rip <  ImageBase + HandlerData[0] : rax = context Rax, go to tail
;   Rip >= ImageBase + HandlerData[1] : rax = context Rsp, go to tail
;   otherwise : rax = context Rsp + HandlerData[2]; the six qwords just
;       below rax are written back to the context as Rbp, Rbx, R12..R15.
;
; $L$common_seh_tail (also entered from short_handler) expects rax to be
; the stack pointer value at function entry: it reloads Rdi / Rsi from
; [8+rax] / [16+rax], stores rax as Rsp, copies the context into
; disp->ContextRecord, calls RtlVirtualUnwind and returns 1 in eax.
;-----------------------------------------------------------------------
full_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64                      ; slots [32..56+rsp] carry call arguments 5-8

    mov rax,QWORD[120+r8]           ; context->Rax
    mov rbx,QWORD[248+r8]           ; context->Rip

    mov rsi,QWORD[8+r9]             ; disp->ImageBase
    mov r11,QWORD[56+r9]            ; disp->HandlerData

    mov r10d,DWORD[r11]             ; HandlerData[0] (RVA)
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail      ; Rip below first label

    mov rax,QWORD[152+r8]           ; context->Rsp

    mov r10d,DWORD[4+r11]           ; HandlerData[1] (RVA)
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail     ; Rip at or past second label

    mov r10d,DWORD[8+r11]           ; HandlerData[2]: bytes from Rsp to entry rsp
    lea rax,[r10*1+rax]

    mov rbp,QWORD[((-8))+rax]
    mov rbx,QWORD[((-16))+rax]
    mov r12,QWORD[((-24))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r15,QWORD[((-48))+rax]
    mov QWORD[144+r8],rbx           ; context->Rbx
    mov QWORD[160+r8],rbp           ; context->Rbp
    mov QWORD[216+r8],r12           ; context->R12
    mov QWORD[224+r8],r13           ; context->R13
    mov QWORD[232+r8],r14           ; context->R14
    mov QWORD[240+r8],r15           ; context->R15

$L$common_seh_tail:
    mov rdi,QWORD[8+rax]
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax           ; context->Rsp
    mov QWORD[168+r8],rsi           ; context->Rsi
    mov QWORD[176+r8],rdi           ; context->Rdi

    mov rdi,QWORD[40+r9]            ; disp->ContextRecord (copy destination)
    mov rsi,r8                      ; copy source: the context passed in
    mov ecx,154                     ; 154 qwords = 1232 bytes
    DD 0xa548f3fc                   ; bytes FC F3 48 A5 = cld; rep movsq

    mov rsi,r9
    xor rcx,rcx                     ; arg1 = 0
    mov rdx,QWORD[8+rsi]            ; arg2 = disp->ImageBase
    mov r8,QWORD[rsi]               ; arg3 = disp->ControlPc
    mov r9,QWORD[16+rsi]            ; arg4 = disp->FunctionEntry
    mov r10,QWORD[40+rsi]           ; disp->ContextRecord
    lea r11,[56+rsi]                ; &disp->HandlerData
    lea r12,[24+rsi]                ; &disp->EstablisherFrame
    mov QWORD[32+rsp],r10           ; arg5
    mov QWORD[40+rsp],r11           ; arg6
    mov QWORD[48+rsp],r12           ; arg7
    mov QWORD[56+rsp],rcx           ; arg8 = 0
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1                       ; return value 1
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    DB 0F3h,0C3h ;repret


section .pdata rdata align=4
ALIGN 4
; Function table: one three-dword entry per covered routine, each an
; image-relative address triple { begin label, end label, .xdata record }.
; The w5/w7 select routines share one .xdata record per flavour.

    ; ecp_nistz256_neg
    DD $L$SEH_begin_ecp_nistz256_neg wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_neg wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_neg wrt ..imagebase

    ; ecp_nistz256_ord_mul_mont
    DD $L$SEH_begin_ecp_nistz256_ord_mul_mont wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_ord_mul_mont wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_ord_mul_mont wrt ..imagebase

    ; ecp_nistz256_ord_sqr_mont
    DD $L$SEH_begin_ecp_nistz256_ord_sqr_mont wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase

    ; ecp_nistz256_ord_mul_montx
    DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase

    ; ecp_nistz256_ord_sqr_montx
    DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase

    ; ecp_nistz256_mul_mont
    DD $L$SEH_begin_ecp_nistz256_mul_mont wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_mul_mont wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_mul_mont wrt ..imagebase

    ; ecp_nistz256_sqr_mont
    DD $L$SEH_begin_ecp_nistz256_sqr_mont wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_sqr_mont wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_sqr_mont wrt ..imagebase

    ; ecp_nistz256_select_w5 (shared select_wX record)
    DD $L$SEH_begin_ecp_nistz256_select_w5 wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_select_w5 wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase

    ; ecp_nistz256_select_w7 (shared select_wX record)
    DD $L$SEH_begin_ecp_nistz256_select_w7 wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_select_w7 wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase

    ; ecp_nistz256_avx2_select_w5 (shared avx2_select_wX record)
    DD $L$SEH_begin_ecp_nistz256_avx2_select_w5 wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_avx2_select_w5 wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase

    ; ecp_nistz256_avx2_select_w7 (shared avx2_select_wX record)
    DD $L$SEH_begin_ecp_nistz256_avx2_select_w7 wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_avx2_select_w7 wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase

    ; ecp_nistz256_point_double
    DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_point_double wrt ..imagebase

    ; ecp_nistz256_point_add
    DD $L$SEH_begin_ecp_nistz256_point_add wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_point_add wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_point_add wrt ..imagebase

    ; ecp_nistz256_point_add_affine
    DD $L$SEH_begin_ecp_nistz256_point_add_affine wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase

    ; ecp_nistz256_point_doublex
    DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase

    ; ecp_nistz256_point_addx
    DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase

    ; ecp_nistz256_point_add_affinex
    DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase
    DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase
    DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase

section .xdata rdata align=8
ALIGN 8
; Unwind records referenced from .pdata. Two shapes are used:
;
;  (a) Handler-based: "DB 9,0,0,0" followed by the handler RVA and the
;      handler data. Read as an UNWIND_INFO header this is version 1
;      with the exception-handler flag set (1 | 1<<3 = 9) and no
;      prologue codes. The handler data is { body RVA, epilogue RVA }
;      for short_handler, plus { frame size, 0 } for full_handler, which
;      reads the frame size as the third dword.
;
;  (b) Table-based (the select_wX records): header
;      { version/flags, prologue size, slot count, frame register }
;      followed by 2-byte unwind slots
;      { prologue offset, op | info<<4 } with operand slots where needed.
;
; NOTE(review): decoding follows the documented x64 UNWIND_INFO /
; UNWIND_CODE formats; the routines described by shape (b) are outside
; this chunk, so the decoded prologue was not cross-checked here.

$L$SEH_info_ecp_nistz256_neg:
DB 9,0,0,0
    DD short_handler wrt ..imagebase
    DD $L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase
$L$SEH_info_ecp_nistz256_ord_mul_mont:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase
    DD 48,0                         ; 48 = six 8-byte saved registers
$L$SEH_info_ecp_nistz256_ord_sqr_mont:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase
    DD 48,0
$L$SEH_info_ecp_nistz256_ord_mul_montx:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase
    DD 48,0
$L$SEH_info_ecp_nistz256_ord_sqr_montx:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase
    DD 48,0
$L$SEH_info_ecp_nistz256_mul_mont:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
    DD 48,0
$L$SEH_info_ecp_nistz256_sqr_mont:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
    DD 48,0
$L$SEH_info_ecp_nistz256_select_wX:
DB 0x01,0x33,0x16,0x00             ; version 1, prologue 0x33 bytes, 22 slots
DB 0x33,0xf8,0x09,0x00             ; save xmm15 at rsp+0x90
DB 0x2e,0xe8,0x08,0x00             ; save xmm14 at rsp+0x80
DB 0x29,0xd8,0x07,0x00             ; save xmm13 at rsp+0x70
DB 0x24,0xc8,0x06,0x00             ; save xmm12 at rsp+0x60
DB 0x1f,0xb8,0x05,0x00             ; save xmm11 at rsp+0x50
DB 0x1a,0xa8,0x04,0x00             ; save xmm10 at rsp+0x40
DB 0x15,0x98,0x03,0x00             ; save xmm9  at rsp+0x30
DB 0x10,0x88,0x02,0x00             ; save xmm8  at rsp+0x20
DB 0x0c,0x78,0x01,0x00             ; save xmm7  at rsp+0x10
DB 0x08,0x68,0x00,0x00             ; save xmm6  at rsp+0x00
DB 0x04,0x01,0x15,0x00             ; large alloc, 0x15*8 = 0xa8 bytes
ALIGN 8
$L$SEH_info_ecp_nistz256_avx2_select_wX:
DB 0x01,0x36,0x17,0x0b             ; version 1, prologue 0x36 bytes, 23 slots, frame reg 11 (r11)
DB 0x36,0xf8,0x09,0x00             ; save xmm15 at rsp+0x90
DB 0x31,0xe8,0x08,0x00             ; save xmm14 at rsp+0x80
DB 0x2c,0xd8,0x07,0x00             ; save xmm13 at rsp+0x70
DB 0x27,0xc8,0x06,0x00             ; save xmm12 at rsp+0x60
DB 0x22,0xb8,0x05,0x00             ; save xmm11 at rsp+0x50
DB 0x1d,0xa8,0x04,0x00             ; save xmm10 at rsp+0x40
DB 0x18,0x98,0x03,0x00             ; save xmm9  at rsp+0x30
DB 0x13,0x88,0x02,0x00             ; save xmm8  at rsp+0x20
DB 0x0e,0x78,0x01,0x00             ; save xmm7  at rsp+0x10
DB 0x09,0x68,0x00,0x00             ; save xmm6  at rsp+0x00
DB 0x04,0x01,0x15,0x00             ; large alloc, 0x15*8 = 0xa8 bytes
DB 0x00,0xb3,0x00,0x00             ; set frame register (op 3); last two bytes pad
ALIGN 8
$L$SEH_info_ecp_nistz256_point_double:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase
    DD 32*5+56,0                    ; frame size handed to full_handler
$L$SEH_info_ecp_nistz256_point_add:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase
    DD 32*18+56,0
$L$SEH_info_ecp_nistz256_point_add_affine:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase
    DD 32*15+56,0
ALIGN 8
$L$SEH_info_ecp_nistz256_point_doublex:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase
    DD 32*5+56,0
$L$SEH_info_ecp_nistz256_point_addx:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase
    DD 32*18+56,0
$L$SEH_info_ecp_nistz256_point_add_affinex:
DB 9,0,0,0
    DD full_handler wrt ..imagebase
    DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase
    DD 32*15+56,0                   ; 480-byte slots + 8 pad + 48 pushed