# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

# When building under MemorySanitizer, hand-written assembly cannot be
# instrumented, so force the C fallback by defining OPENSSL_NO_ASM.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

# The whole file is compiled only for x86-64 ELF targets with assembly enabled.
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern	OPENSSL_ia32cap_P
.hidden	OPENSSL_ia32cap_P

# Constant pool.  All multi-limb values are stored as four 64-bit limbs,
# least-significant limb first.
.align	64
# .Lpoly: the NIST P-256 field prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
.Lpoly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

# Small per-lane integer constants (8 x 32-bit lanes) used by SIMD paths.
.LOne:
.long	1,1,1,1,1,1,1,1
.LTwo:
.long	2,2,2,2,2,2,2,2
.LThree:
.long	3,3,3,3,3,3,3,3
# .LONE_mont: the value 1 converted to the Montgomery domain mod p.
.LONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe

# .Lord: the P-256 group order n.
# .LordK: Montgomery reduction factor for n (read alongside .Lord at offset 32).
.Lord:
.quad	0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad	0xccd1c8aaee00bc4f
Adam Langleyfad63272015-11-12 12:15:39 -080039
################################################################################
# ecp_nistz256_neg(res, a)
# res = -a mod p, with p = .Lpoly (the P-256 field prime).
# In:  %rdi = res[4], %rsi = a[4]   (SysV AMD64)
# Computes 0 - a, then adds p back; if the subtraction borrowed (a != 0) the
# reduced sum is kept, otherwise the original 0 - a (i.e. 0) is kept via cmovz.
# Saves/restores callee-saved %r12/%r13.
.globl	ecp_nistz256_neg
.hidden	ecp_nistz256_neg
.type	ecp_nistz256_neg,@function
.align	32
ecp_nistz256_neg:
.cfi_startproc
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
.Lneg_body:

	# r8:r9:r10:r11 = 0 - a, r13 = borrow-out
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	.Lpoly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13

	# add p back; keep the unreduced value when a was zero (r13 == 0)
	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	movq	0(%rsp),%r13
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
.Lneg_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_neg,.-ecp_nistz256_neg
96
97
98
99
Adam Langleyfad63272015-11-12 12:15:39 -0800100
101
################################################################################
# ecp_nistz256_ord_mul_mont(res, a, b)
# Montgomery multiplication modulo the P-256 group order n (.Lord):
# res = a * b * 2^-256 mod n.
# In:  %rdi = res[4], %rsi = a[4], %rdx = b[4]   (SysV AMD64)
# Dispatches to the BMI2/ADX (mulx/adcx/adox) variant when OPENSSL_ia32cap_P
# advertises both feature bits (mask 0x80100 on capability word 8(%rcx)).
.globl	ecp_nistz256_ord_mul_mont
.hidden	ecp_nistz256_ord_mul_mont
.type	ecp_nistz256_ord_mul_mont,@function
.align	32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_mul_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mul_body:

	# rbx = b, r14 = &.Lord, r15 = .LordK (Montgomery factor)
	movq	0(%rdx),%rax
	movq	%rdx,%rbx
	leaq	.Lord(%rip),%r14
	movq	.LordK(%rip),%r15

	# ---- word 0 of b: acc = a * b[0] ----
	movq	%rax,%rcx
	mulq	0(%rsi)
	movq	%rax,%r8
	movq	%rcx,%rax
	movq	%rdx,%r9

	mulq	8(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	16(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r8,%r13
	imulq	%r15,%r8		# r8 = acc0 * ordK: reduction multiplier

	movq	%rdx,%r11
	mulq	24(%rsi)
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	# ---- Montgomery reduction step 0 ----
	mulq	0(%r14)
	movq	%r8,%rbp
	addq	%rax,%r13
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%r8

	mulq	8(%r14)
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	%rdx,%r10
	movq	%rbp,%rdx
	adcq	$0,%r8

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	8(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r8,%r11
	adcq	%rbp,%r12
	adcq	$0,%r13

	# ---- word 1 of b ----
	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r9,%rcx
	imulq	%r15,%r9

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	xorq	%r8,%r8
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	# ---- reduction step 1 ----
	mulq	0(%r14)
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	%r9,%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%r9

	mulq	8(%r14)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	%rdx,%r11
	movq	%rbp,%rdx
	adcq	$0,%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r12
	movq	16(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r9,%r12
	adcq	%rbp,%r13
	adcq	$0,%r8

	# ---- word 2 of b ----
	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r10,%rcx
	imulq	%r15,%r10

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	xorq	%r9,%r9
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	# ---- reduction step 2 ----
	mulq	0(%r14)
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	%r10,%rax
	adcq	%rdx,%rcx

	subq	%r10,%r12
	sbbq	$0,%r10

	mulq	8(%r14)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	%rdx,%r12
	movq	%rbp,%rdx
	adcq	$0,%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r13
	movq	24(%rbx),%rax
	sbbq	%rdx,%rbp

	addq	%r10,%r13
	adcq	%rbp,%r8
	adcq	$0,%r9

	# ---- word 3 of b ----
	movq	%rax,%rcx
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	8(%rsi)
	addq	%rbp,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rcx,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	16(%rsi)
	addq	%rbp,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rcx,%rax
	adcq	$0,%rdx

	movq	%r11,%rcx
	imulq	%r15,%r11

	movq	%rdx,%rbp
	mulq	24(%rsi)
	addq	%rbp,%r8
	adcq	$0,%rdx
	xorq	%r10,%r10
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	# ---- reduction step 3 ----
	mulq	0(%r14)
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	%r11,%rax
	adcq	%rdx,%rcx

	subq	%r11,%r13
	sbbq	$0,%r11

	mulq	8(%r14)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	%rdx,%r13
	movq	%rbp,%rdx
	adcq	$0,%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	sbbq	%rdx,%rbp

	addq	%r11,%r8
	adcq	%rbp,%r9
	adcq	$0,%r10

	# ---- final conditional subtraction of n ----
	movq	%r12,%rsi
	subq	0(%r14),%r12
	movq	%r13,%r11
	sbbq	8(%r14),%r13
	movq	%r8,%rcx
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rsi,%r12
	cmovcq	%r11,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
429
430
431
432
433
434
435
################################################################################
# ecp_nistz256_ord_sqr_mont(res, a, rep)
# Repeated Montgomery squaring modulo the P-256 group order n (.Lord):
# res = a^(2^rep) * 2^-256... (rep successive Montgomery squarings).
# In:  %rdi = res[4], %rsi = a[4], %rdx = rep (loop count, decremented in %rbx)
# Dispatches to the BMI2/ADX variant when both capability bits (0x80100 of
# OPENSSL_ia32cap_P word at offset 8) are set.
# %xmm1-%xmm3 spill a[1..3] across the squaring (via the .byte-encoded movq's)
# and are cleared before returning.
.globl	ecp_nistz256_ord_sqr_mont
.hidden	ecp_nistz256_ord_sqr_mont
.type	ecp_nistz256_ord_sqr_mont,@function
.align	32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	cmpl	$0x80100,%ecx
	je	.Lecp_nistz256_ord_sqr_montx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqr_body:

	movq	0(%rsi),%r8
	movq	8(%rsi),%rax
	movq	16(%rsi),%r14
	movq	24(%rsi),%r15
	leaq	.Lord(%rip),%rsi
	movq	%rdx,%rbx		# rbx = repetition count
	jmp	.Loop_ord_sqr

.align	32
.Loop_ord_sqr:

	# ---- off-diagonal products a[i]*a[j], i<j ----
	movq	%rax,%rbp		# rbp = a[1]
	mulq	%r8			# a[1]*a[0]
	movq	%rax,%r9
.byte	102,72,15,110,205		# movq %rbp,%xmm1 (spill a[1])
	movq	%r14,%rax
	movq	%rdx,%r10

	mulq	%r8			# a[2]*a[0]
	addq	%rax,%r10
	movq	%r15,%rax
.byte	102,73,15,110,214		# movq %r14,%xmm2 (spill a[2])
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r8			# a[3]*a[0]
	addq	%rax,%r11
	movq	%r15,%rax
.byte	102,73,15,110,223		# movq %r15,%xmm3 (spill a[3])
	adcq	$0,%rdx
	movq	%rdx,%r12

	mulq	%r14			# a[3]*a[2]
	movq	%rax,%r13
	movq	%r14,%rax
	movq	%rdx,%r14

	mulq	%rbp			# a[2]*a[1]
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r15

	mulq	%rbp			# a[3]*a[1]
	addq	%rax,%r12
	adcq	$0,%rdx

	addq	%r15,%r12
	adcq	%rdx,%r13
	adcq	$0,%r14

	# ---- double the off-diagonal part ----
	xorq	%r15,%r15
	movq	%r8,%rax
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	# ---- add the diagonal squares a[i]*a[i] ----
	mulq	%rax
	movq	%rax,%r8
.byte	102,72,15,126,200		# movq %xmm1,%rax (reload a[1])
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r9
	adcq	%rax,%r10
.byte	102,72,15,126,208		# movq %xmm2,%rax (reload a[2])
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%rax
	addq	%rbp,%r11
	adcq	%rax,%r12
.byte	102,72,15,126,216		# movq %xmm3,%rax (reload a[3])
	adcq	$0,%rdx
	movq	%rdx,%rbp

	movq	%r8,%rcx
	imulq	32(%rsi),%r8		# * .LordK

	mulq	%rax
	addq	%rbp,%r13
	adcq	%rax,%r14
	movq	0(%rsi),%rax
	adcq	%rdx,%r15

	# ---- Montgomery reduction, four steps ----
	mulq	%r8
	movq	%r8,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r8,%r10
	sbbq	$0,%rbp

	mulq	%r8
	addq	%rcx,%r9
	adcq	$0,%rdx
	addq	%rax,%r9
	movq	%r8,%rax
	adcq	%rdx,%r10
	movq	%r8,%rdx
	adcq	$0,%rbp

	movq	%r9,%rcx
	imulq	32(%rsi),%r9

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r11
	movq	0(%rsi),%rax
	sbbq	%rdx,%r8

	addq	%rbp,%r11
	adcq	$0,%r8

	mulq	%r9
	movq	%r9,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r9,%r11
	sbbq	$0,%rbp

	mulq	%r9
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%r9,%rax
	adcq	%rdx,%r11
	movq	%r9,%rdx
	adcq	$0,%rbp

	movq	%r10,%rcx
	imulq	32(%rsi),%r10

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r8
	movq	0(%rsi),%rax
	sbbq	%rdx,%r9

	addq	%rbp,%r8
	adcq	$0,%r9

	mulq	%r10
	movq	%r10,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r10,%r8
	sbbq	$0,%rbp

	mulq	%r10
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%r10,%rax
	adcq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%rbp

	movq	%r11,%rcx
	imulq	32(%rsi),%r11

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r9
	movq	0(%rsi),%rax
	sbbq	%rdx,%r10

	addq	%rbp,%r9
	adcq	$0,%r10

	mulq	%r11
	movq	%r11,%rbp
	addq	%rax,%rcx
	movq	8(%rsi),%rax
	adcq	%rdx,%rcx

	subq	%r11,%r9
	sbbq	$0,%rbp

	mulq	%r11
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	movq	%r11,%rdx
	adcq	$0,%rbp

	shlq	$32,%rax
	shrq	$32,%rdx
	subq	%rax,%r10
	sbbq	%rdx,%r11

	addq	%rbp,%r10
	adcq	$0,%r11

	# ---- add high half, then conditionally subtract n ----
	xorq	%rdx,%rdx
	addq	%r12,%r8
	adcq	%r13,%r9
	movq	%r8,%r12
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	subq	0(%rsi),%r8
	movq	%r10,%r14
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r15
	sbbq	24(%rsi),%r11
	sbbq	$0,%rdx

	cmovcq	%r12,%r8
	cmovncq	%r9,%rax
	cmovncq	%r10,%r14
	cmovncq	%r11,%r15

	decq	%rbx
	jnz	.Loop_ord_sqr

	movq	%r8,0(%rdi)
	movq	%rax,8(%rdi)
	pxor	%xmm1,%xmm1		# scrub spilled secrets
	movq	%r14,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r15,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqr_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
733
################################################################################
# ecp_nistz256_ord_mul_montx(res, a, b) — BMI2/ADX variant.
# Same contract as ecp_nistz256_ord_mul_mont (Montgomery multiply mod the
# P-256 group order), implemented with mulx plus the dual carry chains
# adcx/adox.  Reached via the capability check in the generic entry point.
# Note: %rsi and %r14 are pre-biased by -128 so that all displacements in the
# inner loops fit in a signed byte (shorter encodings).
.type	ecp_nistz256_ord_mul_montx,@function
.align	32
ecp_nistz256_ord_mul_montx:
.cfi_startproc
.Lecp_nistz256_ord_mul_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_mulx_body:

	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi		# bias for shorter displacements
	leaq	.Lord-128(%rip),%r14
	movq	.LordK(%rip),%r15

	# ---- word 0 of b ----
	mulxq	%r9,%r8,%r9
	mulxq	%r10,%rcx,%r10
	mulxq	%r11,%rbp,%r11
	addq	%rcx,%r9
	mulxq	%r12,%rcx,%r12
	movq	%r8,%rdx
	mulxq	%r15,%rdx,%rax		# rdx = reduction multiplier
	adcq	%rbp,%r10
	adcq	%rcx,%r11
	adcq	$0,%r12

	# reduction step 0
	xorq	%r13,%r13
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	24+128(%r14),%rcx,%rbp
	movq	8(%rbx),%rdx
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12
	adcxq	%r8,%r12
	adoxq	%r8,%r13
	adcq	$0,%r13

	# ---- word 1 of b ----
	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r9,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	adcxq	%r8,%r13
	adoxq	%r8,%r8
	adcq	$0,%r8

	# reduction step 1
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	24+128(%r14),%rcx,%rbp
	movq	16(%rbx),%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcxq	%r9,%r13
	adoxq	%r9,%r8
	adcq	$0,%r8

	# ---- word 2 of b ----
	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r10,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	adcxq	%r9,%r8
	adoxq	%r9,%r9
	adcq	$0,%r9

	# reduction step 2
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	24+128(%r14),%rcx,%rbp
	movq	24(%rbx),%rdx
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8
	adcxq	%r10,%r8
	adoxq	%r10,%r9
	adcq	$0,%r9

	# ---- word 3 of b ----
	mulxq	0+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%rsi),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%rsi),%rcx,%rbp
	movq	%r11,%rdx
	mulxq	%r15,%rdx,%rax
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9

	adcxq	%r10,%r9
	adoxq	%r10,%r10
	adcq	$0,%r10

	# reduction step 3
	mulxq	0+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	8+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13

	mulxq	16+128(%r14),%rcx,%rbp
	adcxq	%rcx,%r13
	adoxq	%rbp,%r8

	mulxq	24+128(%r14),%rcx,%rbp
	leaq	128(%r14),%r14		# undo the -128 bias
	movq	%r12,%rbx
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	movq	%r13,%rdx
	adcxq	%r11,%r9
	adoxq	%r11,%r10
	adcq	$0,%r10

	# ---- final conditional subtraction of n ----
	movq	%r8,%rcx
	subq	0(%r14),%r12
	sbbq	8(%r14),%r13
	sbbq	16(%r14),%r8
	movq	%r9,%rbp
	sbbq	24(%r14),%r9
	sbbq	$0,%r10

	cmovcq	%rbx,%r12
	cmovcq	%rdx,%r13
	cmovcq	%rcx,%r8
	cmovcq	%rbp,%r9

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_mulx_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx
975
################################################################################
# ecp_nistz256_ord_sqr_montx(res, a, rep) — BMI2/ADX variant.
# Same contract as ecp_nistz256_ord_sqr_mont: rep successive Montgomery
# squarings modulo the P-256 group order, using mulx/adcx/adox.
# %xmm1-%xmm3 spill a[1..3] across each iteration (the .byte-encoded movq's)
# and are cleared before returning.
.type	ecp_nistz256_ord_sqr_montx,@function
.align	32
ecp_nistz256_ord_sqr_montx:
.cfi_startproc
.Lecp_nistz256_ord_sqr_montx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lord_sqrx_body:

	movq	%rdx,%rbx		# rbx = repetition count
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	.Lord(%rip),%rsi
	jmp	.Loop_ord_sqrx

.align	32
.Loop_ord_sqrx:
	# ---- off-diagonal products ----
	mulxq	%r14,%r9,%r10		# a[0]*a[1]
	mulxq	%r15,%rcx,%r11		# a[0]*a[2]
	movq	%rdx,%rax
.byte	102,73,15,110,206		# movq %r14,%xmm1 (spill a[1])
	mulxq	%r8,%rbp,%r12		# a[0]*a[3]
	movq	%r14,%rdx
	addq	%rcx,%r10
.byte	102,73,15,110,215		# movq %r15,%xmm2 (spill a[2])
	adcq	%rbp,%r11
	adcq	$0,%r12
	xorq	%r13,%r13

	mulxq	%r15,%rcx,%rbp		# a[1]*a[2]
	adcxq	%rcx,%r11
	adoxq	%rbp,%r12

	mulxq	%r8,%rcx,%rbp		# a[1]*a[3]
	movq	%r15,%rdx
	adcxq	%rcx,%r12
	adoxq	%rbp,%r13
	adcq	$0,%r13

	mulxq	%r8,%rcx,%r14		# a[2]*a[3]
	movq	%rax,%rdx
.byte	102,73,15,110,216		# movq %r8,%xmm3 (spill a[3])
	xorq	%r15,%r15
	adcxq	%r9,%r9			# double while accumulating
	adoxq	%rcx,%r13
	adcxq	%r10,%r10
	adoxq	%r15,%r14

	# ---- diagonal squares, interleaved with the doubling ----
	mulxq	%rdx,%r8,%rbp		# a[0]^2
.byte	102,72,15,126,202		# movq %xmm1,%rdx (reload a[1])
	adcxq	%r11,%r11
	adoxq	%rbp,%r9
	adcxq	%r12,%r12
	mulxq	%rdx,%rcx,%rax		# a[1]^2
.byte	102,72,15,126,210		# movq %xmm2,%rdx (reload a[2])
	adcxq	%r13,%r13
	adoxq	%rcx,%r10
	adcxq	%r14,%r14
	mulxq	%rdx,%rcx,%rbp		# a[2]^2
.byte	0x67				# addr-size prefix: alignment padding
.byte	102,72,15,126,218		# movq %xmm3,%rdx (reload a[3])
	adoxq	%rax,%r11
	adcxq	%r15,%r15
	adoxq	%rcx,%r12
	adoxq	%rbp,%r13
	mulxq	%rdx,%rcx,%rax		# a[3]^2
	adoxq	%rcx,%r14
	adoxq	%rax,%r15

	# ---- Montgomery reduction, four steps ----
	movq	%r8,%rdx
	mulxq	32(%rsi),%rdx,%rcx	# * .LordK

	xorq	%rax,%rax
	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	adcxq	%rax,%r8

	movq	%r9,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	adoxq	%rax,%r9

	movq	%r10,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rbp,%r8
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r8
	adoxq	%rbp,%r9
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	adcxq	%rax,%r10

	movq	%r11,%rdx
	mulxq	32(%rsi),%rdx,%rcx

	mulxq	0(%rsi),%rcx,%rbp
	adoxq	%rcx,%r11
	adcxq	%rbp,%r8
	mulxq	8(%rsi),%rcx,%rbp
	adoxq	%rcx,%r8
	adcxq	%rbp,%r9
	mulxq	16(%rsi),%rcx,%rbp
	adoxq	%rcx,%r9
	adcxq	%rbp,%r10
	mulxq	24(%rsi),%rcx,%rbp
	adoxq	%rcx,%r10
	adcxq	%rbp,%r11
	adoxq	%rax,%r11

	# ---- add high half, then conditionally subtract n ----
	addq	%r8,%r12
	adcq	%r13,%r9
	movq	%r12,%rdx
	adcq	%r14,%r10
	adcq	%r15,%r11
	movq	%r9,%r14
	adcq	$0,%rax

	subq	0(%rsi),%r12
	movq	%r10,%r15
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r11,%r8
	sbbq	24(%rsi),%r11
	sbbq	$0,%rax

	cmovncq	%r12,%rdx
	cmovncq	%r9,%r14
	cmovncq	%r10,%r15
	cmovncq	%r11,%r8

	decq	%rbx
	jnz	.Loop_ord_sqrx

	movq	%rdx,0(%rdi)
	movq	%r14,8(%rdi)
	pxor	%xmm1,%xmm1		# scrub spilled secrets
	movq	%r15,16(%rdi)
	pxor	%xmm2,%xmm2
	movq	%r8,24(%rdi)
	pxor	%xmm3,%xmm3

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lord_sqrx_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx
1189
Robert Sloan5cbb5c82018-04-24 11:35:46 -07001190
1191
1192
1193
1194
################################################################################
# ecp_nistz256_mul_mont(res, a, b)
# Montgomery multiplication modulo the P-256 field prime p (.Lpoly):
# res = a * b * 2^-256 mod p.
# In:  %rdi = res[4], %rsi = a[4], %rdx = b[4]   (SysV AMD64)
# Probes OPENSSL_ia32cap_P (mask 0x80100 on word 8(%rcx)) and dispatches to
# either __ecp_nistz256_mul_montq (mul/adc) or __ecp_nistz256_mul_montx
# (mulx/adcx/adox).  The prologue/epilogue is shared by both paths.
.globl	ecp_nistz256_mul_mont
.hidden	ecp_nistz256_mul_mont
.type	ecp_nistz256_mul_mont,@function
.align	32
ecp_nistz256_mul_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
.Lmul_mont:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lmul_body:
	cmpl	$0x80100,%ecx
	je	.Lmul_montx
	# generic path: load b[0] and a into the registers __..._montq expects
	movq	%rdx,%rbx
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
	jmp	.Lmul_mont_done

.align	32
.Lmul_montx:
	# BMI2/ADX path: b[0] goes in %rdx for mulx; %rsi biased by -128
	movq	%rdx,%rbx
	movq	0(%rdx),%rdx
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_mul_montx
.Lmul_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
1266
################################################################################
# __ecp_nistz256_mul_montq — internal helper, generic mul/adc path.
# Montgomery multiply mod the P-256 prime (.Lpoly).
# In:  %rbx = b, %rax = b[0], %rsi = a, (%r9..%r12) = a[0..3], %rdi = res.
# Reduction exploits p's sparse form: each step folds limb*2^32 shifts
# (shl/shr by 32) instead of full multiplications by p's middle limbs.
# Caller provides the stack frame (see ecp_nistz256_mul_mont).
.type	__ecp_nistz256_mul_montq,@function
.align	32
__ecp_nistz256_mul_montq:
.cfi_startproc

	# ---- word 0 of b: acc = a * b[0] ----
	movq	%rax,%rbp
	mulq	%r9
	movq	.Lpoly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	.Lpoly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12

	# reduction step 0: fold acc0 using p = 2^256-2^224+2^192+2^96-1
	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	# ---- word 1 of b ----
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	# reduction step 1
	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	# ---- word 2 of b ----
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	# reduction step 2
	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	# ---- word 3 of b ----
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	# reduction step 3
	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10

	# ---- final conditional subtraction of p ----
	subq	$-1,%r12		# p[0] = 2^64-1, so subtract -1
	movq	%r8,%rbx
	sbbq	%r14,%r13		# p[1] = .Lpoly+8
	sbbq	$0,%r8			# p[2] = 0
	movq	%r9,%rdx
	sbbq	%r15,%r9		# p[3] = .Lpoly+24
	sbbq	$0,%r10

	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	__ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
1484
1485
1486
1487
1488
1489
1490
1491
################################################################################
# ecp_nistz256_sqr_mont(res, a)
# Montgomery squaring modulo the P-256 field prime p (.Lpoly):
# res = a * a * 2^-256 mod p.
# In:  %rdi = res[4], %rsi = a[4]   (SysV AMD64)
# Probes OPENSSL_ia32cap_P (mask 0x80100 on word 8(%rcx)) and dispatches to
# __ecp_nistz256_sqr_montq or the BMI2/ADX __ecp_nistz256_sqr_montx.
.globl	ecp_nistz256_sqr_mont
.hidden	ecp_nistz256_sqr_mont
.type	ecp_nistz256_sqr_mont,@function
.align	32
ecp_nistz256_sqr_mont:
.cfi_startproc
	leaq	OPENSSL_ia32cap_P(%rip),%rcx
	movq	8(%rcx),%rcx
	andl	$0x80100,%ecx
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsqr_body:
	cmpl	$0x80100,%ecx
	je	.Lsqr_montx
	# generic path: a[0] in %rax for mul
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
	jmp	.Lsqr_mont_done

.align	32
.Lsqr_montx:
	# BMI2/ADX path: a[0] in %rdx for mulx; %rsi biased by -128
	movq	0(%rsi),%rdx
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8
	leaq	-128(%rsi),%rsi

	call	__ecp_nistz256_sqr_montx
.Lsqr_mont_done:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbx
.cfi_restore	%rbx
	movq	40(%rsp),%rbp
.cfi_restore	%rbp
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lsqr_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
1558
/*
 * __ecp_nistz256_sqr_montq -- Montgomery squaring modulo the P-256 prime,
 * scalar (mulq/adcq) code path.
 * In:  %rsi -> a[0..3] (four 64-bit limbs, little-endian); the caller also
 *      pre-loads a[0],a[1],a[2],a[3] into %rax,%r14,%r15,%r8 (see the
 *      dispatch code in ecp_nistz256_sqr_mont).
 * Out: 32-byte Montgomery result stored at 0..24(%rdi).
 * Clobbers: %rax,%rbp,%rcx,%rdx,%rsi,%r8-%r15, flags.  Branch-free and
 * constant-time: the final reduction uses cmovc, not a conditional jump.
 */
1559.type __ecp_nistz256_sqr_montq,@function
1560.align 32
1561__ecp_nistz256_sqr_montq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001562.cfi_startproc
/* Off-diagonal products a[i]*a[j] (i<j) accumulated into %r9..%r13. */
Adam Langleyfad63272015-11-12 12:15:39 -08001563 movq %rax,%r13
1564 mulq %r14
1565 movq %rax,%r9
1566 movq %r15,%rax
1567 movq %rdx,%r10
1568
1569 mulq %r13
1570 addq %rax,%r10
1571 movq %r8,%rax
1572 adcq $0,%rdx
1573 movq %rdx,%r11
1574
1575 mulq %r13
1576 addq %rax,%r11
1577 movq %r15,%rax
1578 adcq $0,%rdx
1579 movq %rdx,%r12
1580
1581
1582 mulq %r14
1583 addq %rax,%r11
1584 movq %r8,%rax
1585 adcq $0,%rdx
1586 movq %rdx,%rbp
1587
1588 mulq %r14
1589 addq %rax,%r12
1590 movq %r8,%rax
1591 adcq $0,%rdx
1592 addq %rbp,%r12
1593 movq %rdx,%r13
1594 adcq $0,%r13
1595
1596
1597 mulq %r15
1598 xorq %r15,%r15
1599 addq %rax,%r13
1600 movq 0(%rsi),%rax
1601 movq %rdx,%r14
1602 adcq $0,%r14
1603
/* Double the cross products (the 2*a[i]*a[j] terms of the square). */
1604 addq %r9,%r9
1605 adcq %r10,%r10
1606 adcq %r11,%r11
1607 adcq %r12,%r12
1608 adcq %r13,%r13
1609 adcq %r14,%r14
1610 adcq $0,%r15
1611
/* Add the diagonal squares a[i]^2, reloading each limb from (%rsi). */
1612 mulq %rax
1613 movq %rax,%r8
1614 movq 8(%rsi),%rax
1615 movq %rdx,%rcx
1616
1617 mulq %rax
1618 addq %rcx,%r9
1619 adcq %rax,%r10
1620 movq 16(%rsi),%rax
1621 adcq $0,%rdx
1622 movq %rdx,%rcx
1623
1624 mulq %rax
1625 addq %rcx,%r11
1626 adcq %rax,%r12
1627 movq 24(%rsi),%rax
1628 adcq $0,%rdx
1629 movq %rdx,%rcx
1630
1631 mulq %rax
1632 addq %rcx,%r13
1633 adcq %rax,%r14
1634 movq %r8,%rax
1635 adcq %rdx,%r15
1636
/* Montgomery reduction: keep .Lpoly[1] in %rsi and .Lpoly[3] in %rbp,
 * then fold the low limb four times using the special P-256 form
 * (shift-by-32 pieces plus one mulq by .Lpoly[3]). */
1637 movq .Lpoly+8(%rip),%rsi
1638 movq .Lpoly+24(%rip),%rbp
1639
1640
1641
1642
1643 movq %r8,%rcx
1644 shlq $32,%r8
1645 mulq %rbp
1646 shrq $32,%rcx
1647 addq %r8,%r9
1648 adcq %rcx,%r10
1649 adcq %rax,%r11
1650 movq %r9,%rax
1651 adcq $0,%rdx
1652
1653
1654
1655 movq %r9,%rcx
1656 shlq $32,%r9
1657 movq %rdx,%r8
1658 mulq %rbp
1659 shrq $32,%rcx
1660 addq %r9,%r10
1661 adcq %rcx,%r11
1662 adcq %rax,%r8
1663 movq %r10,%rax
1664 adcq $0,%rdx
1665
1666
1667
1668 movq %r10,%rcx
1669 shlq $32,%r10
1670 movq %rdx,%r9
1671 mulq %rbp
1672 shrq $32,%rcx
1673 addq %r10,%r11
1674 adcq %rcx,%r8
1675 adcq %rax,%r9
1676 movq %r11,%rax
1677 adcq $0,%rdx
1678
1679
1680
1681 movq %r11,%rcx
1682 shlq $32,%r11
1683 movq %rdx,%r10
1684 mulq %rbp
1685 shrq $32,%rcx
1686 addq %r11,%r8
1687 adcq %rcx,%r9
1688 adcq %rax,%r10
1689 adcq $0,%rdx
1690 xorq %r11,%r11
1691
1692
/* Add the reduced low half into the high half of the square. */
1693
1694 addq %r8,%r12
1695 adcq %r9,%r13
1696 movq %r12,%r8
1697 adcq %r10,%r14
1698 adcq %rdx,%r15
1699 movq %r13,%r9
1700 adcq $0,%r11
1701
/* Branchless final subtraction of p: subtract, then cmovc back on borrow. */
1702 subq $-1,%r12
1703 movq %r14,%r10
1704 sbbq %rsi,%r13
1705 sbbq $0,%r14
1706 movq %r15,%rcx
1707 sbbq %rbp,%r15
1708 sbbq $0,%r11
1709
1710 cmovcq %r8,%r12
1711 cmovcq %r9,%r13
1712 movq %r12,0(%rdi)
1713 cmovcq %r10,%r14
1714 movq %r13,8(%rdi)
1715 cmovcq %rcx,%r15
1716 movq %r14,16(%rdi)
1717 movq %r15,24(%rdi)
1718
1719 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001720.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08001721.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
/*
 * __ecp_nistz256_mul_montx -- Montgomery multiplication modulo P-256 using
 * BMI2/ADX (mulxq/adcxq/adoxq), selected when cpuid reports both features.
 * In:  %rdx = first limb of operand b; further b limbs are reloaded from
 *      8/16/24(%rbx); the other operand's limbs start in %r9..%r12 and are
 *      re-read at 0..24+128(%rsi) -- NOTE(review): %rsi appears to be
 *      pre-biased by -128 by the caller, as the sqr_montx caller visibly
 *      does; confirm against the mul_mont dispatch code.
 * Out: result stored at 0..24(%rdi).
 * Constant-time: interleaved multiply/reduce rounds, cmovc final subtract.
 */
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001722.type __ecp_nistz256_mul_montx,@function
1723.align 32
1724__ecp_nistz256_mul_montx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001725.cfi_startproc
/* Round 0: b[0]*a, then fold the low limb (shift-by-32 trick, %r14=32). */
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001726
1727
1728 mulxq %r9,%r8,%r9
1729 mulxq %r10,%rcx,%r10
1730 movq $32,%r14
1731 xorq %r13,%r13
1732 mulxq %r11,%rbp,%r11
1733 movq .Lpoly+24(%rip),%r15
1734 adcq %rcx,%r9
1735 mulxq %r12,%rcx,%r12
1736 movq %r8,%rdx
1737 adcq %rbp,%r10
1738 shlxq %r14,%r8,%rbp
1739 adcq %rcx,%r11
1740 shrxq %r14,%r8,%rcx
1741 adcq $0,%r12
1742
1743
1744
1745 addq %rbp,%r9
1746 adcq %rcx,%r10
1747
1748 mulxq %r15,%rcx,%rbp
1749 movq 8(%rbx),%rdx
1750 adcq %rcx,%r11
1751 adcq %rbp,%r12
1752 adcq $0,%r13
1753 xorq %r8,%r8
1754
/* Round 1: accumulate b[1]*a with dual adcx/adox carry chains, reduce. */
1755
1756
1757 mulxq 0+128(%rsi),%rcx,%rbp
1758 adcxq %rcx,%r9
1759 adoxq %rbp,%r10
1760
1761 mulxq 8+128(%rsi),%rcx,%rbp
1762 adcxq %rcx,%r10
1763 adoxq %rbp,%r11
1764
1765 mulxq 16+128(%rsi),%rcx,%rbp
1766 adcxq %rcx,%r11
1767 adoxq %rbp,%r12
1768
1769 mulxq 24+128(%rsi),%rcx,%rbp
1770 movq %r9,%rdx
1771 adcxq %rcx,%r12
1772 shlxq %r14,%r9,%rcx
1773 adoxq %rbp,%r13
1774 shrxq %r14,%r9,%rbp
1775
1776 adcxq %r8,%r13
1777 adoxq %r8,%r8
1778 adcq $0,%r8
1779
1780
1781
1782 addq %rcx,%r10
1783 adcq %rbp,%r11
1784
1785 mulxq %r15,%rcx,%rbp
1786 movq 16(%rbx),%rdx
1787 adcq %rcx,%r12
1788 adcq %rbp,%r13
1789 adcq $0,%r8
1790 xorq %r9,%r9
1791
/* Round 2: b[2]*a, reduce. */
1792
1793
1794 mulxq 0+128(%rsi),%rcx,%rbp
1795 adcxq %rcx,%r10
1796 adoxq %rbp,%r11
1797
1798 mulxq 8+128(%rsi),%rcx,%rbp
1799 adcxq %rcx,%r11
1800 adoxq %rbp,%r12
1801
1802 mulxq 16+128(%rsi),%rcx,%rbp
1803 adcxq %rcx,%r12
1804 adoxq %rbp,%r13
1805
1806 mulxq 24+128(%rsi),%rcx,%rbp
1807 movq %r10,%rdx
1808 adcxq %rcx,%r13
1809 shlxq %r14,%r10,%rcx
1810 adoxq %rbp,%r8
1811 shrxq %r14,%r10,%rbp
1812
1813 adcxq %r9,%r8
1814 adoxq %r9,%r9
1815 adcq $0,%r9
1816
1817
1818
1819 addq %rcx,%r11
1820 adcq %rbp,%r12
1821
1822 mulxq %r15,%rcx,%rbp
1823 movq 24(%rbx),%rdx
1824 adcq %rcx,%r13
1825 adcq %rbp,%r8
1826 adcq $0,%r9
1827 xorq %r10,%r10
1828
/* Round 3: b[3]*a, final reduction fold. */
1829
1830
1831 mulxq 0+128(%rsi),%rcx,%rbp
1832 adcxq %rcx,%r11
1833 adoxq %rbp,%r12
1834
1835 mulxq 8+128(%rsi),%rcx,%rbp
1836 adcxq %rcx,%r12
1837 adoxq %rbp,%r13
1838
1839 mulxq 16+128(%rsi),%rcx,%rbp
1840 adcxq %rcx,%r13
1841 adoxq %rbp,%r8
1842
1843 mulxq 24+128(%rsi),%rcx,%rbp
1844 movq %r11,%rdx
1845 adcxq %rcx,%r8
1846 shlxq %r14,%r11,%rcx
1847 adoxq %rbp,%r9
1848 shrxq %r14,%r11,%rbp
1849
1850 adcxq %r10,%r9
1851 adoxq %r10,%r10
1852 adcq $0,%r10
1853
1854
1855
1856 addq %rcx,%r12
1857 adcq %rbp,%r13
1858
1859 mulxq %r15,%rcx,%rbp
1860 movq %r12,%rbx
1861 movq .Lpoly+8(%rip),%r14
1862 adcq %rcx,%r8
1863 movq %r13,%rdx
1864 adcq %rbp,%r9
1865 adcq $0,%r10
1866
/* Branchless final subtraction of p (sbb chain + cmovc restore). */
1867
1868
1869 xorl %eax,%eax
1870 movq %r8,%rcx
1871 sbbq $-1,%r12
1872 sbbq %r14,%r13
1873 sbbq $0,%r8
1874 movq %r9,%rbp
1875 sbbq %r15,%r9
1876 sbbq $0,%r10
1877
1878 cmovcq %rbx,%r12
1879 cmovcq %rdx,%r13
1880 movq %r12,0(%rdi)
1881 cmovcq %rcx,%r8
1882 movq %r13,8(%rdi)
1883 cmovcq %rbp,%r9
1884 movq %r8,16(%rdi)
1885 movq %r9,24(%rdi)
1886
1887 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001888.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001889.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
1890
/*
 * __ecp_nistz256_sqr_montx -- Montgomery squaring modulo P-256 using
 * BMI2/ADX (mulxq/adcxq/adoxq).
 * In:  %rdx = a[0] and %r14,%r15,%r8 = a[1..3] (pre-loaded by the caller);
 *      %rsi = a - 128, so limbs are re-read at 0..24+128(%rsi) (the caller
 *      visibly does "leaq -128(%rsi),%rsi" before the call).
 * Out: result stored at 0..24(%rdi).
 * Clobbers: %rax,%rbp,%rcx,%rdx,%rsi,%r8-%r15, flags.
 */
1891.type __ecp_nistz256_sqr_montx,@function
1892.align 32
1893__ecp_nistz256_sqr_montx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001894.cfi_startproc
/* Off-diagonal products a[i]*a[j], i<j. */
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001895 mulxq %r14,%r9,%r10
1896 mulxq %r15,%rcx,%r11
1897 xorl %eax,%eax
1898 adcq %rcx,%r10
1899 mulxq %r8,%rbp,%r12
1900 movq %r14,%rdx
1901 adcq %rbp,%r11
1902 adcq $0,%r12
1903 xorq %r13,%r13
1904
1905
1906 mulxq %r15,%rcx,%rbp
1907 adcxq %rcx,%r11
1908 adoxq %rbp,%r12
1909
1910 mulxq %r8,%rcx,%rbp
1911 movq %r15,%rdx
1912 adcxq %rcx,%r12
1913 adoxq %rbp,%r13
1914 adcq $0,%r13
1915
1916
/* Double the cross products and interleave the diagonal squares a[i]^2,
 * reloading each limb from 128(%rsi). */
1917 mulxq %r8,%rcx,%r14
1918 movq 0+128(%rsi),%rdx
1919 xorq %r15,%r15
1920 adcxq %r9,%r9
1921 adoxq %rcx,%r13
1922 adcxq %r10,%r10
1923 adoxq %r15,%r14
1924
1925 mulxq %rdx,%r8,%rbp
1926 movq 8+128(%rsi),%rdx
1927 adcxq %r11,%r11
1928 adoxq %rbp,%r9
1929 adcxq %r12,%r12
1930 mulxq %rdx,%rcx,%rax
1931 movq 16+128(%rsi),%rdx
1932 adcxq %r13,%r13
1933 adoxq %rcx,%r10
1934 adcxq %r14,%r14
1935.byte 0x67
1936 mulxq %rdx,%rcx,%rbp
1937 movq 24+128(%rsi),%rdx
1938 adoxq %rax,%r11
1939 adcxq %r15,%r15
1940 adoxq %rcx,%r12
1941 movq $32,%rsi
1942 adoxq %rbp,%r13
1943.byte 0x67,0x67
1944 mulxq %rdx,%rcx,%rax
1945 movq .Lpoly+24(%rip),%rdx
1946 adoxq %rcx,%r14
1947 shlxq %rsi,%r8,%rcx
1948 adoxq %rax,%r15
1949 shrxq %rsi,%r8,%rax
1950 movq %rdx,%rbp
1951
/* Four reduction folds: shift-by-32 pieces (%rsi=32) plus a mulx by
 * .Lpoly[3] (%rdx, saved in %rbp for the final subtraction). */
1952
1953 addq %rcx,%r9
1954 adcq %rax,%r10
1955
1956 mulxq %r8,%rcx,%r8
1957 adcq %rcx,%r11
1958 shlxq %rsi,%r9,%rcx
1959 adcq $0,%r8
1960 shrxq %rsi,%r9,%rax
1961
1962
1963 addq %rcx,%r10
1964 adcq %rax,%r11
1965
1966 mulxq %r9,%rcx,%r9
1967 adcq %rcx,%r8
1968 shlxq %rsi,%r10,%rcx
1969 adcq $0,%r9
1970 shrxq %rsi,%r10,%rax
1971
1972
1973 addq %rcx,%r11
1974 adcq %rax,%r8
1975
1976 mulxq %r10,%rcx,%r10
1977 adcq %rcx,%r9
1978 shlxq %rsi,%r11,%rcx
1979 adcq $0,%r10
1980 shrxq %rsi,%r11,%rax
1981
1982
1983 addq %rcx,%r8
1984 adcq %rax,%r9
1985
1986 mulxq %r11,%rcx,%r11
1987 adcq %rcx,%r10
1988 adcq $0,%r11
1989
/* Add reduced low half into high half, then branchless subtract of p. */
1990 xorq %rdx,%rdx
1991 addq %r8,%r12
1992 movq .Lpoly+8(%rip),%rsi
1993 adcq %r9,%r13
1994 movq %r12,%r8
1995 adcq %r10,%r14
1996 adcq %r11,%r15
1997 movq %r13,%r9
1998 adcq $0,%rdx
1999
2000 subq $-1,%r12
2001 movq %r14,%r10
2002 sbbq %rsi,%r13
2003 sbbq $0,%r14
2004 movq %r15,%r11
2005 sbbq %rbp,%r15
2006 sbbq $0,%rdx
2007
2008 cmovcq %r8,%r12
2009 cmovcq %r9,%r13
2010 movq %r12,0(%rdi)
2011 cmovcq %r10,%r14
2012 movq %r13,8(%rdi)
2013 cmovcq %r11,%r15
2014 movq %r14,16(%rdi)
2015 movq %r15,24(%rdi)
2016
2017 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002018.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002019.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
Adam Langleyfad63272015-11-12 12:15:39 -08002020
2021
Adam Langleyfad63272015-11-12 12:15:39 -08002022.globl ecp_nistz256_select_w5
2023.hidden ecp_nistz256_select_w5
2024.type ecp_nistz256_select_w5,@function
2025.align 32
2026ecp_nistz256_select_w5:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002027.cfi_startproc
Robert Sloan8f860b12017-08-28 07:37:06 -07002028 leaq OPENSSL_ia32cap_P(%rip),%rax
2029 movq 8(%rax),%rax
2030 testl $32,%eax
2031 jnz .Lavx2_select_w5
Adam Langleyfad63272015-11-12 12:15:39 -08002032 movdqa .LOne(%rip),%xmm0
2033 movd %edx,%xmm1
2034
2035 pxor %xmm2,%xmm2
2036 pxor %xmm3,%xmm3
2037 pxor %xmm4,%xmm4
2038 pxor %xmm5,%xmm5
2039 pxor %xmm6,%xmm6
2040 pxor %xmm7,%xmm7
2041
2042 movdqa %xmm0,%xmm8
2043 pshufd $0,%xmm1,%xmm1
2044
2045 movq $16,%rax
2046.Lselect_loop_sse_w5:
2047
2048 movdqa %xmm8,%xmm15
2049 paddd %xmm0,%xmm8
2050 pcmpeqd %xmm1,%xmm15
2051
2052 movdqa 0(%rsi),%xmm9
2053 movdqa 16(%rsi),%xmm10
2054 movdqa 32(%rsi),%xmm11
2055 movdqa 48(%rsi),%xmm12
2056 movdqa 64(%rsi),%xmm13
2057 movdqa 80(%rsi),%xmm14
2058 leaq 96(%rsi),%rsi
2059
2060 pand %xmm15,%xmm9
2061 pand %xmm15,%xmm10
2062 por %xmm9,%xmm2
2063 pand %xmm15,%xmm11
2064 por %xmm10,%xmm3
2065 pand %xmm15,%xmm12
2066 por %xmm11,%xmm4
2067 pand %xmm15,%xmm13
2068 por %xmm12,%xmm5
2069 pand %xmm15,%xmm14
2070 por %xmm13,%xmm6
2071 por %xmm14,%xmm7
2072
2073 decq %rax
2074 jnz .Lselect_loop_sse_w5
2075
2076 movdqu %xmm2,0(%rdi)
2077 movdqu %xmm3,16(%rdi)
2078 movdqu %xmm4,32(%rdi)
2079 movdqu %xmm5,48(%rdi)
2080 movdqu %xmm6,64(%rdi)
2081 movdqu %xmm7,80(%rdi)
2082 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002083.cfi_endproc
Robert Sloanab8b8882018-03-26 11:39:51 -07002084.LSEH_end_ecp_nistz256_select_w5:
Adam Langleyfad63272015-11-12 12:15:39 -08002085.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
2086
2087
2088
/*
 * ecp_nistz256_select_w7 -- constant-time selection of one 64-byte table
 * entry (window width 7) from a 64-entry table.
 * In:  %rsi -> table, %edx = selector, %rdi -> 64-byte output.
 * Same masked-scan technique as select_w5: all 64 entries are read and
 * pand/por-accumulated, independent of the selector value.
 * Jumps to the AVX2 variant when bit 5 of OPENSSL_ia32cap_P word 2 is set.
 */
2089.globl ecp_nistz256_select_w7
2090.hidden ecp_nistz256_select_w7
2091.type ecp_nistz256_select_w7,@function
2092.align 32
2093ecp_nistz256_select_w7:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002094.cfi_startproc
Robert Sloan8f860b12017-08-28 07:37:06 -07002095 leaq OPENSSL_ia32cap_P(%rip),%rax
2096 movq 8(%rax),%rax
2097 testl $32,%eax
2098 jnz .Lavx2_select_w7
/* SSE path: broadcast selector in %xmm1, counter in %xmm8. */
Adam Langleyfad63272015-11-12 12:15:39 -08002099 movdqa .LOne(%rip),%xmm8
2100 movd %edx,%xmm1
2101
2102 pxor %xmm2,%xmm2
2103 pxor %xmm3,%xmm3
2104 pxor %xmm4,%xmm4
2105 pxor %xmm5,%xmm5
2106
2107 movdqa %xmm8,%xmm0
2108 pshufd $0,%xmm1,%xmm1
2109 movq $64,%rax
2110
2111.Lselect_loop_sse_w7:
2112 movdqa %xmm8,%xmm15
2113 paddd %xmm0,%xmm8
2114 movdqa 0(%rsi),%xmm9
2115 movdqa 16(%rsi),%xmm10
2116 pcmpeqd %xmm1,%xmm15
2117 movdqa 32(%rsi),%xmm11
2118 movdqa 48(%rsi),%xmm12
2119 leaq 64(%rsi),%rsi
2120
2121 pand %xmm15,%xmm9
2122 pand %xmm15,%xmm10
2123 por %xmm9,%xmm2
2124 pand %xmm15,%xmm11
2125 por %xmm10,%xmm3
2126 pand %xmm15,%xmm12
2127 por %xmm11,%xmm4
2128 prefetcht0 255(%rsi)
2129 por %xmm12,%xmm5
2130
2131 decq %rax
2132 jnz .Lselect_loop_sse_w7
2133
2134 movdqu %xmm2,0(%rdi)
2135 movdqu %xmm3,16(%rdi)
2136 movdqu %xmm4,32(%rdi)
2137 movdqu %xmm5,48(%rdi)
2138 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002139.cfi_endproc
Robert Sloanab8b8882018-03-26 11:39:51 -07002140.LSEH_end_ecp_nistz256_select_w7:
Adam Langleyfad63272015-11-12 12:15:39 -08002141.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
Robert Sloan8f860b12017-08-28 07:37:06 -07002142
2143
/*
 * ecp_nistz256_avx2_select_w5 -- AVX2 variant of the constant-time w5
 * table lookup, entered from ecp_nistz256_select_w5 via .Lavx2_select_w5.
 * In:  %rsi -> table (16 entries of 96 bytes), %edx = selector,
 *      %rdi -> 96-byte output.
 * Processes two entries (192 bytes) per iteration, 8 iterations, with two
 * counter streams compared against the broadcast selector; matches are
 * masked (vpand) and XOR-accumulated.  Access pattern is selector-
 * independent.  vzeroupper on entry/exit keeps the SSE/AVX state clean.
 */
2144.type ecp_nistz256_avx2_select_w5,@function
2145.align 32
2146ecp_nistz256_avx2_select_w5:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002147.cfi_startproc
Robert Sloan8f860b12017-08-28 07:37:06 -07002148.Lavx2_select_w5:
2149 vzeroupper
2150 vmovdqa .LTwo(%rip),%ymm0
2151
2152 vpxor %ymm2,%ymm2,%ymm2
2153 vpxor %ymm3,%ymm3,%ymm3
2154 vpxor %ymm4,%ymm4,%ymm4
2155
2156 vmovdqa .LOne(%rip),%ymm5
2157 vmovdqa .LTwo(%rip),%ymm10
2158
/* Broadcast the selector to all lanes (vpermd with zero index vector). */
2159 vmovd %edx,%xmm1
2160 vpermd %ymm1,%ymm2,%ymm1
2161
2162 movq $8,%rax
2163.Lselect_loop_avx2_w5:
2164
2165 vmovdqa 0(%rsi),%ymm6
2166 vmovdqa 32(%rsi),%ymm7
2167 vmovdqa 64(%rsi),%ymm8
2168
2169 vmovdqa 96(%rsi),%ymm11
2170 vmovdqa 128(%rsi),%ymm12
2171 vmovdqa 160(%rsi),%ymm13
2172
2173 vpcmpeqd %ymm1,%ymm5,%ymm9
2174 vpcmpeqd %ymm1,%ymm10,%ymm14
2175
/* Both counter streams advance by 2 each iteration. */
2176 vpaddd %ymm0,%ymm5,%ymm5
2177 vpaddd %ymm0,%ymm10,%ymm10
2178 leaq 192(%rsi),%rsi
2179
2180 vpand %ymm9,%ymm6,%ymm6
2181 vpand %ymm9,%ymm7,%ymm7
2182 vpand %ymm9,%ymm8,%ymm8
2183 vpand %ymm14,%ymm11,%ymm11
2184 vpand %ymm14,%ymm12,%ymm12
2185 vpand %ymm14,%ymm13,%ymm13
2186
2187 vpxor %ymm6,%ymm2,%ymm2
2188 vpxor %ymm7,%ymm3,%ymm3
2189 vpxor %ymm8,%ymm4,%ymm4
2190 vpxor %ymm11,%ymm2,%ymm2
2191 vpxor %ymm12,%ymm3,%ymm3
2192 vpxor %ymm13,%ymm4,%ymm4
2193
2194 decq %rax
2195 jnz .Lselect_loop_avx2_w5
2196
2197 vmovdqu %ymm2,0(%rdi)
2198 vmovdqu %ymm3,32(%rdi)
2199 vmovdqu %ymm4,64(%rdi)
2200 vzeroupper
2201 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002202.cfi_endproc
Robert Sloanab8b8882018-03-26 11:39:51 -07002203.LSEH_end_ecp_nistz256_avx2_select_w5:
Robert Sloan8f860b12017-08-28 07:37:06 -07002204.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
2205
2206
2207
Adam Langleyfad63272015-11-12 12:15:39 -08002208.globl ecp_nistz256_avx2_select_w7
2209.hidden ecp_nistz256_avx2_select_w7
2210.type ecp_nistz256_avx2_select_w7,@function
2211.align 32
2212ecp_nistz256_avx2_select_w7:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002213.cfi_startproc
Robert Sloan8f860b12017-08-28 07:37:06 -07002214.Lavx2_select_w7:
2215 vzeroupper
2216 vmovdqa .LThree(%rip),%ymm0
2217
2218 vpxor %ymm2,%ymm2,%ymm2
2219 vpxor %ymm3,%ymm3,%ymm3
2220
2221 vmovdqa .LOne(%rip),%ymm4
2222 vmovdqa .LTwo(%rip),%ymm8
2223 vmovdqa .LThree(%rip),%ymm12
2224
2225 vmovd %edx,%xmm1
2226 vpermd %ymm1,%ymm2,%ymm1
2227
2228
2229 movq $21,%rax
2230.Lselect_loop_avx2_w7:
2231
2232 vmovdqa 0(%rsi),%ymm5
2233 vmovdqa 32(%rsi),%ymm6
2234
2235 vmovdqa 64(%rsi),%ymm9
2236 vmovdqa 96(%rsi),%ymm10
2237
2238 vmovdqa 128(%rsi),%ymm13
2239 vmovdqa 160(%rsi),%ymm14
2240
2241 vpcmpeqd %ymm1,%ymm4,%ymm7
2242 vpcmpeqd %ymm1,%ymm8,%ymm11
2243 vpcmpeqd %ymm1,%ymm12,%ymm15
2244
2245 vpaddd %ymm0,%ymm4,%ymm4
2246 vpaddd %ymm0,%ymm8,%ymm8
2247 vpaddd %ymm0,%ymm12,%ymm12
2248 leaq 192(%rsi),%rsi
2249
2250 vpand %ymm7,%ymm5,%ymm5
2251 vpand %ymm7,%ymm6,%ymm6
2252 vpand %ymm11,%ymm9,%ymm9
2253 vpand %ymm11,%ymm10,%ymm10
2254 vpand %ymm15,%ymm13,%ymm13
2255 vpand %ymm15,%ymm14,%ymm14
2256
2257 vpxor %ymm5,%ymm2,%ymm2
2258 vpxor %ymm6,%ymm3,%ymm3
2259 vpxor %ymm9,%ymm2,%ymm2
2260 vpxor %ymm10,%ymm3,%ymm3
2261 vpxor %ymm13,%ymm2,%ymm2
2262 vpxor %ymm14,%ymm3,%ymm3
2263
2264 decq %rax
2265 jnz .Lselect_loop_avx2_w7
2266
2267
2268 vmovdqa 0(%rsi),%ymm5
2269 vmovdqa 32(%rsi),%ymm6
2270
2271 vpcmpeqd %ymm1,%ymm4,%ymm7
2272
2273 vpand %ymm7,%ymm5,%ymm5
2274 vpand %ymm7,%ymm6,%ymm6
2275
2276 vpxor %ymm5,%ymm2,%ymm2
2277 vpxor %ymm6,%ymm3,%ymm3
2278
2279 vmovdqu %ymm2,0(%rdi)
2280 vmovdqu %ymm3,32(%rdi)
2281 vzeroupper
Adam Langleyfad63272015-11-12 12:15:39 -08002282 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002283.cfi_endproc
Robert Sloanab8b8882018-03-26 11:39:51 -07002284.LSEH_end_ecp_nistz256_avx2_select_w7:
Adam Langleyfad63272015-11-12 12:15:39 -08002285.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
/*
 * __ecp_nistz256_add_toq -- modular addition:
 * (%r12,%r13,%r8,%r9) += limbs at 0..24(%rbx), reduced mod P-256.
 * Result is stored at 0..24(%rdi) and also left in %r12,%r13,%r8,%r9.
 * Callers keep .Lpoly[1] in %r14 and .Lpoly[3] in %r15 (see the .Lpoly
 * loads in ecp_nistz256_point_double).  Branchless: trial-subtracts p and
 * restores with cmovc on borrow.  Clobbers %rax,%rbp,%rcx,%r10,%r11.
 */
2286.type __ecp_nistz256_add_toq,@function
2287.align 32
2288__ecp_nistz256_add_toq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002289.cfi_startproc
/* %r11 collects the carry out of the 256-bit addition. */
Steven Valdez909b19f2016-11-21 15:35:44 -05002290 xorq %r11,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002291 addq 0(%rbx),%r12
2292 adcq 8(%rbx),%r13
2293 movq %r12,%rax
2294 adcq 16(%rbx),%r8
2295 adcq 24(%rbx),%r9
2296 movq %r13,%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05002297 adcq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002298
/* Trial subtraction of p = {-1, .Lpoly[1], 0, .Lpoly[3]}. */
2299 subq $-1,%r12
2300 movq %r8,%rcx
2301 sbbq %r14,%r13
2302 sbbq $0,%r8
2303 movq %r9,%r10
2304 sbbq %r15,%r9
Steven Valdez909b19f2016-11-21 15:35:44 -05002305 sbbq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002306
Steven Valdez909b19f2016-11-21 15:35:44 -05002307 cmovcq %rax,%r12
2308 cmovcq %rbp,%r13
Adam Langleyfad63272015-11-12 12:15:39 -08002309 movq %r12,0(%rdi)
Steven Valdez909b19f2016-11-21 15:35:44 -05002310 cmovcq %rcx,%r8
Adam Langleyfad63272015-11-12 12:15:39 -08002311 movq %r13,8(%rdi)
Steven Valdez909b19f2016-11-21 15:35:44 -05002312 cmovcq %r10,%r9
Adam Langleyfad63272015-11-12 12:15:39 -08002313 movq %r8,16(%rdi)
2314 movq %r9,24(%rdi)
2315
2316 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002317.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08002318.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
2319
/*
 * __ecp_nistz256_sub_fromq -- modular subtraction:
 * (%r12,%r13,%r8,%r9) -= limbs at 0..24(%rbx), reduced mod P-256.
 * Result stored at 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
 * Callers keep .Lpoly[1] in %r14 and .Lpoly[3] in %r15.
 * On borrow (%r11 = all-ones via sbbq %r11,%r11) the correction "+ p" is
 * kept; otherwise cmovz restores the uncorrected value -- branch-free.
 */
2320.type __ecp_nistz256_sub_fromq,@function
2321.align 32
2322__ecp_nistz256_sub_fromq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002323.cfi_startproc
Adam Langleyfad63272015-11-12 12:15:39 -08002324 subq 0(%rbx),%r12
2325 sbbq 8(%rbx),%r13
2326 movq %r12,%rax
2327 sbbq 16(%rbx),%r8
2328 sbbq 24(%rbx),%r9
2329 movq %r13,%rbp
/* %r11 = 0 or ~0: borrow mask from the 256-bit subtraction. */
2330 sbbq %r11,%r11
2331
/* Unconditionally add p; keep it only if a borrow occurred. */
2332 addq $-1,%r12
2333 movq %r8,%rcx
2334 adcq %r14,%r13
2335 adcq $0,%r8
2336 movq %r9,%r10
2337 adcq %r15,%r9
2338 testq %r11,%r11
2339
2340 cmovzq %rax,%r12
2341 cmovzq %rbp,%r13
2342 movq %r12,0(%rdi)
2343 cmovzq %rcx,%r8
2344 movq %r13,8(%rdi)
2345 cmovzq %r10,%r9
2346 movq %r8,16(%rdi)
2347 movq %r9,24(%rdi)
2348
2349 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002350.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08002351.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
2352
/*
 * __ecp_nistz256_subq -- modular subtraction with register-only result:
 * (%rax,%rbp,%rcx,%r10) - (%r12,%r13,%r8,%r9) mod P-256, result returned
 * in %r12,%r13,%r8,%r9.  Unlike sub_fromq, nothing is stored to memory.
 * Callers keep .Lpoly[1] in %r14 and .Lpoly[3] in %r15.  Branch-free via
 * the %r11 borrow mask and cmovnz.
 */
2353.type __ecp_nistz256_subq,@function
2354.align 32
2355__ecp_nistz256_subq:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002356.cfi_startproc
Adam Langleyfad63272015-11-12 12:15:39 -08002357 subq %r12,%rax
2358 sbbq %r13,%rbp
2359 movq %rax,%r12
2360 sbbq %r8,%rcx
2361 sbbq %r9,%r10
2362 movq %rbp,%r13
/* %r11 = 0 or ~0: borrow mask. */
2363 sbbq %r11,%r11
2364
/* Unconditionally add p; select the corrected value only on borrow. */
2365 addq $-1,%rax
2366 movq %rcx,%r8
2367 adcq %r14,%rbp
2368 adcq $0,%rcx
2369 movq %r10,%r9
2370 adcq %r15,%r10
2371 testq %r11,%r11
2372
2373 cmovnzq %rax,%r12
2374 cmovnzq %rbp,%r13
2375 cmovnzq %rcx,%r8
2376 cmovnzq %r10,%r9
2377
2378 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002379.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08002380.size __ecp_nistz256_subq,.-__ecp_nistz256_subq
2381
/*
 * __ecp_nistz256_mul_by_2q -- modular doubling:
 * (%r12,%r13,%r8,%r9) = 2*(%r12,%r13,%r8,%r9) mod P-256.
 * Result stored at 0..24(%rdi) and left in %r12,%r13,%r8,%r9.
 * Callers keep .Lpoly[1] in %r14 and .Lpoly[3] in %r15.  Same branchless
 * trial-subtract/cmovc reduction as __ecp_nistz256_add_toq.
 */
2382.type __ecp_nistz256_mul_by_2q,@function
2383.align 32
2384__ecp_nistz256_mul_by_2q:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002385.cfi_startproc
/* Double via add-with-carry chain; %r11 catches the top carry. */
Steven Valdez909b19f2016-11-21 15:35:44 -05002386 xorq %r11,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002387 addq %r12,%r12
2388 adcq %r13,%r13
2389 movq %r12,%rax
2390 adcq %r8,%r8
2391 adcq %r9,%r9
2392 movq %r13,%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05002393 adcq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002394
2395 subq $-1,%r12
2396 movq %r8,%rcx
2397 sbbq %r14,%r13
2398 sbbq $0,%r8
2399 movq %r9,%r10
2400 sbbq %r15,%r9
Steven Valdez909b19f2016-11-21 15:35:44 -05002401 sbbq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002402
Steven Valdez909b19f2016-11-21 15:35:44 -05002403 cmovcq %rax,%r12
2404 cmovcq %rbp,%r13
Adam Langleyfad63272015-11-12 12:15:39 -08002405 movq %r12,0(%rdi)
Steven Valdez909b19f2016-11-21 15:35:44 -05002406 cmovcq %rcx,%r8
Adam Langleyfad63272015-11-12 12:15:39 -08002407 movq %r13,8(%rdi)
Steven Valdez909b19f2016-11-21 15:35:44 -05002408 cmovcq %r10,%r9
Adam Langleyfad63272015-11-12 12:15:39 -08002409 movq %r8,16(%rdi)
2410 movq %r9,24(%rdi)
2411
2412 .byte 0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002413.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08002414.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
/*
 * ecp_nistz256_point_double -- P-256 point doubling:
 * out(%rdi) = 2 * in(%rsi), points as three 32-byte field elements
 * (X at +0, Y at +32, Z at +64) in Montgomery form.
 * Dispatches to .Lpoint_doublex when cpuid word 2 has both bits of mask
 * 0x80100 set (the BMI2/ADX path, not in this block); otherwise runs the
 * mulq-based helpers below.  Allocates 160(+8) bytes of stack for five
 * 32-byte field temporaries at 0,32,64,96,128(%rsp).
 * .Lpoint_double_shortcutq is also entered from ecp_nistz256_point_add's
 * .Ladd_doubleq path (with the frame already adjusted).
 */
2415.globl ecp_nistz256_point_double
2416.hidden ecp_nistz256_point_double
2417.type ecp_nistz256_point_double,@function
2418.align 32
2419ecp_nistz256_point_double:
Robert Sloanab8b8882018-03-26 11:39:51 -07002420.cfi_startproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002421 leaq OPENSSL_ia32cap_P(%rip),%rcx
2422 movq 8(%rcx),%rcx
2423 andl $0x80100,%ecx
2424 cmpl $0x80100,%ecx
2425 je .Lpoint_doublex
/* Standard prologue: save callee-saved registers, carve the frame. */
Adam Langleyfad63272015-11-12 12:15:39 -08002426 pushq %rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07002427.cfi_adjust_cfa_offset 8
2428.cfi_offset %rbp,-16
Adam Langleyfad63272015-11-12 12:15:39 -08002429 pushq %rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07002430.cfi_adjust_cfa_offset 8
2431.cfi_offset %rbx,-24
Adam Langleyfad63272015-11-12 12:15:39 -08002432 pushq %r12
Robert Sloanab8b8882018-03-26 11:39:51 -07002433.cfi_adjust_cfa_offset 8
2434.cfi_offset %r12,-32
Adam Langleyfad63272015-11-12 12:15:39 -08002435 pushq %r13
Robert Sloanab8b8882018-03-26 11:39:51 -07002436.cfi_adjust_cfa_offset 8
2437.cfi_offset %r13,-40
Adam Langleyfad63272015-11-12 12:15:39 -08002438 pushq %r14
Robert Sloanab8b8882018-03-26 11:39:51 -07002439.cfi_adjust_cfa_offset 8
2440.cfi_offset %r14,-48
Adam Langleyfad63272015-11-12 12:15:39 -08002441 pushq %r15
Robert Sloanab8b8882018-03-26 11:39:51 -07002442.cfi_adjust_cfa_offset 8
2443.cfi_offset %r15,-56
Adam Langleyfad63272015-11-12 12:15:39 -08002444 subq $160+8,%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07002445.cfi_adjust_cfa_offset 32*5+8
2446.Lpoint_doubleq_body:
Adam Langleyfad63272015-11-12 12:15:39 -08002447
David Benjamin4969cc92016-04-22 15:02:23 -04002448.Lpoint_double_shortcutq:
/* Load input: X into xmm0/xmm1 (saved at 96(%rsp)), Y limbs into
 * r12,r13,r8,r9; keep .Lpoly[1]/.Lpoly[3] in r14/r15 for the helpers;
 * stash rdi / rdi+32 / rdi+64 in xmm registers for later stores. */
Adam Langleyfad63272015-11-12 12:15:39 -08002449 movdqu 0(%rsi),%xmm0
2450 movq %rsi,%rbx
2451 movdqu 16(%rsi),%xmm1
2452 movq 32+0(%rsi),%r12
2453 movq 32+8(%rsi),%r13
2454 movq 32+16(%rsi),%r8
2455 movq 32+24(%rsi),%r9
2456 movq .Lpoly+8(%rip),%r14
2457 movq .Lpoly+24(%rip),%r15
2458 movdqa %xmm0,96(%rsp)
2459 movdqa %xmm1,96+16(%rsp)
2460 leaq 32(%rdi),%r10
2461 leaq 64(%rdi),%r11
2462.byte 102,72,15,110,199
2463.byte 102,73,15,110,202
2464.byte 102,73,15,110,211
2465
/* 0(%rsp) = 2*Y. */
2466 leaq 0(%rsp),%rdi
2467 call __ecp_nistz256_mul_by_2q
2468
/* 64(%rsp) = Z^2. */
2469 movq 64+0(%rsi),%rax
2470 movq 64+8(%rsi),%r14
2471 movq 64+16(%rsi),%r15
2472 movq 64+24(%rsi),%r8
2473 leaq 64-0(%rsi),%rsi
2474 leaq 64(%rsp),%rdi
2475 call __ecp_nistz256_sqr_montq
2476
/* 0(%rsp) = (2*Y)^2. */
2477 movq 0+0(%rsp),%rax
2478 movq 8+0(%rsp),%r14
2479 leaq 0+0(%rsp),%rsi
2480 movq 16+0(%rsp),%r15
2481 movq 24+0(%rsp),%r8
2482 leaq 0(%rsp),%rdi
2483 call __ecp_nistz256_sqr_montq
2484
/* New Z = 2*Y*Z (multiply then double, stored via the saved rdi+64). */
2485 movq 32(%rbx),%rax
2486 movq 64+0(%rbx),%r9
2487 movq 64+8(%rbx),%r10
2488 movq 64+16(%rbx),%r11
2489 movq 64+24(%rbx),%r12
2490 leaq 64-0(%rbx),%rsi
2491 leaq 32(%rbx),%rbx
2492.byte 102,72,15,126,215
2493 call __ecp_nistz256_mul_montq
2494 call __ecp_nistz256_mul_by_2q
2495
/* 32(%rsp) = X + Z^2,  64(%rsp) = X - Z^2. */
2496 movq 96+0(%rsp),%r12
2497 movq 96+8(%rsp),%r13
2498 leaq 64(%rsp),%rbx
2499 movq 96+16(%rsp),%r8
2500 movq 96+24(%rsp),%r9
2501 leaq 32(%rsp),%rdi
2502 call __ecp_nistz256_add_toq
2503
2504 movq 96+0(%rsp),%r12
2505 movq 96+8(%rsp),%r13
2506 leaq 64(%rsp),%rbx
2507 movq 96+16(%rsp),%r8
2508 movq 96+24(%rsp),%r9
2509 leaq 64(%rsp),%rdi
2510 call __ecp_nistz256_sub_fromq
2511
/* Square (2*Y)^2 again and halve it mod p: add p when the value is odd
 * (cmovz keeps the unadjusted limbs when bit 0 is clear), then shift the
 * 5-limb result right by one across limbs.  Stored via saved rdi (xmm). */
2512 movq 0+0(%rsp),%rax
2513 movq 8+0(%rsp),%r14
2514 leaq 0+0(%rsp),%rsi
2515 movq 16+0(%rsp),%r15
2516 movq 24+0(%rsp),%r8
2517.byte 102,72,15,126,207
2518 call __ecp_nistz256_sqr_montq
2519 xorq %r9,%r9
2520 movq %r12,%rax
2521 addq $-1,%r12
2522 movq %r13,%r10
2523 adcq %rsi,%r13
2524 movq %r14,%rcx
2525 adcq $0,%r14
2526 movq %r15,%r8
2527 adcq %rbp,%r15
2528 adcq $0,%r9
2529 xorq %rsi,%rsi
2530 testq $1,%rax
2531
2532 cmovzq %rax,%r12
2533 cmovzq %r10,%r13
2534 cmovzq %rcx,%r14
2535 cmovzq %r8,%r15
2536 cmovzq %rsi,%r9
2537
2538 movq %r13,%rax
2539 shrq $1,%r12
2540 shlq $63,%rax
2541 movq %r14,%r10
2542 shrq $1,%r13
2543 orq %rax,%r12
2544 shlq $63,%r10
2545 movq %r15,%rcx
2546 shrq $1,%r14
2547 orq %r10,%r13
2548 shlq $63,%rcx
2549 movq %r12,0(%rdi)
2550 shrq $1,%r15
2551 movq %r13,8(%rdi)
2552 shlq $63,%r9
2553 orq %rcx,%r14
2554 orq %r9,%r15
2555 movq %r14,16(%rdi)
2556 movq %r15,24(%rdi)
/* M = 3*(X + Z^2)*(X - Z^2): multiply, double, add. */
2557 movq 64(%rsp),%rax
2558 leaq 64(%rsp),%rbx
2559 movq 0+32(%rsp),%r9
2560 movq 8+32(%rsp),%r10
2561 leaq 0+32(%rsp),%rsi
2562 movq 16+32(%rsp),%r11
2563 movq 24+32(%rsp),%r12
2564 leaq 32(%rsp),%rdi
2565 call __ecp_nistz256_mul_montq
2566
2567 leaq 128(%rsp),%rdi
2568 call __ecp_nistz256_mul_by_2q
2569
2570 leaq 32(%rsp),%rbx
2571 leaq 32(%rsp),%rdi
2572 call __ecp_nistz256_add_toq
2573
/* S = X * (2*Y)^2 and 2*S. */
2574 movq 96(%rsp),%rax
2575 leaq 96(%rsp),%rbx
2576 movq 0+0(%rsp),%r9
2577 movq 8+0(%rsp),%r10
2578 leaq 0+0(%rsp),%rsi
2579 movq 16+0(%rsp),%r11
2580 movq 24+0(%rsp),%r12
2581 leaq 0(%rsp),%rdi
2582 call __ecp_nistz256_mul_montq
2583
2584 leaq 128(%rsp),%rdi
2585 call __ecp_nistz256_mul_by_2q
2586
/* New X = M^2 - 2*S (square then subtract, stored via saved rdi). */
2587 movq 0+32(%rsp),%rax
2588 movq 8+32(%rsp),%r14
2589 leaq 0+32(%rsp),%rsi
2590 movq 16+32(%rsp),%r15
2591 movq 24+32(%rsp),%r8
2592.byte 102,72,15,126,199
2593 call __ecp_nistz256_sqr_montq
2594
2595 leaq 128(%rsp),%rbx
2596 movq %r14,%r8
2597 movq %r15,%r9
2598 movq %rsi,%r14
2599 movq %rbp,%r15
2600 call __ecp_nistz256_sub_fromq
2601
/* S - new X (register-only result via __ecp_nistz256_subq)... */
2602 movq 0+0(%rsp),%rax
2603 movq 0+8(%rsp),%rbp
2604 movq 0+16(%rsp),%rcx
2605 movq 0+24(%rsp),%r10
2606 leaq 0(%rsp),%rdi
2607 call __ecp_nistz256_subq
2608
/* ...then new Y = M*(S - X') - Y^4 (mul_montq, then subtract via the
 * saved rdi+32 pointer recovered from xmm registers). */
2609 movq 32(%rsp),%rax
2610 leaq 32(%rsp),%rbx
2611 movq %r12,%r14
2612 xorl %ecx,%ecx
2613 movq %r12,0+0(%rsp)
2614 movq %r13,%r10
2615 movq %r13,0+8(%rsp)
2616 cmovzq %r8,%r11
2617 movq %r8,0+16(%rsp)
2618 leaq 0-0(%rsp),%rsi
2619 cmovzq %r9,%r12
2620 movq %r9,0+24(%rsp)
2621 movq %r14,%r9
2622 leaq 0(%rsp),%rdi
2623 call __ecp_nistz256_mul_montq
2624
2625.byte 102,72,15,126,203
2626.byte 102,72,15,126,207
2627 call __ecp_nistz256_sub_fromq
2628
/* Epilogue: restore callee-saved registers and pop the frame. */
Robert Sloanab8b8882018-03-26 11:39:51 -07002629 leaq 160+56(%rsp),%rsi
2630.cfi_def_cfa %rsi,8
2631 movq -48(%rsi),%r15
2632.cfi_restore %r15
2633 movq -40(%rsi),%r14
2634.cfi_restore %r14
2635 movq -32(%rsi),%r13
2636.cfi_restore %r13
2637 movq -24(%rsi),%r12
2638.cfi_restore %r12
2639 movq -16(%rsi),%rbx
2640.cfi_restore %rbx
2641 movq -8(%rsi),%rbp
2642.cfi_restore %rbp
2643 leaq (%rsi),%rsp
2644.cfi_def_cfa_register %rsp
2645.Lpoint_doubleq_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08002646 .byte 0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07002647.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08002648.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
2649.globl ecp_nistz256_point_add
2650.hidden ecp_nistz256_point_add
2651.type ecp_nistz256_point_add,@function
2652.align 32
2653ecp_nistz256_point_add:
Robert Sloanab8b8882018-03-26 11:39:51 -07002654.cfi_startproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002655 leaq OPENSSL_ia32cap_P(%rip),%rcx
2656 movq 8(%rcx),%rcx
2657 andl $0x80100,%ecx
2658 cmpl $0x80100,%ecx
2659 je .Lpoint_addx
Adam Langleyfad63272015-11-12 12:15:39 -08002660 pushq %rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07002661.cfi_adjust_cfa_offset 8
2662.cfi_offset %rbp,-16
Adam Langleyfad63272015-11-12 12:15:39 -08002663 pushq %rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07002664.cfi_adjust_cfa_offset 8
2665.cfi_offset %rbx,-24
Adam Langleyfad63272015-11-12 12:15:39 -08002666 pushq %r12
Robert Sloanab8b8882018-03-26 11:39:51 -07002667.cfi_adjust_cfa_offset 8
2668.cfi_offset %r12,-32
Adam Langleyfad63272015-11-12 12:15:39 -08002669 pushq %r13
Robert Sloanab8b8882018-03-26 11:39:51 -07002670.cfi_adjust_cfa_offset 8
2671.cfi_offset %r13,-40
Adam Langleyfad63272015-11-12 12:15:39 -08002672 pushq %r14
Robert Sloanab8b8882018-03-26 11:39:51 -07002673.cfi_adjust_cfa_offset 8
2674.cfi_offset %r14,-48
Adam Langleyfad63272015-11-12 12:15:39 -08002675 pushq %r15
Robert Sloanab8b8882018-03-26 11:39:51 -07002676.cfi_adjust_cfa_offset 8
2677.cfi_offset %r15,-56
Adam Langleyfad63272015-11-12 12:15:39 -08002678 subq $576+8,%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07002679.cfi_adjust_cfa_offset 32*18+8
2680.Lpoint_addq_body:
Adam Langleyfad63272015-11-12 12:15:39 -08002681
2682 movdqu 0(%rsi),%xmm0
2683 movdqu 16(%rsi),%xmm1
2684 movdqu 32(%rsi),%xmm2
2685 movdqu 48(%rsi),%xmm3
2686 movdqu 64(%rsi),%xmm4
2687 movdqu 80(%rsi),%xmm5
2688 movq %rsi,%rbx
2689 movq %rdx,%rsi
2690 movdqa %xmm0,384(%rsp)
2691 movdqa %xmm1,384+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08002692 movdqa %xmm2,416(%rsp)
2693 movdqa %xmm3,416+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08002694 movdqa %xmm4,448(%rsp)
2695 movdqa %xmm5,448+16(%rsp)
Steven Valdez909b19f2016-11-21 15:35:44 -05002696 por %xmm4,%xmm5
Adam Langleyfad63272015-11-12 12:15:39 -08002697
2698 movdqu 0(%rsi),%xmm0
Steven Valdez909b19f2016-11-21 15:35:44 -05002699 pshufd $0xb1,%xmm5,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08002700 movdqu 16(%rsi),%xmm1
2701 movdqu 32(%rsi),%xmm2
2702 por %xmm3,%xmm5
2703 movdqu 48(%rsi),%xmm3
2704 movq 64+0(%rsi),%rax
2705 movq 64+8(%rsi),%r14
2706 movq 64+16(%rsi),%r15
2707 movq 64+24(%rsi),%r8
2708 movdqa %xmm0,480(%rsp)
David Benjamin4969cc92016-04-22 15:02:23 -04002709 pshufd $0x1e,%xmm5,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08002710 movdqa %xmm1,480+16(%rsp)
Steven Valdez909b19f2016-11-21 15:35:44 -05002711 movdqu 64(%rsi),%xmm0
2712 movdqu 80(%rsi),%xmm1
Adam Langleyfad63272015-11-12 12:15:39 -08002713 movdqa %xmm2,512(%rsp)
2714 movdqa %xmm3,512+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08002715 por %xmm4,%xmm5
2716 pxor %xmm4,%xmm4
Steven Valdez909b19f2016-11-21 15:35:44 -05002717 por %xmm0,%xmm1
2718.byte 102,72,15,110,199
Adam Langleyfad63272015-11-12 12:15:39 -08002719
2720 leaq 64-0(%rsi),%rsi
2721 movq %rax,544+0(%rsp)
2722 movq %r14,544+8(%rsp)
2723 movq %r15,544+16(%rsp)
2724 movq %r8,544+24(%rsp)
2725 leaq 96(%rsp),%rdi
2726 call __ecp_nistz256_sqr_montq
2727
2728 pcmpeqd %xmm4,%xmm5
Steven Valdez909b19f2016-11-21 15:35:44 -05002729 pshufd $0xb1,%xmm1,%xmm4
2730 por %xmm1,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08002731 pshufd $0,%xmm5,%xmm5
David Benjamin4969cc92016-04-22 15:02:23 -04002732 pshufd $0x1e,%xmm4,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08002733 por %xmm3,%xmm4
2734 pxor %xmm3,%xmm3
2735 pcmpeqd %xmm3,%xmm4
2736 pshufd $0,%xmm4,%xmm4
2737 movq 64+0(%rbx),%rax
2738 movq 64+8(%rbx),%r14
2739 movq 64+16(%rbx),%r15
2740 movq 64+24(%rbx),%r8
David Benjamin4969cc92016-04-22 15:02:23 -04002741.byte 102,72,15,110,203
Adam Langleyfad63272015-11-12 12:15:39 -08002742
2743 leaq 64-0(%rbx),%rsi
2744 leaq 32(%rsp),%rdi
2745 call __ecp_nistz256_sqr_montq
2746
2747 movq 544(%rsp),%rax
2748 leaq 544(%rsp),%rbx
2749 movq 0+96(%rsp),%r9
2750 movq 8+96(%rsp),%r10
2751 leaq 0+96(%rsp),%rsi
2752 movq 16+96(%rsp),%r11
2753 movq 24+96(%rsp),%r12
2754 leaq 224(%rsp),%rdi
2755 call __ecp_nistz256_mul_montq
2756
2757 movq 448(%rsp),%rax
2758 leaq 448(%rsp),%rbx
2759 movq 0+32(%rsp),%r9
2760 movq 8+32(%rsp),%r10
2761 leaq 0+32(%rsp),%rsi
2762 movq 16+32(%rsp),%r11
2763 movq 24+32(%rsp),%r12
2764 leaq 256(%rsp),%rdi
2765 call __ecp_nistz256_mul_montq
2766
2767 movq 416(%rsp),%rax
2768 leaq 416(%rsp),%rbx
2769 movq 0+224(%rsp),%r9
2770 movq 8+224(%rsp),%r10
2771 leaq 0+224(%rsp),%rsi
2772 movq 16+224(%rsp),%r11
2773 movq 24+224(%rsp),%r12
2774 leaq 224(%rsp),%rdi
2775 call __ecp_nistz256_mul_montq
2776
2777 movq 512(%rsp),%rax
2778 leaq 512(%rsp),%rbx
2779 movq 0+256(%rsp),%r9
2780 movq 8+256(%rsp),%r10
2781 leaq 0+256(%rsp),%rsi
2782 movq 16+256(%rsp),%r11
2783 movq 24+256(%rsp),%r12
2784 leaq 256(%rsp),%rdi
2785 call __ecp_nistz256_mul_montq
2786
2787 leaq 224(%rsp),%rbx
2788 leaq 64(%rsp),%rdi
2789 call __ecp_nistz256_sub_fromq
2790
2791 orq %r13,%r12
2792 movdqa %xmm4,%xmm2
2793 orq %r8,%r12
2794 orq %r9,%r12
2795 por %xmm5,%xmm2
2796.byte 102,73,15,110,220
2797
2798 movq 384(%rsp),%rax
2799 leaq 384(%rsp),%rbx
2800 movq 0+96(%rsp),%r9
2801 movq 8+96(%rsp),%r10
2802 leaq 0+96(%rsp),%rsi
2803 movq 16+96(%rsp),%r11
2804 movq 24+96(%rsp),%r12
2805 leaq 160(%rsp),%rdi
2806 call __ecp_nistz256_mul_montq
2807
2808 movq 480(%rsp),%rax
2809 leaq 480(%rsp),%rbx
2810 movq 0+32(%rsp),%r9
2811 movq 8+32(%rsp),%r10
2812 leaq 0+32(%rsp),%rsi
2813 movq 16+32(%rsp),%r11
2814 movq 24+32(%rsp),%r12
2815 leaq 192(%rsp),%rdi
2816 call __ecp_nistz256_mul_montq
2817
2818 leaq 160(%rsp),%rbx
2819 leaq 0(%rsp),%rdi
2820 call __ecp_nistz256_sub_fromq
2821
2822 orq %r13,%r12
2823 orq %r8,%r12
2824 orq %r9,%r12
2825
Pete Bentleya5c947b2019-08-09 14:24:27 +00002826.byte 0x3e
Pete Bentley228bd622019-08-08 14:53:19 +00002827 jnz .Ladd_proceedq
Srinivas Paladugudd42a612019-08-09 19:30:39 +00002828.byte 102,73,15,126,208
2829.byte 102,73,15,126,217
2830 testq %r8,%r8
2831 jnz .Ladd_proceedq
Adam Langleyfad63272015-11-12 12:15:39 -08002832 testq %r9,%r9
David Benjamin4969cc92016-04-22 15:02:23 -04002833 jz .Ladd_doubleq
Adam Langleyfad63272015-11-12 12:15:39 -08002834
2835.byte 102,72,15,126,199
2836 pxor %xmm0,%xmm0
2837 movdqu %xmm0,0(%rdi)
2838 movdqu %xmm0,16(%rdi)
2839 movdqu %xmm0,32(%rdi)
2840 movdqu %xmm0,48(%rdi)
2841 movdqu %xmm0,64(%rdi)
2842 movdqu %xmm0,80(%rdi)
2843 jmp .Ladd_doneq
2844
2845.align 32
David Benjamin4969cc92016-04-22 15:02:23 -04002846.Ladd_doubleq:
2847.byte 102,72,15,126,206
2848.byte 102,72,15,126,199
2849 addq $416,%rsp
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002850.cfi_adjust_cfa_offset -416
David Benjamin4969cc92016-04-22 15:02:23 -04002851 jmp .Lpoint_double_shortcutq
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002852.cfi_adjust_cfa_offset 416
David Benjamin4969cc92016-04-22 15:02:23 -04002853
2854.align 32
Adam Langleyfad63272015-11-12 12:15:39 -08002855.Ladd_proceedq:
2856 movq 0+64(%rsp),%rax
2857 movq 8+64(%rsp),%r14
2858 leaq 0+64(%rsp),%rsi
2859 movq 16+64(%rsp),%r15
2860 movq 24+64(%rsp),%r8
2861 leaq 96(%rsp),%rdi
2862 call __ecp_nistz256_sqr_montq
2863
2864 movq 448(%rsp),%rax
2865 leaq 448(%rsp),%rbx
2866 movq 0+0(%rsp),%r9
2867 movq 8+0(%rsp),%r10
2868 leaq 0+0(%rsp),%rsi
2869 movq 16+0(%rsp),%r11
2870 movq 24+0(%rsp),%r12
2871 leaq 352(%rsp),%rdi
2872 call __ecp_nistz256_mul_montq
2873
2874 movq 0+0(%rsp),%rax
2875 movq 8+0(%rsp),%r14
2876 leaq 0+0(%rsp),%rsi
2877 movq 16+0(%rsp),%r15
2878 movq 24+0(%rsp),%r8
2879 leaq 32(%rsp),%rdi
2880 call __ecp_nistz256_sqr_montq
2881
2882 movq 544(%rsp),%rax
2883 leaq 544(%rsp),%rbx
2884 movq 0+352(%rsp),%r9
2885 movq 8+352(%rsp),%r10
2886 leaq 0+352(%rsp),%rsi
2887 movq 16+352(%rsp),%r11
2888 movq 24+352(%rsp),%r12
2889 leaq 352(%rsp),%rdi
2890 call __ecp_nistz256_mul_montq
2891
2892 movq 0(%rsp),%rax
2893 leaq 0(%rsp),%rbx
2894 movq 0+32(%rsp),%r9
2895 movq 8+32(%rsp),%r10
2896 leaq 0+32(%rsp),%rsi
2897 movq 16+32(%rsp),%r11
2898 movq 24+32(%rsp),%r12
2899 leaq 128(%rsp),%rdi
2900 call __ecp_nistz256_mul_montq
2901
2902 movq 160(%rsp),%rax
2903 leaq 160(%rsp),%rbx
2904 movq 0+32(%rsp),%r9
2905 movq 8+32(%rsp),%r10
2906 leaq 0+32(%rsp),%rsi
2907 movq 16+32(%rsp),%r11
2908 movq 24+32(%rsp),%r12
2909 leaq 192(%rsp),%rdi
2910 call __ecp_nistz256_mul_montq
2911
2912
2913
2914
Steven Valdez909b19f2016-11-21 15:35:44 -05002915 xorq %r11,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002916 addq %r12,%r12
2917 leaq 96(%rsp),%rsi
2918 adcq %r13,%r13
2919 movq %r12,%rax
2920 adcq %r8,%r8
2921 adcq %r9,%r9
2922 movq %r13,%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05002923 adcq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002924
2925 subq $-1,%r12
2926 movq %r8,%rcx
2927 sbbq %r14,%r13
2928 sbbq $0,%r8
2929 movq %r9,%r10
2930 sbbq %r15,%r9
Steven Valdez909b19f2016-11-21 15:35:44 -05002931 sbbq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08002932
Steven Valdez909b19f2016-11-21 15:35:44 -05002933 cmovcq %rax,%r12
Adam Langleyfad63272015-11-12 12:15:39 -08002934 movq 0(%rsi),%rax
Steven Valdez909b19f2016-11-21 15:35:44 -05002935 cmovcq %rbp,%r13
Adam Langleyfad63272015-11-12 12:15:39 -08002936 movq 8(%rsi),%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05002937 cmovcq %rcx,%r8
Adam Langleyfad63272015-11-12 12:15:39 -08002938 movq 16(%rsi),%rcx
Steven Valdez909b19f2016-11-21 15:35:44 -05002939 cmovcq %r10,%r9
Adam Langleyfad63272015-11-12 12:15:39 -08002940 movq 24(%rsi),%r10
2941
2942 call __ecp_nistz256_subq
2943
2944 leaq 128(%rsp),%rbx
2945 leaq 288(%rsp),%rdi
2946 call __ecp_nistz256_sub_fromq
2947
2948 movq 192+0(%rsp),%rax
2949 movq 192+8(%rsp),%rbp
2950 movq 192+16(%rsp),%rcx
2951 movq 192+24(%rsp),%r10
2952 leaq 320(%rsp),%rdi
2953
2954 call __ecp_nistz256_subq
2955
2956 movq %r12,0(%rdi)
2957 movq %r13,8(%rdi)
2958 movq %r8,16(%rdi)
2959 movq %r9,24(%rdi)
2960 movq 128(%rsp),%rax
2961 leaq 128(%rsp),%rbx
2962 movq 0+224(%rsp),%r9
2963 movq 8+224(%rsp),%r10
2964 leaq 0+224(%rsp),%rsi
2965 movq 16+224(%rsp),%r11
2966 movq 24+224(%rsp),%r12
2967 leaq 256(%rsp),%rdi
2968 call __ecp_nistz256_mul_montq
2969
2970 movq 320(%rsp),%rax
2971 leaq 320(%rsp),%rbx
2972 movq 0+64(%rsp),%r9
2973 movq 8+64(%rsp),%r10
2974 leaq 0+64(%rsp),%rsi
2975 movq 16+64(%rsp),%r11
2976 movq 24+64(%rsp),%r12
2977 leaq 320(%rsp),%rdi
2978 call __ecp_nistz256_mul_montq
2979
2980 leaq 256(%rsp),%rbx
2981 leaq 320(%rsp),%rdi
2982 call __ecp_nistz256_sub_fromq
2983
2984.byte 102,72,15,126,199
2985
2986 movdqa %xmm5,%xmm0
2987 movdqa %xmm5,%xmm1
2988 pandn 352(%rsp),%xmm0
2989 movdqa %xmm5,%xmm2
2990 pandn 352+16(%rsp),%xmm1
2991 movdqa %xmm5,%xmm3
2992 pand 544(%rsp),%xmm2
2993 pand 544+16(%rsp),%xmm3
2994 por %xmm0,%xmm2
2995 por %xmm1,%xmm3
2996
2997 movdqa %xmm4,%xmm0
2998 movdqa %xmm4,%xmm1
2999 pandn %xmm2,%xmm0
3000 movdqa %xmm4,%xmm2
3001 pandn %xmm3,%xmm1
3002 movdqa %xmm4,%xmm3
3003 pand 448(%rsp),%xmm2
3004 pand 448+16(%rsp),%xmm3
3005 por %xmm0,%xmm2
3006 por %xmm1,%xmm3
3007 movdqu %xmm2,64(%rdi)
3008 movdqu %xmm3,80(%rdi)
3009
3010 movdqa %xmm5,%xmm0
3011 movdqa %xmm5,%xmm1
3012 pandn 288(%rsp),%xmm0
3013 movdqa %xmm5,%xmm2
3014 pandn 288+16(%rsp),%xmm1
3015 movdqa %xmm5,%xmm3
3016 pand 480(%rsp),%xmm2
3017 pand 480+16(%rsp),%xmm3
3018 por %xmm0,%xmm2
3019 por %xmm1,%xmm3
3020
3021 movdqa %xmm4,%xmm0
3022 movdqa %xmm4,%xmm1
3023 pandn %xmm2,%xmm0
3024 movdqa %xmm4,%xmm2
3025 pandn %xmm3,%xmm1
3026 movdqa %xmm4,%xmm3
3027 pand 384(%rsp),%xmm2
3028 pand 384+16(%rsp),%xmm3
3029 por %xmm0,%xmm2
3030 por %xmm1,%xmm3
3031 movdqu %xmm2,0(%rdi)
3032 movdqu %xmm3,16(%rdi)
3033
3034 movdqa %xmm5,%xmm0
3035 movdqa %xmm5,%xmm1
3036 pandn 320(%rsp),%xmm0
3037 movdqa %xmm5,%xmm2
3038 pandn 320+16(%rsp),%xmm1
3039 movdqa %xmm5,%xmm3
3040 pand 512(%rsp),%xmm2
3041 pand 512+16(%rsp),%xmm3
3042 por %xmm0,%xmm2
3043 por %xmm1,%xmm3
3044
3045 movdqa %xmm4,%xmm0
3046 movdqa %xmm4,%xmm1
3047 pandn %xmm2,%xmm0
3048 movdqa %xmm4,%xmm2
3049 pandn %xmm3,%xmm1
3050 movdqa %xmm4,%xmm3
3051 pand 416(%rsp),%xmm2
3052 pand 416+16(%rsp),%xmm3
3053 por %xmm0,%xmm2
3054 por %xmm1,%xmm3
3055 movdqu %xmm2,32(%rdi)
3056 movdqu %xmm3,48(%rdi)
3057
3058.Ladd_doneq:
Robert Sloanab8b8882018-03-26 11:39:51 -07003059 leaq 576+56(%rsp),%rsi
3060.cfi_def_cfa %rsi,8
3061 movq -48(%rsi),%r15
3062.cfi_restore %r15
3063 movq -40(%rsi),%r14
3064.cfi_restore %r14
3065 movq -32(%rsi),%r13
3066.cfi_restore %r13
3067 movq -24(%rsi),%r12
3068.cfi_restore %r12
3069 movq -16(%rsi),%rbx
3070.cfi_restore %rbx
3071 movq -8(%rsi),%rbp
3072.cfi_restore %rbp
3073 leaq (%rsi),%rsp
3074.cfi_def_cfa_register %rsp
3075.Lpoint_addq_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08003076 .byte 0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07003077.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08003078.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
# ecp_nistz256_point_add_affine: P-256 mixed point addition, scalar (mulq)
# flavour.  Adds a full Jacobian point (second argument, %rsi) and an
# affine point (third argument, %rdx) and writes the Jacobian result to
# the first argument (%rdi).
# NOTE(review): argument roles inferred from the register usage below and
# the BoringSSL C declaration of this symbol -- confirm against the
# ecp_nistz256 header.  All arithmetic is constant-time: point-at-infinity
# handling is done with SSE2 masks (pcmpeqd/pand/pandn/por), never with
# data-dependent branches.
3079.globl ecp_nistz256_point_add_affine
3080.hidden ecp_nistz256_point_add_affine
3081.type ecp_nistz256_point_add_affine,@function
3082.align 32
3083ecp_nistz256_point_add_affine:
Robert Sloanab8b8882018-03-26 11:39:51 -07003084.cfi_startproc
# CPU dispatch: when word 1 of OPENSSL_ia32cap_P has bits 0x80100 set
# (presumably the BMI2 + ADX feature bits -- confirm against the
# OPENSSL_ia32cap_P layout), jump to the mulx/adcx-based variant.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003085	leaq	OPENSSL_ia32cap_P(%rip),%rcx
3086	movq	8(%rcx),%rcx
3087	andl	$0x80100,%ecx
3088	cmpl	$0x80100,%ecx
3089	je	.Lpoint_add_affinex
# Prologue: save all callee-saved GPRs and reserve 15 32-byte field
# elements (+8 alignment) of scratch on the stack.
Adam Langleyfad63272015-11-12 12:15:39 -08003090	pushq	%rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07003091.cfi_adjust_cfa_offset	8
3092.cfi_offset	%rbp,-16
Adam Langleyfad63272015-11-12 12:15:39 -08003093	pushq	%rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07003094.cfi_adjust_cfa_offset	8
3095.cfi_offset	%rbx,-24
Adam Langleyfad63272015-11-12 12:15:39 -08003096	pushq	%r12
Robert Sloanab8b8882018-03-26 11:39:51 -07003097.cfi_adjust_cfa_offset	8
3098.cfi_offset	%r12,-32
Adam Langleyfad63272015-11-12 12:15:39 -08003099	pushq	%r13
Robert Sloanab8b8882018-03-26 11:39:51 -07003100.cfi_adjust_cfa_offset	8
3101.cfi_offset	%r13,-40
Adam Langleyfad63272015-11-12 12:15:39 -08003102	pushq	%r14
Robert Sloanab8b8882018-03-26 11:39:51 -07003103.cfi_adjust_cfa_offset	8
3104.cfi_offset	%r14,-48
Adam Langleyfad63272015-11-12 12:15:39 -08003105	pushq	%r15
Robert Sloanab8b8882018-03-26 11:39:51 -07003106.cfi_adjust_cfa_offset	8
3107.cfi_offset	%r15,-56
Adam Langleyfad63272015-11-12 12:15:39 -08003108	subq	$480+8,%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07003109.cfi_adjust_cfa_offset	32*15+8
3110.Ladd_affineq_body:
Adam Langleyfad63272015-11-12 12:15:39 -08003111
# Copy the Jacobian input point (96 bytes: X at 320, Y at 352, Z at 384)
# to stack scratch; xmm5 accumulates an is-Z-zero test for the infinity
# mask below.
3112	movdqu	0(%rsi),%xmm0
3113	movq	%rdx,%rbx
3114	movdqu	16(%rsi),%xmm1
3115	movdqu	32(%rsi),%xmm2
3116	movdqu	48(%rsi),%xmm3
3117	movdqu	64(%rsi),%xmm4
3118	movdqu	80(%rsi),%xmm5
3119	movq	64+0(%rsi),%rax
3120	movq	64+8(%rsi),%r14
3121	movq	64+16(%rsi),%r15
3122	movq	64+24(%rsi),%r8
3123	movdqa	%xmm0,320(%rsp)
3124	movdqa	%xmm1,320+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08003125	movdqa	%xmm2,352(%rsp)
3126	movdqa	%xmm3,352+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08003127	movdqa	%xmm4,384(%rsp)
3128	movdqa	%xmm5,384+16(%rsp)
Steven Valdez909b19f2016-11-21 15:35:44 -05003129	por	%xmm4,%xmm5
Adam Langleyfad63272015-11-12 12:15:39 -08003130
# Copy the affine input point (x at 416, y at 448); xmm3 accumulates its
# is-zero test while xmm5 is folded into a 4-lane all-ones/all-zeros mask.
3131	movdqu	0(%rbx),%xmm0
Steven Valdez909b19f2016-11-21 15:35:44 -05003132	pshufd	$0xb1,%xmm5,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08003133	movdqu	16(%rbx),%xmm1
3134	movdqu	32(%rbx),%xmm2
3135	por	%xmm3,%xmm5
3136	movdqu	48(%rbx),%xmm3
3137	movdqa	%xmm0,416(%rsp)
David Benjamin4969cc92016-04-22 15:02:23 -04003138	pshufd	$0x1e,%xmm5,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08003139	movdqa	%xmm1,416+16(%rsp)
3140	por	%xmm0,%xmm1
# hand-encoded movq %rax,%xmm0 (66 REX.W 0F 6E /r -- GPR->XMM move)
3141.byte	102,72,15,110,199
3142	movdqa	%xmm2,448(%rsp)
3143	movdqa	%xmm3,448+16(%rsp)
3144	por	%xmm2,%xmm3
3145	por	%xmm4,%xmm5
3146	pxor	%xmm4,%xmm4
3147	por	%xmm1,%xmm3
3148
# Z1^2 -> 32(%rsp)
3149	leaq	64-0(%rsi),%rsi
3150	leaq	32(%rsp),%rdi
3151	call	__ecp_nistz256_sqr_montq
3152
# Finish the infinity masks: xmm5 = all-ones iff the Jacobian point is at
# infinity (Z1 == 0), xmm4 = all-ones iff the affine point is all-zero.
3153	pcmpeqd	%xmm4,%xmm5
David Benjamin4969cc92016-04-22 15:02:23 -04003154	pshufd	$0xb1,%xmm3,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08003155	movq	0(%rbx),%rax
3156
3157	movq	%r12,%r9
3158	por	%xmm3,%xmm4
3159	pshufd	$0,%xmm5,%xmm5
David Benjamin4969cc92016-04-22 15:02:23 -04003160	pshufd	$0x1e,%xmm4,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08003161	movq	%r13,%r10
3162	por	%xmm3,%xmm4
3163	pxor	%xmm3,%xmm3
3164	movq	%r14,%r11
3165	pcmpeqd	%xmm3,%xmm4
3166	pshufd	$0,%xmm4,%xmm4
3167
# U2 = x2 * Z1^2 -> 0(%rsp)
3168	leaq	32-0(%rsp),%rsi
3169	movq	%r15,%r12
3170	leaq	0(%rsp),%rdi
3171	call	__ecp_nistz256_mul_montq
3172
# H = U2 - X1 -> 64(%rsp)
3173	leaq	320(%rsp),%rbx
3174	leaq	64(%rsp),%rdi
3175	call	__ecp_nistz256_sub_fromq
3176
# S2 = Z1^3 * y2 chain: Z1^3 -> 32, then 288 and 32 below.
3177	movq	384(%rsp),%rax
3178	leaq	384(%rsp),%rbx
3179	movq	0+32(%rsp),%r9
3180	movq	8+32(%rsp),%r10
3181	leaq	0+32(%rsp),%rsi
3182	movq	16+32(%rsp),%r11
3183	movq	24+32(%rsp),%r12
3184	leaq	32(%rsp),%rdi
3185	call	__ecp_nistz256_mul_montq
3186
# Z3 = Z1 * H -> 288(%rsp)
3187	movq	384(%rsp),%rax
3188	leaq	384(%rsp),%rbx
3189	movq	0+64(%rsp),%r9
3190	movq	8+64(%rsp),%r10
3191	leaq	0+64(%rsp),%rsi
3192	movq	16+64(%rsp),%r11
3193	movq	24+64(%rsp),%r12
3194	leaq	288(%rsp),%rdi
3195	call	__ecp_nistz256_mul_montq
3196
# S2 = y2 * Z1^3 -> 32(%rsp)
3197	movq	448(%rsp),%rax
3198	leaq	448(%rsp),%rbx
3199	movq	0+32(%rsp),%r9
3200	movq	8+32(%rsp),%r10
3201	leaq	0+32(%rsp),%rsi
3202	movq	16+32(%rsp),%r11
3203	movq	24+32(%rsp),%r12
3204	leaq	32(%rsp),%rdi
3205	call	__ecp_nistz256_mul_montq
3206
# R = S2 - Y1 -> 96(%rsp)
3207	leaq	352(%rsp),%rbx
3208	leaq	96(%rsp),%rdi
3209	call	__ecp_nistz256_sub_fromq
3210
# H^2 -> 128(%rsp), R^2 -> 192(%rsp)
3211	movq	0+64(%rsp),%rax
3212	movq	8+64(%rsp),%r14
3213	leaq	0+64(%rsp),%rsi
3214	movq	16+64(%rsp),%r15
3215	movq	24+64(%rsp),%r8
3216	leaq	128(%rsp),%rdi
3217	call	__ecp_nistz256_sqr_montq
3218
3219	movq	0+96(%rsp),%rax
3220	movq	8+96(%rsp),%r14
3221	leaq	0+96(%rsp),%rsi
3222	movq	16+96(%rsp),%r15
3223	movq	24+96(%rsp),%r8
3224	leaq	192(%rsp),%rdi
3225	call	__ecp_nistz256_sqr_montq
3226
# H^3 -> 160(%rsp), U1*H^2 -> 0(%rsp)
3227	movq	128(%rsp),%rax
3228	leaq	128(%rsp),%rbx
3229	movq	0+64(%rsp),%r9
3230	movq	8+64(%rsp),%r10
3231	leaq	0+64(%rsp),%rsi
3232	movq	16+64(%rsp),%r11
3233	movq	24+64(%rsp),%r12
3234	leaq	160(%rsp),%rdi
3235	call	__ecp_nistz256_mul_montq
3236
3237	movq	320(%rsp),%rax
3238	leaq	320(%rsp),%rbx
3239	movq	0+128(%rsp),%r9
3240	movq	8+128(%rsp),%r10
3241	leaq	0+128(%rsp),%rsi
3242	movq	16+128(%rsp),%r11
3243	movq	24+128(%rsp),%r12
3244	leaq	0(%rsp),%rdi
3245	call	__ecp_nistz256_mul_montq
3246
3247
3248
3249
# Inline doubling of the last product modulo p (add-to-self with a final
# conditional subtraction of .Lpoly, selected branch-free with cmovc).
Steven Valdez909b19f2016-11-21 15:35:44 -05003250	xorq	%r11,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08003251	addq	%r12,%r12
3252	leaq	192(%rsp),%rsi
3253	adcq	%r13,%r13
3254	movq	%r12,%rax
3255	adcq	%r8,%r8
3256	adcq	%r9,%r9
3257	movq	%r13,%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05003258	adcq	$0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08003259
3260	subq	$-1,%r12
3261	movq	%r8,%rcx
3262	sbbq	%r14,%r13
3263	sbbq	$0,%r8
3264	movq	%r9,%r10
3265	sbbq	%r15,%r9
Steven Valdez909b19f2016-11-21 15:35:44 -05003266	sbbq	$0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08003267
Steven Valdez909b19f2016-11-21 15:35:44 -05003268	cmovcq	%rax,%r12
Adam Langleyfad63272015-11-12 12:15:39 -08003269	movq	0(%rsi),%rax
Steven Valdez909b19f2016-11-21 15:35:44 -05003270	cmovcq	%rbp,%r13
Adam Langleyfad63272015-11-12 12:15:39 -08003271	movq	8(%rsi),%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05003272	cmovcq	%rcx,%r8
Adam Langleyfad63272015-11-12 12:15:39 -08003273	movq	16(%rsi),%rcx
Steven Valdez909b19f2016-11-21 15:35:44 -05003274	cmovcq	%r10,%r9
Adam Langleyfad63272015-11-12 12:15:39 -08003275	movq	24(%rsi),%r10
3276
# X3 = R^2 - H^3 - 2*U1*H^2 -> 224(%rsp)
3277	call	__ecp_nistz256_subq
3278
3279	leaq	160(%rsp),%rbx
3280	leaq	224(%rsp),%rdi
3281	call	__ecp_nistz256_sub_fromq
3282
3283	movq	0+0(%rsp),%rax
3284	movq	0+8(%rsp),%rbp
3285	movq	0+16(%rsp),%rcx
3286	movq	0+24(%rsp),%r10
3287	leaq	64(%rsp),%rdi
3288
3289	call	__ecp_nistz256_subq
3290
# Y3 = R*(U1*H^2 - X3) - Y1*H^3 -> 256(%rsp)
3291	movq	%r12,0(%rdi)
3292	movq	%r13,8(%rdi)
3293	movq	%r8,16(%rdi)
3294	movq	%r9,24(%rdi)
3295	movq	352(%rsp),%rax
3296	leaq	352(%rsp),%rbx
3297	movq	0+160(%rsp),%r9
3298	movq	8+160(%rsp),%r10
3299	leaq	0+160(%rsp),%rsi
3300	movq	16+160(%rsp),%r11
3301	movq	24+160(%rsp),%r12
3302	leaq	32(%rsp),%rdi
3303	call	__ecp_nistz256_mul_montq
3304
3305	movq	96(%rsp),%rax
3306	leaq	96(%rsp),%rbx
3307	movq	0+64(%rsp),%r9
3308	movq	8+64(%rsp),%r10
3309	leaq	0+64(%rsp),%rsi
3310	movq	16+64(%rsp),%r11
3311	movq	24+64(%rsp),%r12
3312	leaq	64(%rsp),%rdi
3313	call	__ecp_nistz256_mul_montq
3314
3315	leaq	32(%rsp),%rbx
3316	leaq	256(%rsp),%rdi
3317	call	__ecp_nistz256_sub_fromq
3318
# hand-encoded movq %xmm0,%rdi -- restore the result pointer
3319.byte	102,72,15,126,199
3320
# Constant-time result selection for the special cases:
#   xmm5 mask (a == infinity)  -> take the affine point (x2, y2, ONE_mont)
#   xmm4 mask (b == zero)      -> take the Jacobian input unchanged
#   otherwise                  -> take the freshly computed sum.
# Z coordinate:
3321	movdqa	%xmm5,%xmm0
3322	movdqa	%xmm5,%xmm1
3323	pandn	288(%rsp),%xmm0
3324	movdqa	%xmm5,%xmm2
3325	pandn	288+16(%rsp),%xmm1
3326	movdqa	%xmm5,%xmm3
3327	pand	.LONE_mont(%rip),%xmm2
3328	pand	.LONE_mont+16(%rip),%xmm3
3329	por	%xmm0,%xmm2
3330	por	%xmm1,%xmm3
3331
3332	movdqa	%xmm4,%xmm0
3333	movdqa	%xmm4,%xmm1
3334	pandn	%xmm2,%xmm0
3335	movdqa	%xmm4,%xmm2
3336	pandn	%xmm3,%xmm1
3337	movdqa	%xmm4,%xmm3
3338	pand	384(%rsp),%xmm2
3339	pand	384+16(%rsp),%xmm3
3340	por	%xmm0,%xmm2
3341	por	%xmm1,%xmm3
3342	movdqu	%xmm2,64(%rdi)
3343	movdqu	%xmm3,80(%rdi)
3344
# X coordinate:
3345	movdqa	%xmm5,%xmm0
3346	movdqa	%xmm5,%xmm1
3347	pandn	224(%rsp),%xmm0
3348	movdqa	%xmm5,%xmm2
3349	pandn	224+16(%rsp),%xmm1
3350	movdqa	%xmm5,%xmm3
3351	pand	416(%rsp),%xmm2
3352	pand	416+16(%rsp),%xmm3
3353	por	%xmm0,%xmm2
3354	por	%xmm1,%xmm3
3355
3356	movdqa	%xmm4,%xmm0
3357	movdqa	%xmm4,%xmm1
3358	pandn	%xmm2,%xmm0
3359	movdqa	%xmm4,%xmm2
3360	pandn	%xmm3,%xmm1
3361	movdqa	%xmm4,%xmm3
3362	pand	320(%rsp),%xmm2
3363	pand	320+16(%rsp),%xmm3
3364	por	%xmm0,%xmm2
3365	por	%xmm1,%xmm3
3366	movdqu	%xmm2,0(%rdi)
3367	movdqu	%xmm3,16(%rdi)
3368
# Y coordinate:
3369	movdqa	%xmm5,%xmm0
3370	movdqa	%xmm5,%xmm1
3371	pandn	256(%rsp),%xmm0
3372	movdqa	%xmm5,%xmm2
3373	pandn	256+16(%rsp),%xmm1
3374	movdqa	%xmm5,%xmm3
3375	pand	448(%rsp),%xmm2
3376	pand	448+16(%rsp),%xmm3
3377	por	%xmm0,%xmm2
3378	por	%xmm1,%xmm3
3379
3380	movdqa	%xmm4,%xmm0
3381	movdqa	%xmm4,%xmm1
3382	pandn	%xmm2,%xmm0
3383	movdqa	%xmm4,%xmm2
3384	pandn	%xmm3,%xmm1
3385	movdqa	%xmm4,%xmm3
3386	pand	352(%rsp),%xmm2
3387	pand	352+16(%rsp),%xmm3
3388	por	%xmm0,%xmm2
3389	por	%xmm1,%xmm3
3390	movdqu	%xmm2,32(%rdi)
3391	movdqu	%xmm3,48(%rdi)
3392
# Epilogue: tear down the frame, restore callee-saved registers, return.
Robert Sloanab8b8882018-03-26 11:39:51 -07003393	leaq	480+56(%rsp),%rsi
3394.cfi_def_cfa	%rsi,8
3395	movq	-48(%rsi),%r15
3396.cfi_restore	%r15
3397	movq	-40(%rsi),%r14
3398.cfi_restore	%r14
3399	movq	-32(%rsi),%r13
3400.cfi_restore	%r13
3401	movq	-24(%rsi),%r12
3402.cfi_restore	%r12
3403	movq	-16(%rsi),%rbx
3404.cfi_restore	%rbx
3405	movq	-8(%rsi),%rbp
3406.cfi_restore	%rbp
3407	leaq	(%rsi),%rsp
3408.cfi_def_cfa_register	%rsp
3409.Ladd_affineq_epilogue:
# rep ret (F3 C3): branch-predictor-friendly return on older AMD cores
Adam Langleyfad63272015-11-12 12:15:39 -08003410	.byte	0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07003411.cfi_endproc
Adam Langleyfad63272015-11-12 12:15:39 -08003412.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
# __ecp_nistz256_add_tox: constant-time modular addition for the
# BMI2/ADX ("x") code path.
#   In:  (%r12,%r13,%r8,%r9) = a (4 x 64-bit limbs, least significant first)
#        (%rbx)              = pointer to b
#        %r14, %r15          = middle words of the P-256 prime .Lpoly
#                              (callers load them; see .Lpoint_double_shortcutx)
#   Out: result = (a + b) mod p, left in (%r12,%r13,%r8,%r9) and stored
#        to 0..24(%rdi).
# The xorq clears CF so the adcq chain starts with no carry-in; the trial
# subtraction of p (limbs -1, %r14, 0, %r15) is then undone branch-free
# with cmovc when it borrows.
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003413.type	__ecp_nistz256_add_tox,@function
3414.align	32
3415__ecp_nistz256_add_tox:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003416.cfi_startproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003417	xorq	%r11,%r11
3418	adcq	0(%rbx),%r12
3419	adcq	8(%rbx),%r13
3420	movq	%r12,%rax
3421	adcq	16(%rbx),%r8
3422	adcq	24(%rbx),%r9
3423	movq	%r13,%rbp
3424	adcq	$0,%r11
3425
# Trial subtraction of p; %rax,%rbp,%rcx,%r10 keep the unreduced sum.
3426	xorq	%r10,%r10
3427	sbbq	$-1,%r12
3428	movq	%r8,%rcx
3429	sbbq	%r14,%r13
3430	sbbq	$0,%r8
3431	movq	%r9,%r10
3432	sbbq	%r15,%r9
3433	sbbq	$0,%r11
3434
# Borrow (CF set) means the sum was < p: keep the unreduced value.
3435	cmovcq	%rax,%r12
3436	cmovcq	%rbp,%r13
3437	movq	%r12,0(%rdi)
3438	cmovcq	%rcx,%r8
3439	movq	%r13,8(%rdi)
3440	cmovcq	%r10,%r9
3441	movq	%r8,16(%rdi)
3442	movq	%r9,24(%rdi)
3443
# rep ret (F3 C3)
3444	.byte	0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003445.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003446.size	__ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
3447
# __ecp_nistz256_sub_fromx: constant-time modular subtraction for the
# BMI2/ADX ("x") code path.
#   In:  (%r12,%r13,%r8,%r9) = a; (%rbx) = pointer to b;
#        %r14, %r15 = middle words of .Lpoly (loaded by callers).
#   Out: result = (a - b) mod p, left in (%r12,%r13,%r8,%r9) and stored
#        to 0..24(%rdi).
# xorq clears CF so the sbbq chain has no borrow-in; p is then added back
# unconditionally and the correct value is picked branch-free: bit 0 of
# %r11 records whether the subtraction borrowed (btq sets CF from it) and
# cmovnc discards the +p correction when it did not.
3448.type	__ecp_nistz256_sub_fromx,@function
3449.align	32
3450__ecp_nistz256_sub_fromx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003451.cfi_startproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003452	xorq	%r11,%r11
3453	sbbq	0(%rbx),%r12
3454	sbbq	8(%rbx),%r13
3455	movq	%r12,%rax
3456	sbbq	16(%rbx),%r8
3457	sbbq	24(%rbx),%r9
3458	movq	%r13,%rbp
3459	sbbq	$0,%r11
3460
# Add p back; %rax,%rbp,%rcx,%r10 keep the raw (possibly negative) diff.
3461	xorq	%r10,%r10
3462	adcq	$-1,%r12
3463	movq	%r8,%rcx
3464	adcq	%r14,%r13
3465	adcq	$0,%r8
3466	movq	%r9,%r10
3467	adcq	%r15,%r9
3468
3469	btq	$0,%r11
3470	cmovncq	%rax,%r12
3471	cmovncq	%rbp,%r13
3472	movq	%r12,0(%rdi)
3473	cmovncq	%rcx,%r8
3474	movq	%r13,8(%rdi)
3475	cmovncq	%r10,%r9
3476	movq	%r8,16(%rdi)
3477	movq	%r9,24(%rdi)
3478
# rep ret (F3 C3)
3479	.byte	0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003480.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003481.size	__ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
3482
# __ecp_nistz256_subx: constant-time modular subtraction, register-only
# variant for the BMI2/ADX ("x") code path.
#   In:  (%rax,%rbp,%rcx,%r10) = a; (%r12,%r13,%r8,%r9) = b;
#        %r14, %r15 = middle words of .Lpoly (loaded by callers).
#   Out: result = (a - b) mod p, left in (%r12,%r13,%r8,%r9).
#        No memory store -- unlike __ecp_nistz256_sub_fromx.
# Same branch-free reduction scheme: subtract, add p back, and use bit 0
# of %r11 (the borrow record) via btq/cmovc to select the reduced value.
3483.type	__ecp_nistz256_subx,@function
3484.align	32
3485__ecp_nistz256_subx:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003486.cfi_startproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003487	xorq	%r11,%r11
3488	sbbq	%r12,%rax
3489	sbbq	%r13,%rbp
3490	movq	%rax,%r12
3491	sbbq	%r8,%rcx
3492	sbbq	%r9,%r10
3493	movq	%rbp,%r13
3494	sbbq	$0,%r11
3495
# Add p into %rax,%rbp,%rcx,%r10; the raw diff is saved in %r12,%r13,%r8,%r9.
3496	xorq	%r9,%r9
3497	adcq	$-1,%rax
3498	movq	%rcx,%r8
3499	adcq	%r14,%rbp
3500	adcq	$0,%rcx
3501	movq	%r10,%r9
3502	adcq	%r15,%r10
3503
3504	btq	$0,%r11
3505	cmovcq	%rax,%r12
3506	cmovcq	%rbp,%r13
3507	cmovcq	%rcx,%r8
3508	cmovcq	%r10,%r9
3509
# rep ret (F3 C3)
3510	.byte	0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003511.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003512.size	__ecp_nistz256_subx,.-__ecp_nistz256_subx
3513
# __ecp_nistz256_mul_by_2x: constant-time modular doubling for the
# BMI2/ADX ("x") code path.
#   In:  (%r12,%r13,%r8,%r9) = a;
#        %r14, %r15 = middle words of .Lpoly (loaded by callers).
#   Out: result = (2*a) mod p, left in (%r12,%r13,%r8,%r9) and stored
#        to 0..24(%rdi).
# Doubling is an add-to-self via the adcq chain (xorq cleared CF first);
# reduction is the same trial-subtract-p + cmovc pattern as
# __ecp_nistz256_add_tox.
3514.type	__ecp_nistz256_mul_by_2x,@function
3515.align	32
3516__ecp_nistz256_mul_by_2x:
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003517.cfi_startproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003518	xorq	%r11,%r11
3519	adcq	%r12,%r12
3520	adcq	%r13,%r13
3521	movq	%r12,%rax
3522	adcq	%r8,%r8
3523	adcq	%r9,%r9
3524	movq	%r13,%rbp
3525	adcq	$0,%r11
3526
# Trial subtraction of p; %rax,%rbp,%rcx,%r10 keep the unreduced double.
3527	xorq	%r10,%r10
3528	sbbq	$-1,%r12
3529	movq	%r8,%rcx
3530	sbbq	%r14,%r13
3531	sbbq	$0,%r8
3532	movq	%r9,%r10
3533	sbbq	%r15,%r9
3534	sbbq	$0,%r11
3535
# Borrow means 2*a < p: keep the unreduced value.
3536	cmovcq	%rax,%r12
3537	cmovcq	%rbp,%r13
3538	movq	%r12,0(%rdi)
3539	cmovcq	%rcx,%r8
3540	movq	%r13,8(%rdi)
3541	cmovcq	%r10,%r9
3542	movq	%r8,16(%rdi)
3543	movq	%r9,24(%rdi)
3544
# rep ret (F3 C3)
3545	.byte	0xf3,0xc3
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003546.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003547.size	__ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
# ecp_nistz256_point_doublex: P-256 Jacobian point doubling, BMI2/ADX
# ("x") flavour -- the field helpers it calls use mulx/adcx/adox.
# NOTE(review): presumably only reached when the CPU-capability dispatch
# (the 0x80100 test visible in the q-path entry points) selects the x
# path -- confirm against the ecp_nistz256_point_double entry.
#   %rdi = output point (X,Y,Z = 0/32/64 bytes), %rsi = input point.
# .Lpoint_double_shortcutx is also entered by .Ladd_doublex in
# ecp_nistz256_point_addx (the a == b case of point addition), after that
# function has unwound part of its own frame.
3548.type	ecp_nistz256_point_doublex,@function
3549.align	32
3550ecp_nistz256_point_doublex:
3551.cfi_startproc
3552.Lpoint_doublex:
3553	pushq	%rbp
3554.cfi_adjust_cfa_offset	8
3555.cfi_offset	%rbp,-16
3556	pushq	%rbx
3557.cfi_adjust_cfa_offset	8
3558.cfi_offset	%rbx,-24
3559	pushq	%r12
3560.cfi_adjust_cfa_offset	8
3561.cfi_offset	%r12,-32
3562	pushq	%r13
3563.cfi_adjust_cfa_offset	8
3564.cfi_offset	%r13,-40
3565	pushq	%r14
3566.cfi_adjust_cfa_offset	8
3567.cfi_offset	%r14,-48
3568	pushq	%r15
3569.cfi_adjust_cfa_offset	8
3570.cfi_offset	%r15,-56
# Five 32-byte scratch field elements (+8 alignment) on the stack.
3571	subq	$160+8,%rsp
3572.cfi_adjust_cfa_offset	32*5+8
3573.Lpoint_doublex_body:
3574
# Load X into stack scratch at 96(%rsp), Y limbs into %r12..%r9, and the
# .Lpoly middle words into %r14/%r15 for the add/sub helpers.
3575.Lpoint_double_shortcutx:
3576	movdqu	0(%rsi),%xmm0
3577	movq	%rsi,%rbx
3578	movdqu	16(%rsi),%xmm1
3579	movq	32+0(%rsi),%r12
3580	movq	32+8(%rsi),%r13
3581	movq	32+16(%rsi),%r8
3582	movq	32+24(%rsi),%r9
3583	movq	.Lpoly+8(%rip),%r14
3584	movq	.Lpoly+24(%rip),%r15
3585	movdqa	%xmm0,96(%rsp)
3586	movdqa	%xmm1,96+16(%rsp)
3587	leaq	32(%rdi),%r10
3588	leaq	64(%rdi),%r11
# hand-encoded movq GPR->XMM (66 REX 0F 6E): stash %rdi,%r10,%r11 in
# %xmm0,%xmm1,%xmm2 across the helper calls.
3589.byte	102,72,15,110,199
3590.byte	102,73,15,110,202
3591.byte	102,73,15,110,211
3592
# S = 2*Y -> 0(%rsp)
3593	leaq	0(%rsp),%rdi
3594	call	__ecp_nistz256_mul_by_2x
3595
# Zsqr = Z^2 -> 64(%rsp)
3596	movq	64+0(%rsi),%rdx
3597	movq	64+8(%rsi),%r14
3598	movq	64+16(%rsi),%r15
3599	movq	64+24(%rsi),%r8
3600	leaq	64-128(%rsi),%rsi
3601	leaq	64(%rsp),%rdi
3602	call	__ecp_nistz256_sqr_montx
3603
# S = S^2 (= 4*Y^2) -> 0(%rsp)
3604	movq	0+0(%rsp),%rdx
3605	movq	8+0(%rsp),%r14
3606	leaq	-128+0(%rsp),%rsi
3607	movq	16+0(%rsp),%r15
3608	movq	24+0(%rsp),%r8
3609	leaq	0(%rsp),%rdi
3610	call	__ecp_nistz256_sqr_montx
3611
# Z3 = 2*Y*Z -> result Z (via %xmm2-saved pointer, movq XMM->GPR below)
3612	movq	32(%rbx),%rdx
3613	movq	64+0(%rbx),%r9
3614	movq	64+8(%rbx),%r10
3615	movq	64+16(%rbx),%r11
3616	movq	64+24(%rbx),%r12
3617	leaq	64-128(%rbx),%rsi
3618	leaq	32(%rbx),%rbx
3619.byte	102,72,15,126,215
3620	call	__ecp_nistz256_mul_montx
3621	call	__ecp_nistz256_mul_by_2x
3622
# M = X + Zsqr -> 32(%rsp);  N = X - Zsqr -> 64(%rsp)
3623	movq	96+0(%rsp),%r12
3624	movq	96+8(%rsp),%r13
3625	leaq	64(%rsp),%rbx
3626	movq	96+16(%rsp),%r8
3627	movq	96+24(%rsp),%r9
3628	leaq	32(%rsp),%rdi
3629	call	__ecp_nistz256_add_tox
3630
3631	movq	96+0(%rsp),%r12
3632	movq	96+8(%rsp),%r13
3633	leaq	64(%rsp),%rbx
3634	movq	96+16(%rsp),%r8
3635	movq	96+24(%rsp),%r9
3636	leaq	64(%rsp),%rdi
3637	call	__ecp_nistz256_sub_fromx
3638
# Y3 partial: S^2 (= 16*Y^4), then halve it mod p.
3639	movq	0+0(%rsp),%rdx
3640	movq	8+0(%rsp),%r14
3641	leaq	-128+0(%rsp),%rsi
3642	movq	16+0(%rsp),%r15
3643	movq	24+0(%rsp),%r8
# hand-encoded movq %xmm1,%rdi -- result-Y pointer
3644.byte	102,72,15,126,207
3645	call	__ecp_nistz256_sqr_montx
# Division by 2 mod p, branch-free: if the value is odd, add p (the adc
# chain below), then shift the 257-bit result right one bit across limbs.
# NOTE(review): %rsi/%rbp here appear to hold the .Lpoly middle words as
# left by __ecp_nistz256_sqr_montx -- confirm against that helper.
3646	xorq	%r9,%r9
3647	movq	%r12,%rax
3648	addq	$-1,%r12
3649	movq	%r13,%r10
3650	adcq	%rsi,%r13
3651	movq	%r14,%rcx
3652	adcq	$0,%r14
3653	movq	%r15,%r8
3654	adcq	%rbp,%r15
3655	adcq	$0,%r9
3656	xorq	%rsi,%rsi
3657	testq	$1,%rax
3658
# Even input: keep the original limbs (discard the +p).
3659	cmovzq	%rax,%r12
3660	cmovzq	%r10,%r13
3661	cmovzq	%rcx,%r14
3662	cmovzq	%r8,%r15
3663	cmovzq	%rsi,%r9
3664
# Right-shift the 5-limb value by one bit, storing the 4-limb result.
3665	movq	%r13,%rax
3666	shrq	$1,%r12
3667	shlq	$63,%rax
3668	movq	%r14,%r10
3669	shrq	$1,%r13
3670	orq	%rax,%r12
3671	shlq	$63,%r10
3672	movq	%r15,%rcx
3673	shrq	$1,%r14
3674	orq	%r10,%r13
3675	shlq	$63,%rcx
3676	movq	%r12,0(%rdi)
3677	shrq	$1,%r15
3678	movq	%r13,8(%rdi)
3679	shlq	$63,%r9
3680	orq	%rcx,%r14
3681	orq	%r9,%r15
3682	movq	%r14,16(%rdi)
3683	movq	%r15,24(%rdi)
# M = 3*(X - Zsqr)*(X + Zsqr): multiply, double, add once more.
3684	movq	64(%rsp),%rdx
3685	leaq	64(%rsp),%rbx
3686	movq	0+32(%rsp),%r9
3687	movq	8+32(%rsp),%r10
3688	leaq	-128+32(%rsp),%rsi
3689	movq	16+32(%rsp),%r11
3690	movq	24+32(%rsp),%r12
3691	leaq	32(%rsp),%rdi
3692	call	__ecp_nistz256_mul_montx
3693
3694	leaq	128(%rsp),%rdi
3695	call	__ecp_nistz256_mul_by_2x
3696
3697	leaq	32(%rsp),%rbx
3698	leaq	32(%rsp),%rdi
3699	call	__ecp_nistz256_add_tox
3700
# S = X * S (= 4*X*Y^2) -> 0(%rsp), then 2*S -> 128(%rsp)
3701	movq	96(%rsp),%rdx
3702	leaq	96(%rsp),%rbx
3703	movq	0+0(%rsp),%r9
3704	movq	8+0(%rsp),%r10
3705	leaq	-128+0(%rsp),%rsi
3706	movq	16+0(%rsp),%r11
3707	movq	24+0(%rsp),%r12
3708	leaq	0(%rsp),%rdi
3709	call	__ecp_nistz256_mul_montx
3710
3711	leaq	128(%rsp),%rdi
3712	call	__ecp_nistz256_mul_by_2x
3713
# X3 = M^2 - 2*S -> result X (pointer restored from %xmm0)
3714	movq	0+32(%rsp),%rdx
3715	movq	8+32(%rsp),%r14
3716	leaq	-128+32(%rsp),%rsi
3717	movq	16+32(%rsp),%r15
3718	movq	24+32(%rsp),%r8
3719.byte	102,72,15,126,199
3720	call	__ecp_nistz256_sqr_montx
3721
3722	leaq	128(%rsp),%rbx
3723	movq	%r14,%r8
3724	movq	%r15,%r9
3725	movq	%rsi,%r14
3726	movq	%rbp,%r15
3727	call	__ecp_nistz256_sub_fromx
3728
# Y3 = M*(S - X3) - 8*Y^4
3729	movq	0+0(%rsp),%rax
3730	movq	0+8(%rsp),%rbp
3731	movq	0+16(%rsp),%rcx
3732	movq	0+24(%rsp),%r10
3733	leaq	0(%rsp),%rdi
3734	call	__ecp_nistz256_subx
3735
3736	movq	32(%rsp),%rdx
3737	leaq	32(%rsp),%rbx
3738	movq	%r12,%r14
3739	xorl	%ecx,%ecx
3740	movq	%r12,0+0(%rsp)
3741	movq	%r13,%r10
3742	movq	%r13,0+8(%rsp)
3743	cmovzq	%r8,%r11
3744	movq	%r8,0+16(%rsp)
3745	leaq	0-128(%rsp),%rsi
3746	cmovzq	%r9,%r12
3747	movq	%r9,0+24(%rsp)
3748	movq	%r14,%r9
3749	leaq	0(%rsp),%rdi
3750	call	__ecp_nistz256_mul_montx
3751
# hand-encoded movq %xmm1->%rbx, %xmm0->%rdi, then final subtraction
# writes Y3 to the result point.
3752.byte	102,72,15,126,203
3753.byte	102,72,15,126,207
3754	call	__ecp_nistz256_sub_fromx
3755
# Epilogue: restore callee-saved registers and return.
3756	leaq	160+56(%rsp),%rsi
3757.cfi_def_cfa	%rsi,8
3758	movq	-48(%rsi),%r15
3759.cfi_restore	%r15
3760	movq	-40(%rsi),%r14
3761.cfi_restore	%r14
3762	movq	-32(%rsi),%r13
3763.cfi_restore	%r13
3764	movq	-24(%rsi),%r12
3765.cfi_restore	%r12
3766	movq	-16(%rsi),%rbx
3767.cfi_restore	%rbx
3768	movq	-8(%rsi),%rbp
3769.cfi_restore	%rbp
3770	leaq	(%rsi),%rsp
3771.cfi_def_cfa_register	%rsp
3772.Lpoint_doublex_epilogue:
# rep ret (F3 C3)
3773	.byte	0xf3,0xc3
3774.cfi_endproc
3775.size	ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
3776.type ecp_nistz256_point_addx,@function
3777.align 32
3778ecp_nistz256_point_addx:
3779.cfi_startproc
3780.Lpoint_addx:
3781 pushq %rbp
3782.cfi_adjust_cfa_offset 8
3783.cfi_offset %rbp,-16
3784 pushq %rbx
3785.cfi_adjust_cfa_offset 8
3786.cfi_offset %rbx,-24
3787 pushq %r12
3788.cfi_adjust_cfa_offset 8
3789.cfi_offset %r12,-32
3790 pushq %r13
3791.cfi_adjust_cfa_offset 8
3792.cfi_offset %r13,-40
3793 pushq %r14
3794.cfi_adjust_cfa_offset 8
3795.cfi_offset %r14,-48
3796 pushq %r15
3797.cfi_adjust_cfa_offset 8
3798.cfi_offset %r15,-56
3799 subq $576+8,%rsp
3800.cfi_adjust_cfa_offset 32*18+8
3801.Lpoint_addx_body:
3802
3803 movdqu 0(%rsi),%xmm0
3804 movdqu 16(%rsi),%xmm1
3805 movdqu 32(%rsi),%xmm2
3806 movdqu 48(%rsi),%xmm3
3807 movdqu 64(%rsi),%xmm4
3808 movdqu 80(%rsi),%xmm5
3809 movq %rsi,%rbx
3810 movq %rdx,%rsi
3811 movdqa %xmm0,384(%rsp)
3812 movdqa %xmm1,384+16(%rsp)
3813 movdqa %xmm2,416(%rsp)
3814 movdqa %xmm3,416+16(%rsp)
3815 movdqa %xmm4,448(%rsp)
3816 movdqa %xmm5,448+16(%rsp)
3817 por %xmm4,%xmm5
3818
3819 movdqu 0(%rsi),%xmm0
3820 pshufd $0xb1,%xmm5,%xmm3
3821 movdqu 16(%rsi),%xmm1
3822 movdqu 32(%rsi),%xmm2
3823 por %xmm3,%xmm5
3824 movdqu 48(%rsi),%xmm3
3825 movq 64+0(%rsi),%rdx
3826 movq 64+8(%rsi),%r14
3827 movq 64+16(%rsi),%r15
3828 movq 64+24(%rsi),%r8
3829 movdqa %xmm0,480(%rsp)
3830 pshufd $0x1e,%xmm5,%xmm4
3831 movdqa %xmm1,480+16(%rsp)
3832 movdqu 64(%rsi),%xmm0
3833 movdqu 80(%rsi),%xmm1
3834 movdqa %xmm2,512(%rsp)
3835 movdqa %xmm3,512+16(%rsp)
3836 por %xmm4,%xmm5
3837 pxor %xmm4,%xmm4
3838 por %xmm0,%xmm1
3839.byte 102,72,15,110,199
3840
3841 leaq 64-128(%rsi),%rsi
3842 movq %rdx,544+0(%rsp)
3843 movq %r14,544+8(%rsp)
3844 movq %r15,544+16(%rsp)
3845 movq %r8,544+24(%rsp)
3846 leaq 96(%rsp),%rdi
3847 call __ecp_nistz256_sqr_montx
3848
3849 pcmpeqd %xmm4,%xmm5
3850 pshufd $0xb1,%xmm1,%xmm4
3851 por %xmm1,%xmm4
3852 pshufd $0,%xmm5,%xmm5
3853 pshufd $0x1e,%xmm4,%xmm3
3854 por %xmm3,%xmm4
3855 pxor %xmm3,%xmm3
3856 pcmpeqd %xmm3,%xmm4
3857 pshufd $0,%xmm4,%xmm4
3858 movq 64+0(%rbx),%rdx
3859 movq 64+8(%rbx),%r14
3860 movq 64+16(%rbx),%r15
3861 movq 64+24(%rbx),%r8
3862.byte 102,72,15,110,203
3863
3864 leaq 64-128(%rbx),%rsi
3865 leaq 32(%rsp),%rdi
3866 call __ecp_nistz256_sqr_montx
3867
3868 movq 544(%rsp),%rdx
3869 leaq 544(%rsp),%rbx
3870 movq 0+96(%rsp),%r9
3871 movq 8+96(%rsp),%r10
3872 leaq -128+96(%rsp),%rsi
3873 movq 16+96(%rsp),%r11
3874 movq 24+96(%rsp),%r12
3875 leaq 224(%rsp),%rdi
3876 call __ecp_nistz256_mul_montx
3877
3878 movq 448(%rsp),%rdx
3879 leaq 448(%rsp),%rbx
3880 movq 0+32(%rsp),%r9
3881 movq 8+32(%rsp),%r10
3882 leaq -128+32(%rsp),%rsi
3883 movq 16+32(%rsp),%r11
3884 movq 24+32(%rsp),%r12
3885 leaq 256(%rsp),%rdi
3886 call __ecp_nistz256_mul_montx
3887
3888 movq 416(%rsp),%rdx
3889 leaq 416(%rsp),%rbx
3890 movq 0+224(%rsp),%r9
3891 movq 8+224(%rsp),%r10
3892 leaq -128+224(%rsp),%rsi
3893 movq 16+224(%rsp),%r11
3894 movq 24+224(%rsp),%r12
3895 leaq 224(%rsp),%rdi
3896 call __ecp_nistz256_mul_montx
3897
3898 movq 512(%rsp),%rdx
3899 leaq 512(%rsp),%rbx
3900 movq 0+256(%rsp),%r9
3901 movq 8+256(%rsp),%r10
3902 leaq -128+256(%rsp),%rsi
3903 movq 16+256(%rsp),%r11
3904 movq 24+256(%rsp),%r12
3905 leaq 256(%rsp),%rdi
3906 call __ecp_nistz256_mul_montx
3907
3908 leaq 224(%rsp),%rbx
3909 leaq 64(%rsp),%rdi
3910 call __ecp_nistz256_sub_fromx
3911
3912 orq %r13,%r12
3913 movdqa %xmm4,%xmm2
3914 orq %r8,%r12
3915 orq %r9,%r12
3916 por %xmm5,%xmm2
3917.byte 102,73,15,110,220
3918
3919 movq 384(%rsp),%rdx
3920 leaq 384(%rsp),%rbx
3921 movq 0+96(%rsp),%r9
3922 movq 8+96(%rsp),%r10
3923 leaq -128+96(%rsp),%rsi
3924 movq 16+96(%rsp),%r11
3925 movq 24+96(%rsp),%r12
3926 leaq 160(%rsp),%rdi
3927 call __ecp_nistz256_mul_montx
3928
3929 movq 480(%rsp),%rdx
3930 leaq 480(%rsp),%rbx
3931 movq 0+32(%rsp),%r9
3932 movq 8+32(%rsp),%r10
3933 leaq -128+32(%rsp),%rsi
3934 movq 16+32(%rsp),%r11
3935 movq 24+32(%rsp),%r12
3936 leaq 192(%rsp),%rdi
3937 call __ecp_nistz256_mul_montx
3938
3939 leaq 160(%rsp),%rbx
3940 leaq 0(%rsp),%rdi
3941 call __ecp_nistz256_sub_fromx
3942
3943 orq %r13,%r12
3944 orq %r8,%r12
3945 orq %r9,%r12
3946
Pete Bentleya5c947b2019-08-09 14:24:27 +00003947.byte 0x3e
Pete Bentley228bd622019-08-08 14:53:19 +00003948 jnz .Ladd_proceedx
Srinivas Paladugudd42a612019-08-09 19:30:39 +00003949.byte 102,73,15,126,208
3950.byte 102,73,15,126,217
3951 testq %r8,%r8
3952 jnz .Ladd_proceedx
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003953 testq %r9,%r9
3954 jz .Ladd_doublex
3955
3956.byte 102,72,15,126,199
3957 pxor %xmm0,%xmm0
3958 movdqu %xmm0,0(%rdi)
3959 movdqu %xmm0,16(%rdi)
3960 movdqu %xmm0,32(%rdi)
3961 movdqu %xmm0,48(%rdi)
3962 movdqu %xmm0,64(%rdi)
3963 movdqu %xmm0,80(%rdi)
3964 jmp .Ladd_donex
3965
3966.align 32
3967.Ladd_doublex:
3968.byte 102,72,15,126,206
3969.byte 102,72,15,126,199
3970 addq $416,%rsp
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003971.cfi_adjust_cfa_offset -416
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003972 jmp .Lpoint_double_shortcutx
Robert Sloan4c22c5f2019-03-01 15:53:37 -08003973.cfi_adjust_cfa_offset 416
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01003974
3975.align 32
3976.Ladd_proceedx:
3977 movq 0+64(%rsp),%rdx
3978 movq 8+64(%rsp),%r14
3979 leaq -128+64(%rsp),%rsi
3980 movq 16+64(%rsp),%r15
3981 movq 24+64(%rsp),%r8
3982 leaq 96(%rsp),%rdi
3983 call __ecp_nistz256_sqr_montx
3984
3985 movq 448(%rsp),%rdx
3986 leaq 448(%rsp),%rbx
3987 movq 0+0(%rsp),%r9
3988 movq 8+0(%rsp),%r10
3989 leaq -128+0(%rsp),%rsi
3990 movq 16+0(%rsp),%r11
3991 movq 24+0(%rsp),%r12
3992 leaq 352(%rsp),%rdi
3993 call __ecp_nistz256_mul_montx
3994
3995 movq 0+0(%rsp),%rdx
3996 movq 8+0(%rsp),%r14
3997 leaq -128+0(%rsp),%rsi
3998 movq 16+0(%rsp),%r15
3999 movq 24+0(%rsp),%r8
4000 leaq 32(%rsp),%rdi
4001 call __ecp_nistz256_sqr_montx
4002
4003 movq 544(%rsp),%rdx
4004 leaq 544(%rsp),%rbx
4005 movq 0+352(%rsp),%r9
4006 movq 8+352(%rsp),%r10
4007 leaq -128+352(%rsp),%rsi
4008 movq 16+352(%rsp),%r11
4009 movq 24+352(%rsp),%r12
4010 leaq 352(%rsp),%rdi
4011 call __ecp_nistz256_mul_montx
4012
4013 movq 0(%rsp),%rdx
4014 leaq 0(%rsp),%rbx
4015 movq 0+32(%rsp),%r9
4016 movq 8+32(%rsp),%r10
4017 leaq -128+32(%rsp),%rsi
4018 movq 16+32(%rsp),%r11
4019 movq 24+32(%rsp),%r12
4020 leaq 128(%rsp),%rdi
4021 call __ecp_nistz256_mul_montx
4022
4023 movq 160(%rsp),%rdx
4024 leaq 160(%rsp),%rbx
4025 movq 0+32(%rsp),%r9
4026 movq 8+32(%rsp),%r10
4027 leaq -128+32(%rsp),%rsi
4028 movq 16+32(%rsp),%r11
4029 movq 24+32(%rsp),%r12
4030 leaq 192(%rsp),%rdi
4031 call __ecp_nistz256_mul_montx
4032
4033
4034
4035
4036 xorq %r11,%r11
4037 addq %r12,%r12
4038 leaq 96(%rsp),%rsi
4039 adcq %r13,%r13
4040 movq %r12,%rax
4041 adcq %r8,%r8
4042 adcq %r9,%r9
4043 movq %r13,%rbp
4044 adcq $0,%r11
4045
4046 subq $-1,%r12
4047 movq %r8,%rcx
4048 sbbq %r14,%r13
4049 sbbq $0,%r8
4050 movq %r9,%r10
4051 sbbq %r15,%r9
4052 sbbq $0,%r11
4053
4054 cmovcq %rax,%r12
4055 movq 0(%rsi),%rax
4056 cmovcq %rbp,%r13
4057 movq 8(%rsi),%rbp
4058 cmovcq %rcx,%r8
4059 movq 16(%rsi),%rcx
4060 cmovcq %r10,%r9
4061 movq 24(%rsi),%r10
4062
4063 call __ecp_nistz256_subx
4064
4065 leaq 128(%rsp),%rbx
4066 leaq 288(%rsp),%rdi
4067 call __ecp_nistz256_sub_fromx
4068
4069 movq 192+0(%rsp),%rax
4070 movq 192+8(%rsp),%rbp
4071 movq 192+16(%rsp),%rcx
4072 movq 192+24(%rsp),%r10
4073 leaq 320(%rsp),%rdi
4074
4075 call __ecp_nistz256_subx
4076
4077 movq %r12,0(%rdi)
4078 movq %r13,8(%rdi)
4079 movq %r8,16(%rdi)
4080 movq %r9,24(%rdi)
4081 movq 128(%rsp),%rdx
4082 leaq 128(%rsp),%rbx
4083 movq 0+224(%rsp),%r9
4084 movq 8+224(%rsp),%r10
4085 leaq -128+224(%rsp),%rsi
4086 movq 16+224(%rsp),%r11
4087 movq 24+224(%rsp),%r12
4088 leaq 256(%rsp),%rdi
4089 call __ecp_nistz256_mul_montx
4090
4091 movq 320(%rsp),%rdx
4092 leaq 320(%rsp),%rbx
4093 movq 0+64(%rsp),%r9
4094 movq 8+64(%rsp),%r10
4095 leaq -128+64(%rsp),%rsi
4096 movq 16+64(%rsp),%r11
4097 movq 24+64(%rsp),%r12
4098 leaq 320(%rsp),%rdi
4099 call __ecp_nistz256_mul_montx
4100
4101 leaq 256(%rsp),%rbx
4102 leaq 320(%rsp),%rdi
4103 call __ecp_nistz256_sub_fromx
4104
4105.byte 102,72,15,126,199
4106
4107 movdqa %xmm5,%xmm0
4108 movdqa %xmm5,%xmm1
4109 pandn 352(%rsp),%xmm0
4110 movdqa %xmm5,%xmm2
4111 pandn 352+16(%rsp),%xmm1
4112 movdqa %xmm5,%xmm3
4113 pand 544(%rsp),%xmm2
4114 pand 544+16(%rsp),%xmm3
4115 por %xmm0,%xmm2
4116 por %xmm1,%xmm3
4117
4118 movdqa %xmm4,%xmm0
4119 movdqa %xmm4,%xmm1
4120 pandn %xmm2,%xmm0
4121 movdqa %xmm4,%xmm2
4122 pandn %xmm3,%xmm1
4123 movdqa %xmm4,%xmm3
4124 pand 448(%rsp),%xmm2
4125 pand 448+16(%rsp),%xmm3
4126 por %xmm0,%xmm2
4127 por %xmm1,%xmm3
4128 movdqu %xmm2,64(%rdi)
4129 movdqu %xmm3,80(%rdi)
4130
4131 movdqa %xmm5,%xmm0
4132 movdqa %xmm5,%xmm1
4133 pandn 288(%rsp),%xmm0
4134 movdqa %xmm5,%xmm2
4135 pandn 288+16(%rsp),%xmm1
4136 movdqa %xmm5,%xmm3
4137 pand 480(%rsp),%xmm2
4138 pand 480+16(%rsp),%xmm3
4139 por %xmm0,%xmm2
4140 por %xmm1,%xmm3
4141
4142 movdqa %xmm4,%xmm0
4143 movdqa %xmm4,%xmm1
4144 pandn %xmm2,%xmm0
4145 movdqa %xmm4,%xmm2
4146 pandn %xmm3,%xmm1
4147 movdqa %xmm4,%xmm3
4148 pand 384(%rsp),%xmm2
4149 pand 384+16(%rsp),%xmm3
4150 por %xmm0,%xmm2
4151 por %xmm1,%xmm3
4152 movdqu %xmm2,0(%rdi)
4153 movdqu %xmm3,16(%rdi)
4154
4155 movdqa %xmm5,%xmm0
4156 movdqa %xmm5,%xmm1
4157 pandn 320(%rsp),%xmm0
4158 movdqa %xmm5,%xmm2
4159 pandn 320+16(%rsp),%xmm1
4160 movdqa %xmm5,%xmm3
4161 pand 512(%rsp),%xmm2
4162 pand 512+16(%rsp),%xmm3
4163 por %xmm0,%xmm2
4164 por %xmm1,%xmm3
4165
4166 movdqa %xmm4,%xmm0
4167 movdqa %xmm4,%xmm1
4168 pandn %xmm2,%xmm0
4169 movdqa %xmm4,%xmm2
4170 pandn %xmm3,%xmm1
4171 movdqa %xmm4,%xmm3
4172 pand 416(%rsp),%xmm2
4173 pand 416+16(%rsp),%xmm3
4174 por %xmm0,%xmm2
4175 por %xmm1,%xmm3
4176 movdqu %xmm2,32(%rdi)
4177 movdqu %xmm3,48(%rdi)
4178
4179.Ladd_donex:
4180 leaq 576+56(%rsp),%rsi
4181.cfi_def_cfa %rsi,8
4182 movq -48(%rsi),%r15
4183.cfi_restore %r15
4184 movq -40(%rsi),%r14
4185.cfi_restore %r14
4186 movq -32(%rsi),%r13
4187.cfi_restore %r13
4188 movq -24(%rsi),%r12
4189.cfi_restore %r12
4190 movq -16(%rsi),%rbx
4191.cfi_restore %rbx
4192 movq -8(%rsi),%rbp
4193.cfi_restore %rbp
4194 leaq (%rsi),%rsp
4195.cfi_def_cfa_register %rsp
4196.Lpoint_addx_epilogue:
4197 .byte 0xf3,0xc3
4198.cfi_endproc
4199.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
################################################################################
# void ecp_nistz256_point_add_affinex(P256_POINT *r,              # %rdi
#                                     const P256_POINT *a,        # %rsi
#                                     const P256_POINT_AFFINE *b) # %rdx
#
# Mixed Jacobian + affine P-256 point addition, BMI2/ADX ("x") flavour:
# r = a + b, where a = (X1:Y1:Z1) is Jacobian (3 x 32 bytes) and b = (X2,Y2)
# is affine (2 x 32 bytes); coordinates are in Montgomery form.  Since b is
# affine, U1 = X1 and S1 = Y1 in the usual addition formulas.
#
# Field arithmetic is delegated to the __ecp_nistz256_*x helpers defined
# elsewhere in this file.  As used below, they take one operand pre-loaded in
# registers (%rdx + %r9-%r12 for mul, %rdx + %r14/%r15/%r8 for sqr), the
# other via %rbx with %rsi pre-biased by -128, and write the 256-bit result
# to (%rdi).
#
# 32-byte stack slots (frame is 480+8 bytes), as written by the code below:
#     0(%rsp)  U2 = X2*Z1^2, later X1*H^2
#    32(%rsp)  Z1^2, then Z1^3, then S2 = Y2*Z1^3, finally Y1*H^3
#    64(%rsp)  H = U2-X1, later X1*H^2-X3, then R*(X1*H^2-X3)
#    96(%rsp)  R = S2-Y1
#   128(%rsp)  H^2                 160(%rsp)  H^3
#   192(%rsp)  R^2
#   224(%rsp)  X3   256(%rsp)  Y3   288(%rsp)  Z3
#   320..407(%rsp)  copy of a (X1,Y1,Z1)
#   416..479(%rsp)  copy of b (X2,Y2)
################################################################################
.type	ecp_nistz256_point_add_affinex,@function
.align	32
ecp_nistz256_point_add_affinex:
.cfi_startproc	
.Lpoint_add_affinex:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$480+8,%rsp
.cfi_adjust_cfa_offset	32*15+8
.Ladd_affinex_body:

	# Copy a = (X1,Y1,Z1) to 320/352/384(%rsp); keep Z1 limbs in
	# %rdx/%r14/%r15/%r8 as the operand for the first squaring.
	movdqu	0(%rsi),%xmm0
	movq	%rdx,%rbx		# %rbx = b
	movdqu	16(%rsi),%xmm1
	movdqu	32(%rsi),%xmm2
	movdqu	48(%rsi),%xmm3
	movdqu	64(%rsi),%xmm4
	movdqu	80(%rsi),%xmm5
	movq	64+0(%rsi),%rdx
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	movdqa	%xmm0,320(%rsp)
	movdqa	%xmm1,320+16(%rsp)
	movdqa	%xmm2,352(%rsp)
	movdqa	%xmm3,352+16(%rsp)
	movdqa	%xmm4,384(%rsp)
	movdqa	%xmm5,384+16(%rsp)
	por	%xmm4,%xmm5		# start collapsing Z1 for the Z1==0 test

	# Copy b = (X2,Y2) to 416/448(%rsp) and start OR-collapsing X2|Y2
	# into %xmm3 for the b==0 (infinity encoding) test.
	movdqu	0(%rbx),%xmm0
	pshufd	$0xb1,%xmm5,%xmm3
	movdqu	16(%rbx),%xmm1
	movdqu	32(%rbx),%xmm2
	por	%xmm3,%xmm5
	movdqu	48(%rbx),%xmm3
	movdqa	%xmm0,416(%rsp)
	pshufd	$0x1e,%xmm5,%xmm4
	movdqa	%xmm1,416+16(%rsp)
	por	%xmm0,%xmm1
.byte	102,72,15,110,199		# movq %rdi,%xmm0 - stash output ptr
	movdqa	%xmm2,448(%rsp)
	movdqa	%xmm3,448+16(%rsp)
	por	%xmm2,%xmm3
	por	%xmm4,%xmm5
	pxor	%xmm4,%xmm4
	por	%xmm1,%xmm3

	# 32(%rsp) = Z1^2 (operand already in %rdx/%r14/%r15/%r8)
	leaq	64-128(%rsi),%rsi
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	# Finish the masks (interleaved with setting up the next multiply):
	# %xmm5 = all-ones iff Z1 == 0 (a is infinity),
	# %xmm4 = all-ones iff X2|Y2 == 0 (b is infinity).
	pcmpeqd	%xmm4,%xmm5
	pshufd	$0xb1,%xmm3,%xmm4
	movq	0(%rbx),%rdx		# X2[0]

	movq	%r12,%r9		# Z1^2 limbs from the squaring
	por	%xmm3,%xmm4
	pshufd	$0,%xmm5,%xmm5		# broadcast a-is-infinity mask
	pshufd	$0x1e,%xmm4,%xmm3
	movq	%r13,%r10
	por	%xmm3,%xmm4
	pxor	%xmm3,%xmm3
	movq	%r14,%r11
	pcmpeqd	%xmm3,%xmm4
	pshufd	$0,%xmm4,%xmm4		# broadcast b-is-infinity mask

	# 0(%rsp) = U2 = X2*Z1^2
	leaq	32-128(%rsp),%rsi
	movq	%r15,%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 64(%rsp) = H = U2 - X1
	leaq	320(%rsp),%rbx
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	# 32(%rsp) = Z1^3 = Z1*Z1^2
	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 288(%rsp) = Z3 = Z1*H
	movq	384(%rsp),%rdx
	leaq	384(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	288(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 32(%rsp) = S2 = Y2*Z1^3
	movq	448(%rsp),%rdx
	leaq	448(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	-128+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 96(%rsp) = R = S2 - Y1
	leaq	352(%rsp),%rbx
	leaq	96(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	# 128(%rsp) = H^2
	movq	0+64(%rsp),%rdx
	movq	8+64(%rsp),%r14
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r15
	movq	24+64(%rsp),%r8
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	# 192(%rsp) = R^2
	movq	0+96(%rsp),%rdx
	movq	8+96(%rsp),%r14
	leaq	-128+96(%rsp),%rsi
	movq	16+96(%rsp),%r15
	movq	24+96(%rsp),%r8
	leaq	192(%rsp),%rdi
	call	__ecp_nistz256_sqr_montx

	# 160(%rsp) = H^3 = H^2*H
	movq	128(%rsp),%rdx
	leaq	128(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	160(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 0(%rsp) = X1*H^2 (= U1*H^2, since b is affine); result limbs are
	# also left in %r12/%r13/%r8/%r9 for the doubling below.
	movq	320(%rsp),%rdx
	leaq	320(%rsp),%rbx
	movq	0+128(%rsp),%r9
	movq	8+128(%rsp),%r10
	leaq	-128+128(%rsp),%rsi
	movq	16+128(%rsp),%r11
	movq	24+128(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# Inline: %r12/%r13/%r8/%r9 = 2*(X1*H^2) mod p.  Double with carry
	# into %r11, then conditionally subtract p = {-1, p1, 0, p3}
	# (presumably %r14/%r15 still hold p[1]/p[3] from the helpers —
	# the subq $-1/sbbq pattern matches .Lpoly).
	xorq	%r11,%r11
	addq	%r12,%r12
	leaq	192(%rsp),%rsi		# %rsi = &R^2 for the load below
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	cmovcq	%rax,%r12		# keep unreduced value on borrow
	movq	0(%rsi),%rax		# load R^2 limbs
	cmovcq	%rbp,%r13
	movq	8(%rsi),%rbp
	cmovcq	%rcx,%r8
	movq	16(%rsi),%rcx
	cmovcq	%r10,%r9
	movq	24(%rsi),%r10

	# %r12..%r9 = R^2 - 2*(X1*H^2)
	call	__ecp_nistz256_subx

	# 224(%rsp) = X3 = R^2 - 2*(X1*H^2) - H^3
	leaq	160(%rsp),%rbx
	leaq	224(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

	# 64(%rsp) = X1*H^2 - X3
	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	64(%rsp),%rdi

	call	__ecp_nistz256_subx

	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)
	# 32(%rsp) = Y1*H^3 (= S1*H^3, since b is affine)
	movq	352(%rsp),%rdx
	leaq	352(%rsp),%rbx
	movq	0+160(%rsp),%r9
	movq	8+160(%rsp),%r10
	leaq	-128+160(%rsp),%rsi
	movq	16+160(%rsp),%r11
	movq	24+160(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 64(%rsp) = R*(X1*H^2 - X3)
	movq	96(%rsp),%rdx
	leaq	96(%rsp),%rbx
	movq	0+64(%rsp),%r9
	movq	8+64(%rsp),%r10
	leaq	-128+64(%rsp),%rsi
	movq	16+64(%rsp),%r11
	movq	24+64(%rsp),%r12
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_mul_montx

	# 256(%rsp) = Y3 = R*(X1*H^2 - X3) - Y1*H^3
	leaq	32(%rsp),%rbx
	leaq	256(%rsp),%rdi
	call	__ecp_nistz256_sub_fromx

.byte	102,72,15,126,199		# movq %xmm0,%rdi - restore output ptr

	# Constant-time result selection using the infinity masks:
	#   neither infinite -> (X3,Y3,Z3)
	#   a infinite       -> (X2,Y2,1) with 1 in Montgomery form
	#   b infinite       -> (X1,Y1,Z1)
	# Z coordinate:
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	288(%rsp),%xmm0		# Z3 if a is finite
	movdqa	%xmm5,%xmm2
	pandn	288+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	.LONE_mont(%rip),%xmm2	# 1 (Montgomery) if a is infinity
	pand	.LONE_mont+16(%rip),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	384(%rsp),%xmm2		# Z1 if b is infinity
	pand	384+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,64(%rdi)
	movdqu	%xmm3,80(%rdi)

	# X coordinate:
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	224(%rsp),%xmm0		# X3 if a is finite
	movdqa	%xmm5,%xmm2
	pandn	224+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	416(%rsp),%xmm2		# X2 if a is infinity
	pand	416+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	320(%rsp),%xmm2		# X1 if b is infinity
	pand	320+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)

	# Y coordinate:
	movdqa	%xmm5,%xmm0
	movdqa	%xmm5,%xmm1
	pandn	256(%rsp),%xmm0		# Y3 if a is finite
	movdqa	%xmm5,%xmm2
	pandn	256+16(%rsp),%xmm1
	movdqa	%xmm5,%xmm3
	pand	448(%rsp),%xmm2		# Y2 if a is infinity
	pand	448+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3

	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	pandn	%xmm2,%xmm0
	movdqa	%xmm4,%xmm2
	pandn	%xmm3,%xmm1
	movdqa	%xmm4,%xmm3
	pand	352(%rsp),%xmm2		# Y1 if b is infinity
	pand	352+16(%rsp),%xmm3
	por	%xmm0,%xmm2
	por	%xmm1,%xmm3
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)

	# Epilogue: restore callee-saved registers and unwind the frame.
	leaq	480+56(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbx
.cfi_restore	%rbx
	movq	-8(%rsi),%rbp
.cfi_restore	%rbp
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Ladd_affinex_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
Adam Langleyfad63272015-11-12 12:15:39 -08004528#endif