/*
 * P-256 field and point arithmetic for x86-64.
 * Syntax: AT&T (GAS), run through the C preprocessor.
 * Symbol naming is Mach-O style: exported symbols carry a leading
 * underscore, file-local labels use the L$ prefix.
 */
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text

/*
 * Constant tables, 64-byte aligned so that every 32-byte table below can
 * be read with aligned 256-bit loads.
 *
 * L$poly      the modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1 as four
 *             little-endian 64-bit limbs. Code in this file loads limb 1
 *             (L$poly+8) and limb 3 (L$poly+24) into registers; limbs 0
 *             and 2 (-1 and 0) are used as immediates.
 * L$One/Two/Three
 *             eight 32-bit lanes holding 1, 2 and 3; used as counters and
 *             counter increments by the table-select routines.
 * L$ONE_mont  2^256 mod p as four little-endian 64-bit limbs.
 */
.p2align	6
L$poly:
.quad	0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001

L$One:
.long	1,1,1,1,1,1,1,1
L$Two:
.long	2,2,2,2,2,2,2,2
L$Three:
.long	3,3,3,3,3,3,3,3
L$ONE_mont:
.quad	0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe

/*
 * _ecp_nistz256_neg
 *
 * In:    rdi = result, four 64-bit limbs (little-endian limb order)
 *        rsi = operand a, four 64-bit limbs
 * Out:   result = 0 - a, with the modulus L$poly added back when the
 *        subtraction borrowed (i.e. whenever a != 0).
 * Saves: r12, r13 (pushed on entry, reloaded before return)
 * Clobb: rax, rcx, rdx, rsi, r8-r11, flags
 *
 * No branch depends on the operand; the choice between the two candidate
 * results is made with cmov.
 */
.globl	_ecp_nistz256_neg
.private_extern	_ecp_nistz256_neg

.p2align	5
_ecp_nistz256_neg:
	pushq	%r12
	pushq	%r13
L$neg_body:
	/* r8..r11 = 0, r13 = 0 (will become the borrow mask) */
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r13,%r13

	/* r8..r11 = 0 - a; copies of the raw difference go to rax,rdx,rcx,r12 */
	subq	0(%rsi),%r8
	sbbq	8(%rsi),%r9
	sbbq	16(%rsi),%r10
	movq	%r8,%rax
	sbbq	24(%rsi),%r11
	leaq	L$poly(%rip),%rsi
	movq	%r9,%rdx
	sbbq	$0,%r13			/* r13 = -1 if the subtraction borrowed, else 0 */

	/* r8..r11 = (0 - a) + p */
	addq	0(%rsi),%r8
	movq	%r10,%rcx
	adcq	8(%rsi),%r9
	adcq	16(%rsi),%r10
	movq	%r11,%r12
	adcq	24(%rsi),%r11
	testq	%r13,%r13

	/* no borrow (r13 == 0): keep the raw difference instead of the +p value */
	cmovzq	%rax,%r8
	cmovzq	%rdx,%r9
	movq	%r8,0(%rdi)
	cmovzq	%rcx,%r10
	movq	%r9,8(%rdi)
	cmovzq	%r12,%r11
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

	/* restore callee-saved registers and release the two pushed slots */
	movq	0(%rsp),%r13
	movq	8(%rsp),%r12
	leaq	16(%rsp),%rsp
L$neg_epilogue:
	.byte	0xf3,0xc3		/* rep ret */

/*
 * _ecp_nistz256_mul_mont
 *
 * In:    rdi = result, four 64-bit limbs
 *        rsi = operand a, four 64-bit limbs
 *        rdx = operand b, four 64-bit limbs
 * Out:   result written by __ecp_nistz256_mul_montq
 * Saves: rbp, rbx, r12-r15 (pushed on entry, reloaded before return)
 *
 * Thin wrapper: loads the register arguments expected by
 * __ecp_nistz256_mul_montq (rbx = b, rax = b[0], r9..r12 = a[0..3]) and
 * calls it.
 */
.globl	_ecp_nistz256_mul_mont
.private_extern	_ecp_nistz256_mul_mont

.p2align	5
_ecp_nistz256_mul_mont:
L$mul_mont:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
L$mul_body:
	movq	%rdx,%rbx		/* rbx = b; rdx is overwritten by mulq */
	movq	0(%rdx),%rax
	movq	0(%rsi),%r9
	movq	8(%rsi),%r10
	movq	16(%rsi),%r11
	movq	24(%rsi),%r12

	call	__ecp_nistz256_mul_montq
L$mul_mont_done:
	/* restore callee-saved registers and release the six pushed slots */
	movq	0(%rsp),%r15
	movq	8(%rsp),%r14
	movq	16(%rsp),%r13
	movq	24(%rsp),%r12
	movq	32(%rsp),%rbx
	movq	40(%rsp),%rbp
	leaq	48(%rsp),%rsp
L$mul_epilogue:
	.byte	0xf3,0xc3		/* rep ret */

/*
 * __ecp_nistz256_mul_montq  (file-local helper, custom register contract)
 *
 * In:    rdi = result, four 64-bit limbs
 *        rsi = operand a in memory (used for the b[1..3] passes)
 *        r9,r10,r11,r12 = a[0..3] (used for the b[0] pass)
 *        rbx = operand b in memory, rax = b[0]
 * Out:   result in r12,r13,r8,r9 (limbs 0..3) and stored at 0..24(%rdi)
 *        r14 = L$poly limb 1, r15 = L$poly limb 3 (left loaded on return)
 * Clobb: rax, rbx, rcx, rdx, rbp, r8-r13, flags
 *
 * Structure: four passes, one per limb of b. Each pass adds a * b[i] to a
 * running accumulator and is followed by a reduction step that adds
 * (low limb) * p, which zeroes the low limb so it can be dropped. Because
 * p = poly[3]*2^192 + 2^96 - 1, that product is formed from a 32-bit
 * shift pair (the 2^96 term) plus one mulq by poly[3].
 * A final conditional subtraction of p brings the result below 2^256.
 */
.p2align	5
__ecp_nistz256_mul_montq:
	/* pass 0: accumulator r8..r12 = a * b[0] */
	movq	%rax,%rbp
	mulq	%r9
	movq	L$poly+8(%rip),%r14
	movq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%r9

	mulq	%r10
	movq	L$poly+24(%rip),%r15
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%r11
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r12
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	xorq	%r13,%r13
	movq	%rdx,%r12

	/* reduction 0: fold r8 into r9..r13, then r8 is free; fetch b[1] */
	movq	%r8,%rbp
	shlq	$32,%r8
	mulq	%r15
	shrq	$32,%rbp
	addq	%r8,%r9
	adcq	%rbp,%r10
	adcq	%rax,%r11
	movq	8(%rbx),%rax
	adcq	%rdx,%r12
	adcq	$0,%r13
	xorq	%r8,%r8

	/* pass 1: accumulator r9..r13,r8 += a * b[1] */
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r9
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r10
	adcq	$0,%rdx
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%r9,%rax
	adcq	%rdx,%r13
	adcq	$0,%r8

	/* reduction 1: fold r9 into r10..r8, then r9 is free; fetch b[2] */
	movq	%r9,%rbp
	shlq	$32,%r9
	mulq	%r15
	shrq	$32,%rbp
	addq	%r9,%r10
	adcq	%rbp,%r11
	adcq	%rax,%r12
	movq	16(%rbx),%rax
	adcq	%rdx,%r13
	adcq	$0,%r8
	xorq	%r9,%r9

	/* pass 2: accumulator r10..r13,r8,r9 += a * b[2] */
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r10
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r11
	adcq	$0,%rdx
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%r10,%rax
	adcq	%rdx,%r8
	adcq	$0,%r9

	/* reduction 2: fold r10 into r11..r9, then r10 is free; fetch b[3] */
	movq	%r10,%rbp
	shlq	$32,%r10
	mulq	%r15
	shrq	$32,%rbp
	addq	%r10,%r11
	adcq	%rbp,%r12
	adcq	%rax,%r13
	movq	24(%rbx),%rax
	adcq	%rdx,%r8
	adcq	$0,%r9
	xorq	%r10,%r10

	/* pass 3: accumulator r11..r13,r8,r9,r10 += a * b[3] */
	movq	%rax,%rbp
	mulq	0(%rsi)
	addq	%rax,%r11
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	8(%rsi)
	addq	%rcx,%r12
	adcq	$0,%rdx
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	16(%rsi)
	addq	%rcx,%r13
	adcq	$0,%rdx
	addq	%rax,%r13
	movq	%rbp,%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	24(%rsi)
	addq	%rcx,%r8
	adcq	$0,%rdx
	addq	%rax,%r8
	movq	%r11,%rax
	adcq	%rdx,%r9
	adcq	$0,%r10

	/*
	 * reduction 3: fold r11 into r12,r13,r8,r9 (+ carry limb r10).
	 * Copies of the unreduced limbs 0 and 1 are kept in rcx and rbp.
	 */
	movq	%r11,%rbp
	shlq	$32,%r11
	mulq	%r15
	shrq	$32,%rbp
	addq	%r11,%r12
	adcq	%rbp,%r13
	movq	%r12,%rcx
	adcq	%rax,%r8
	adcq	%rdx,%r9
	movq	%r13,%rbp
	adcq	$0,%r10

	/*
	 * Subtract p (limbs -1, r14, 0, r15) from the five-limb value;
	 * copies of limbs 2 and 3 are kept in rbx and rdx.
	 */
	subq	$-1,%r12
	movq	%r8,%rbx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%rdx
	sbbq	%r15,%r9
	sbbq	$0,%r10

	/* borrow set: the value was already below p, restore the saved copy */
	cmovcq	%rcx,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rbx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%rdx,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		/* rep ret */

/*
 * _ecp_nistz256_sqr_mont
 *
 * In:    rdi = result, four 64-bit limbs
 *        rsi = operand a, four 64-bit limbs
 * Out:   result written by __ecp_nistz256_sqr_montq
 * Saves: rbp, rbx, r12-r15 (pushed on entry, reloaded before return)
 *
 * Thin wrapper: loads the register arguments expected by
 * __ecp_nistz256_sqr_montq (rax, r14, r15, r8 = a[0..3]) and calls it.
 */
.globl	_ecp_nistz256_sqr_mont
.private_extern	_ecp_nistz256_sqr_mont

.p2align	5
_ecp_nistz256_sqr_mont:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
L$sqr_body:
	movq	0(%rsi),%rax
	movq	8(%rsi),%r14
	movq	16(%rsi),%r15
	movq	24(%rsi),%r8

	call	__ecp_nistz256_sqr_montq
L$sqr_mont_done:
	/* restore callee-saved registers and release the six pushed slots */
	movq	0(%rsp),%r15
	movq	8(%rsp),%r14
	movq	16(%rsp),%r13
	movq	24(%rsp),%r12
	movq	32(%rsp),%rbx
	movq	40(%rsp),%rbp
	leaq	48(%rsp),%rsp
L$sqr_epilogue:
	.byte	0xf3,0xc3		/* rep ret */

/*
 * __ecp_nistz256_sqr_montq  (file-local helper, custom register contract)
 *
 * In:    rdi = result, four 64-bit limbs
 *        rsi = operand a in memory (re-read for the diagonal products)
 *        rax, r14, r15, r8 = a[0..3]
 * Out:   result in r12,r13,r14,r15 (limbs 0..3) and stored at 0..24(%rdi)
 *        rsi = L$poly limb 1, rbp = L$poly limb 3 (left loaded on return;
 *        note that rsi no longer points at the operand)
 * Clobb: rax, rcx, rdx, rbp, rsi, r8-r15, flags
 *
 * Structure: off-diagonal products a[i]*a[j] (i < j) are accumulated
 * once and doubled, the diagonal squares a[i]^2 are added, then four
 * reduction steps each fold the lowest limb into the limbs above it by
 * adding (low limb) * p. A final conditional subtraction of p follows.
 */
.p2align	5
__ecp_nistz256_sqr_montq:
	/* off-diagonal products with a[0]: a[1]*a[0], a[2]*a[0], a[3]*a[0] */
	movq	%rax,%r13
	mulq	%r14
	movq	%rax,%r9
	movq	%r15,%rax
	movq	%rdx,%r10

	mulq	%r13
	addq	%rax,%r10
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%r13
	addq	%rax,%r11
	movq	%r15,%rax
	adcq	$0,%rdx
	movq	%rdx,%r12

	/* off-diagonal products with a[1]: a[2]*a[1], a[3]*a[1] */
	mulq	%r14
	addq	%rax,%r11
	movq	%r8,%rax
	adcq	$0,%rdx
	movq	%rdx,%rbp

	mulq	%r14
	addq	%rax,%r12
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbp,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	/* off-diagonal product a[3]*a[2] */
	mulq	%r15
	xorq	%r15,%r15
	addq	%rax,%r13
	movq	0(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	/* double the off-diagonal sum (r9..r14, carry into r15) */
	addq	%r9,%r9
	adcq	%r10,%r10
	adcq	%r11,%r11
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14
	adcq	$0,%r15

	/* add the diagonal squares a[0]^2 .. a[3]^2; full product in r8..r15 */
	mulq	%rax
	movq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r9
	adcq	%rax,%r10
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r11
	adcq	%rax,%r12
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rcx

	mulq	%rax
	addq	%rcx,%r13
	adcq	%rax,%r14
	movq	%r8,%rax
	adcq	%rdx,%r15

	/* from here on rsi and rbp hold modulus limbs 1 and 3 */
	movq	L$poly+8(%rip),%rsi
	movq	L$poly+24(%rip),%rbp

	/* reduction 0: fold r8 into r9,r10,r11 (+ rdx) */
	movq	%r8,%rcx
	shlq	$32,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r8,%r9
	adcq	%rcx,%r10
	adcq	%rax,%r11
	movq	%r9,%rax
	adcq	$0,%rdx

	/* reduction 1: fold r9 into r10,r11,r8 (+ rdx) */
	movq	%r9,%rcx
	shlq	$32,%r9
	movq	%rdx,%r8
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r9,%r10
	adcq	%rcx,%r11
	adcq	%rax,%r8
	movq	%r10,%rax
	adcq	$0,%rdx

	/* reduction 2: fold r10 into r11,r8,r9 (+ rdx) */
	movq	%r10,%rcx
	shlq	$32,%r10
	movq	%rdx,%r9
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r10,%r11
	adcq	%rcx,%r8
	adcq	%rax,%r9
	movq	%r11,%rax
	adcq	$0,%rdx

	/* reduction 3: fold r11 into r8,r9,r10 (+ rdx) */
	movq	%r11,%rcx
	shlq	$32,%r11
	movq	%rdx,%r10
	mulq	%rbp
	shrq	$32,%rcx
	addq	%r11,%r8
	adcq	%rcx,%r9
	adcq	%rax,%r10
	adcq	$0,%rdx
	xorq	%r11,%r11

	/*
	 * Add the reduced low half (r8,r9,r10,rdx) to the high half
	 * (r12..r15); r11 receives the carry. Copies of the sum are kept in
	 * r8, r9 (and below in r10, rcx) for the conditional restore.
	 */
	addq	%r8,%r12
	adcq	%r9,%r13
	movq	%r12,%r8
	adcq	%r10,%r14
	adcq	%rdx,%r15
	movq	%r13,%r9
	adcq	$0,%r11

	/* subtract p (limbs -1, rsi, 0, rbp) from the five-limb value */
	subq	$-1,%r12
	movq	%r14,%r10
	sbbq	%rsi,%r13
	sbbq	$0,%r14
	movq	%r15,%rcx
	sbbq	%rbp,%r15
	sbbq	$0,%r11

	/* borrow set: the value was already below p, restore the saved copy */
	cmovcq	%r8,%r12
	cmovcq	%r9,%r13
	movq	%r12,0(%rdi)
	cmovcq	%r10,%r14
	movq	%r13,8(%rdi)
	cmovcq	%rcx,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	.byte	0xf3,0xc3		/* rep ret */

/*
 * _ecp_nistz256_select_w5
 *
 * In:    rdi = output buffer, 96 bytes (written with unaligned stores)
 *        rsi = table of 16 entries, 96 bytes each; read with movdqa, so
 *              it must be 16-byte aligned on this path
 *        edx = index
 * Out:   output = table entry number `index`, counting entries from 1.
 *        If index matches none of 1..16 (e.g. index == 0) the output is
 *        all zero.
 * Clobb: rax, rsi, xmm0-xmm15, flags
 *
 * Every entry is loaded and masked on every call; the index is only used
 * in a lane-wise compare, never in an address or a branch.
 *
 * Dispatch: if bit 5 of the 32-bit word at offset 8 of
 * _OPENSSL_ia32cap_P is set, control transfers to L$avx2_select_w5
 * (presumably the AVX2 feature bit - confirm against the capability
 * vector's definition).
 */
.globl	_ecp_nistz256_select_w5
.private_extern	_ecp_nistz256_select_w5

.p2align	5
_ecp_nistz256_select_w5:
	leaq	_OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	L$avx2_select_w5
	movdqa	L$One(%rip),%xmm0	/* xmm0 = counter increment (1 per lane) */
	movd	%edx,%xmm1

	/* xmm2..xmm7 = 96-byte accumulator, initially zero */
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7

	movdqa	%xmm0,%xmm8		/* xmm8 = running counter, starts at 1 */
	pshufd	$0,%xmm1,%xmm1		/* broadcast index to all four lanes */

	movq	$16,%rax		/* 16 table entries */
L$select_loop_sse_w5:

	/* xmm15 = all-ones if counter == index, else zero; then bump counter */
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	pcmpeqd	%xmm1,%xmm15

	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	movdqa	64(%rsi),%xmm13
	movdqa	80(%rsi),%xmm14
	leaq	96(%rsi),%rsi

	/* accumulator |= entry & mask */
	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	pand	%xmm15,%xmm13
	por	%xmm12,%xmm5
	pand	%xmm15,%xmm14
	por	%xmm13,%xmm6
	por	%xmm14,%xmm7

	decq	%rax
	jnz	L$select_loop_sse_w5

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	movdqu	%xmm6,64(%rdi)
	movdqu	%xmm7,80(%rdi)
	.byte	0xf3,0xc3		/* rep ret */
L$SEH_end_ecp_nistz256_select_w5:

/*
 * _ecp_nistz256_select_w7
 *
 * In:    rdi = output buffer, 64 bytes (written with unaligned stores)
 *        rsi = table of 64 entries, 64 bytes each; read with movdqa, so
 *              it must be 16-byte aligned on this path
 *        edx = index
 * Out:   output = table entry number `index`, counting entries from 1.
 *        If index matches none of 1..64 (e.g. index == 0) the output is
 *        all zero.
 * Clobb: rax, rsi, xmm0-xmm5, xmm8-xmm12, xmm15, flags
 *
 * Every entry is loaded and masked on every call; the index is only used
 * in a lane-wise compare, never in an address or a branch.
 *
 * Dispatch: if bit 5 of the 32-bit word at offset 8 of
 * _OPENSSL_ia32cap_P is set, control transfers to L$avx2_select_w7
 * (presumably the AVX2 feature bit - confirm against the capability
 * vector's definition).
 */
.globl	_ecp_nistz256_select_w7
.private_extern	_ecp_nistz256_select_w7

.p2align	5
_ecp_nistz256_select_w7:
	leaq	_OPENSSL_ia32cap_P(%rip),%rax
	movq	8(%rax),%rax
	testl	$32,%eax
	jnz	L$avx2_select_w7
	movdqa	L$One(%rip),%xmm8	/* xmm8 = running counter, starts at 1 */
	movd	%edx,%xmm1

	/* xmm2..xmm5 = 64-byte accumulator, initially zero */
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	movdqa	%xmm8,%xmm0		/* xmm0 = counter increment (1 per lane) */
	pshufd	$0,%xmm1,%xmm1		/* broadcast index to all four lanes */
	movq	$64,%rax		/* 64 table entries */

L$select_loop_sse_w7:
	/* xmm15 = all-ones if counter == index, else zero; then bump counter */
	movdqa	%xmm8,%xmm15
	paddd	%xmm0,%xmm8
	movdqa	0(%rsi),%xmm9
	movdqa	16(%rsi),%xmm10
	pcmpeqd	%xmm1,%xmm15
	movdqa	32(%rsi),%xmm11
	movdqa	48(%rsi),%xmm12
	leaq	64(%rsi),%rsi

	/* accumulator |= entry & mask */
	pand	%xmm15,%xmm9
	pand	%xmm15,%xmm10
	por	%xmm9,%xmm2
	pand	%xmm15,%xmm11
	por	%xmm10,%xmm3
	pand	%xmm15,%xmm12
	por	%xmm11,%xmm4
	prefetcht0	255(%rsi)	/* prefetch ahead of the already-advanced pointer */
	por	%xmm12,%xmm5

	decq	%rax
	jnz	L$select_loop_sse_w7

	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm3,16(%rdi)
	movdqu	%xmm4,32(%rdi)
	movdqu	%xmm5,48(%rdi)
	.byte	0xf3,0xc3		/* rep ret */
L$SEH_end_ecp_nistz256_select_w7:

/*
 * ecp_nistz256_avx2_select_w5 / L$avx2_select_w5
 *
 * 256-bit variant of the 16-entry, 96-byte table select. Reached by a
 * jump from _ecp_nistz256_select_w5, so it uses the same arguments.
 *
 * In:    rdi = output buffer, 96 bytes (written with unaligned stores)
 *        rsi = table of 16 entries, 96 bytes each; read with vmovdqa on
 *              256-bit registers, so it must be 32-byte aligned
 *        edx = index
 * Out:   output = table entry number `index`, counting entries from 1;
 *        all zero if index matches none of 1..16.
 * Clobb: rax, rsi, ymm0-ymm14, flags
 *
 * Two entries are handled per iteration, with two counters (starting at
 * 1 and 2) that both advance by 2. Every entry is loaded and masked; the
 * index is never used in an address or a branch.
 */
.p2align	5
ecp_nistz256_avx2_select_w5:
L$avx2_select_w5:
	vzeroupper
	vmovdqa	L$Two(%rip),%ymm0	/* counter increment: 2 per lane */

	/* ymm2..ymm4 = 96-byte accumulator, initially zero */
	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3
	vpxor	%ymm4,%ymm4,%ymm4

	vmovdqa	L$One(%rip),%ymm5	/* counter for the first entry of each pair */
	vmovdqa	L$Two(%rip),%ymm10	/* counter for the second entry of each pair */

	/* broadcast index to all eight lanes (ymm2 is zero = lane selector 0) */
	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$8,%rax			/* 8 iterations x 2 entries */
L$select_loop_avx2_w5:

	vmovdqa	0(%rsi),%ymm6
	vmovdqa	32(%rsi),%ymm7
	vmovdqa	64(%rsi),%ymm8

	vmovdqa	96(%rsi),%ymm11
	vmovdqa	128(%rsi),%ymm12
	vmovdqa	160(%rsi),%ymm13

	/* per-entry masks: all-ones where counter == index */
	vpcmpeqd	%ymm1,%ymm5,%ymm9
	vpcmpeqd	%ymm1,%ymm10,%ymm14

	vpaddd	%ymm0,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm10,%ymm10
	leaq	192(%rsi),%rsi

	vpand	%ymm9,%ymm6,%ymm6
	vpand	%ymm9,%ymm7,%ymm7
	vpand	%ymm9,%ymm8,%ymm8
	vpand	%ymm14,%ymm11,%ymm11
	vpand	%ymm14,%ymm12,%ymm12
	vpand	%ymm14,%ymm13,%ymm13

	/* merge the masked entries into the accumulator */
	vpxor	%ymm6,%ymm2,%ymm2
	vpxor	%ymm7,%ymm3,%ymm3
	vpxor	%ymm8,%ymm4,%ymm4
	vpxor	%ymm11,%ymm2,%ymm2
	vpxor	%ymm12,%ymm3,%ymm3
	vpxor	%ymm13,%ymm4,%ymm4

	decq	%rax
	jnz	L$select_loop_avx2_w5

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vmovdqu	%ymm4,64(%rdi)
	vzeroupper
	.byte	0xf3,0xc3		/* rep ret */
L$SEH_end_ecp_nistz256_avx2_select_w5:

/*
 * _ecp_nistz256_avx2_select_w7 / L$avx2_select_w7
 *
 * 256-bit variant of the 64-entry, 64-byte table select. Exported, and
 * also reached by a jump from _ecp_nistz256_select_w7.
 *
 * In:    rdi = output buffer, 64 bytes (written with unaligned stores)
 *        rsi = table of 64 entries, 64 bytes each; read with vmovdqa on
 *              256-bit registers, so it must be 32-byte aligned
 *        edx = index
 * Out:   output = table entry number `index`, counting entries from 1;
 *        all zero if index matches none of 1..64.
 * Clobb: rax, rsi, ymm0-ymm15, flags
 *
 * Three entries are handled per iteration for 21 iterations (63
 * entries), with three counters (starting at 1, 2, 3) that all advance
 * by 3; entry 64 is handled after the loop. Every entry is loaded and
 * masked; the index is never used in an address or a branch.
 */
.globl	_ecp_nistz256_avx2_select_w7
.private_extern	_ecp_nistz256_avx2_select_w7

.p2align	5
_ecp_nistz256_avx2_select_w7:
L$avx2_select_w7:
	vzeroupper
	vmovdqa	L$Three(%rip),%ymm0	/* counter increment: 3 per lane */

	/* ymm2,ymm3 = 64-byte accumulator, initially zero */
	vpxor	%ymm2,%ymm2,%ymm2
	vpxor	%ymm3,%ymm3,%ymm3

	vmovdqa	L$One(%rip),%ymm4	/* counter for entry 1 of each triple */
	vmovdqa	L$Two(%rip),%ymm8	/* counter for entry 2 of each triple */
	vmovdqa	L$Three(%rip),%ymm12	/* counter for entry 3 of each triple */

	/* broadcast index to all eight lanes (ymm2 is zero = lane selector 0) */
	vmovd	%edx,%xmm1
	vpermd	%ymm1,%ymm2,%ymm1

	movq	$21,%rax		/* 21 iterations x 3 entries */
L$select_loop_avx2_w7:

	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vmovdqa	64(%rsi),%ymm9
	vmovdqa	96(%rsi),%ymm10

	vmovdqa	128(%rsi),%ymm13
	vmovdqa	160(%rsi),%ymm14

	/* per-entry masks: all-ones where counter == index */
	vpcmpeqd	%ymm1,%ymm4,%ymm7
	vpcmpeqd	%ymm1,%ymm8,%ymm11
	vpcmpeqd	%ymm1,%ymm12,%ymm15

	vpaddd	%ymm0,%ymm4,%ymm4
	vpaddd	%ymm0,%ymm8,%ymm8
	vpaddd	%ymm0,%ymm12,%ymm12
	leaq	192(%rsi),%rsi

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6
	vpand	%ymm11,%ymm9,%ymm9
	vpand	%ymm11,%ymm10,%ymm10
	vpand	%ymm15,%ymm13,%ymm13
	vpand	%ymm15,%ymm14,%ymm14

	/* merge the masked entries into the accumulator */
	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm13,%ymm2,%ymm2
	vpxor	%ymm14,%ymm3,%ymm3

	decq	%rax
	jnz	L$select_loop_avx2_w7

	/* tail: entry 64; ymm4 has advanced from 1 to 64 by now */
	vmovdqa	0(%rsi),%ymm5
	vmovdqa	32(%rsi),%ymm6

	vpcmpeqd	%ymm1,%ymm4,%ymm7

	vpand	%ymm7,%ymm5,%ymm5
	vpand	%ymm7,%ymm6,%ymm6

	vpxor	%ymm5,%ymm2,%ymm2
	vpxor	%ymm6,%ymm3,%ymm3

	vmovdqu	%ymm2,0(%rdi)
	vmovdqu	%ymm3,32(%rdi)
	vzeroupper
	.byte	0xf3,0xc3		/* rep ret */
L$SEH_end_ecp_nistz256_avx2_select_w7:

/*
 * __ecp_nistz256_add_toq  (file-local helper, custom register contract)
 *
 * In:    r12,r13,r8,r9 = a (limbs 0..3)
 *        rbx = operand b in memory, four 64-bit limbs
 *        r14 = L$poly limb 1, r15 = L$poly limb 3
 *        rdi = result, four 64-bit limbs
 * Out:   a + b, reduced by one conditional subtraction of p, in
 *        r12,r13,r8,r9 and stored at 0..24(%rdi)
 * Clobb: rax, rbp, rcx, r10, r11, flags
 */
.p2align	5
__ecp_nistz256_add_toq:
	xorq	%r11,%r11		/* r11 collects the carry out of limb 3 */
	addq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	movq	%r12,%rax
	adcq	16(%rbx),%r8
	adcq	24(%rbx),%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	/* subtract p (limbs -1, r14, 0, r15); raw sum saved in rax,rbp,rcx,r10 */
	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	/* borrow set: the sum was already below p, restore the raw sum */
	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		/* rep ret */

/*
 * __ecp_nistz256_sub_fromq  (file-local helper, custom register contract)
 *
 * In:    r12,r13,r8,r9 = a (limbs 0..3)
 *        rbx = operand b in memory, four 64-bit limbs
 *        r14 = L$poly limb 1, r15 = L$poly limb 3
 *        rdi = result, four 64-bit limbs
 * Out:   a - b, with p added back when the subtraction borrowed, in
 *        r12,r13,r8,r9 and stored at 0..24(%rdi)
 * Clobb: rax, rbp, rcx, r10, r11, flags
 */
.p2align	5
__ecp_nistz256_sub_fromq:
	subq	0(%rbx),%r12
	sbbq	8(%rbx),%r13
	movq	%r12,%rax
	sbbq	16(%rbx),%r8
	sbbq	24(%rbx),%r9
	movq	%r13,%rbp
	sbbq	%r11,%r11		/* r11 = -1 if the subtraction borrowed, else 0 */

	/* add p (limbs -1, r14, 0, r15); raw difference saved in rax,rbp,rcx,r10 */
	addq	$-1,%r12
	movq	%r8,%rcx
	adcq	%r14,%r13
	adcq	$0,%r8
	movq	%r9,%r10
	adcq	%r15,%r9
	testq	%r11,%r11

	/* no borrow: restore the raw difference */
	cmovzq	%rax,%r12
	cmovzq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovzq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovzq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		/* rep ret */

/*
 * __ecp_nistz256_subq  (file-local helper, custom register contract)
 *
 * In:    rax,rbp,rcx,r10 = a (limbs 0..3), the minuend
 *        r12,r13,r8,r9   = b (limbs 0..3), the subtrahend
 *        r14 = L$poly limb 1, r15 = L$poly limb 3
 * Out:   a - b, with p added back when the subtraction borrowed, in
 *        r12,r13,r8,r9. Nothing is stored to memory; rdi is not used.
 * Clobb: rax, rbp, rcx, r10, r11, flags
 */
.p2align	5
__ecp_nistz256_subq:
	subq	%r12,%rax
	sbbq	%r13,%rbp
	movq	%rax,%r12
	sbbq	%r8,%rcx
	sbbq	%r9,%r10
	movq	%rbp,%r13
	sbbq	%r11,%r11		/* r11 = -1 if the subtraction borrowed, else 0 */

	/* rax,rbp,rcx,r10 = difference + p; raw difference is in r12,r13,r8,r9 */
	addq	$-1,%rax
	movq	%rcx,%r8
	adcq	%r14,%rbp
	adcq	$0,%rcx
	movq	%r10,%r9
	adcq	%r15,%r10
	testq	%r11,%r11

	/* borrow: take the +p value */
	cmovnzq	%rax,%r12
	cmovnzq	%rbp,%r13
	cmovnzq	%rcx,%r8
	cmovnzq	%r10,%r9

	.byte	0xf3,0xc3		/* rep ret */

/*
 * __ecp_nistz256_mul_by_2q  (file-local helper, custom register contract)
 *
 * In:    r12,r13,r8,r9 = a (limbs 0..3)
 *        r14 = L$poly limb 1, r15 = L$poly limb 3
 *        rdi = result, four 64-bit limbs
 * Out:   a + a, reduced by one conditional subtraction of p, in
 *        r12,r13,r8,r9 and stored at 0..24(%rdi)
 * Clobb: rax, rbp, rcx, r10, r11, flags
 */
.p2align	5
__ecp_nistz256_mul_by_2q:
	xorq	%r11,%r11		/* r11 collects the carry out of limb 3 */
	addq	%r12,%r12
	adcq	%r13,%r13
	movq	%r12,%rax
	adcq	%r8,%r8
	adcq	%r9,%r9
	movq	%r13,%rbp
	adcq	$0,%r11

	/* subtract p (limbs -1, r14, 0, r15); raw sum saved in rax,rbp,rcx,r10 */
	subq	$-1,%r12
	movq	%r8,%rcx
	sbbq	%r14,%r13
	sbbq	$0,%r8
	movq	%r9,%r10
	sbbq	%r15,%r9
	sbbq	$0,%r11

	/* borrow set: the sum was already below p, restore the raw sum */
	cmovcq	%rax,%r12
	cmovcq	%rbp,%r13
	movq	%r12,0(%rdi)
	cmovcq	%rcx,%r8
	movq	%r13,8(%rdi)
	cmovcq	%r10,%r9
	movq	%r8,16(%rdi)
	movq	%r9,24(%rdi)

	.byte	0xf3,0xc3		/* rep ret */

/*
 * _ecp_nistz256_point_double
 *
 * In:    rdi = result point, three 32-byte field elements at offsets
 *              0, 32 and 64
 *        rsi = input point, same layout
 *        (the three elements are presumably the X, Y, Z coordinates; the
 *        names in_x/in_y/in_z and res_x/res_y/res_z below refer to
 *        offsets 0/32/64 - confirm against the C declaration)
 * Saves: rbp, rbx, r12-r15 (pushed on entry, reloaded before return)
 *
 * Stack frame after the prologue (160+8 bytes of locals, which together
 * with the six pushes leaves rsp 16-byte aligned for movdqa):
 *     0(%rsp)   S
 *    32(%rsp)   M
 *    64(%rsp)   Zsqr
 *    96(%rsp)   in_x  (copy of the first input element)
 *   128(%rsp)   tmp0
 * The slot names are descriptive labels only.
 *
 * The result pointers are parked in SSE registers because rdi is reused
 * as the destination argument of every helper call:
 *   xmm0 = res_x (rdi), xmm1 = res_y (rdi+32), xmm2 = res_z (rdi+64)
 * The .byte sequences are 64-bit movq transfers between general-purpose
 * and xmm registers; each one is annotated with its decoded form.
 *
 * L$point_double_shortcutq is a second entry point: the point-addition
 * routine jumps here after adjusting rsp to this frame size, with rdi
 * and rsi set up as for a normal call.
 */
.globl	_ecp_nistz256_point_double
.private_extern	_ecp_nistz256_point_double

.p2align	5
_ecp_nistz256_point_double:
	pushq	%rbp
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$160+8,%rsp

L$point_doubleq_body:

L$point_double_shortcutq:
	/* copy in_x to the frame, load in_y and the modulus limbs */
	movdqu	0(%rsi),%xmm0
	movq	%rsi,%rbx		/* rbx = input point, survives the first two calls */
	movdqu	16(%rsi),%xmm1
	movq	32+0(%rsi),%r12
	movq	32+8(%rsi),%r13
	movq	32+16(%rsi),%r8
	movq	32+24(%rsi),%r9
	movq	L$poly+8(%rip),%r14
	movq	L$poly+24(%rip),%r15
	movdqa	%xmm0,96(%rsp)
	movdqa	%xmm1,96+16(%rsp)
	leaq	32(%rdi),%r10
	leaq	64(%rdi),%r11
.byte	102,72,15,110,199		/* movq %rdi,%xmm0  (res_x) */
.byte	102,73,15,110,202		/* movq %r10,%xmm1  (res_y) */
.byte	102,73,15,110,211		/* movq %r11,%xmm2  (res_z) */

	/* S = 2 * in_y */
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	/* Zsqr = in_z^2 */
	movq	64+0(%rsi),%rax
	movq	64+8(%rsi),%r14
	movq	64+16(%rsi),%r15
	movq	64+24(%rsi),%r8
	leaq	64-0(%rsi),%rsi
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	/* S = S^2 */
	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_sqr_montq

	/* res_z = 2 * (in_z * in_y) */
	movq	32(%rbx),%rax
	movq	64+0(%rbx),%r9
	movq	64+8(%rbx),%r10
	movq	64+16(%rbx),%r11
	movq	64+24(%rbx),%r12
	leaq	64-0(%rbx),%rsi
	leaq	32(%rbx),%rbx
.byte	102,72,15,126,215		/* movq %xmm2,%rdi  (res_z) */
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	/* M = in_x + Zsqr */
	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	/* Zsqr = in_x - Zsqr */
	movq	96+0(%rsp),%r12
	movq	96+8(%rsp),%r13
	leaq	64(%rsp),%rbx
	movq	96+16(%rsp),%r8
	movq	96+24(%rsp),%r9
	leaq	64(%rsp),%rdi
	call	__ecp_nistz256_sub_fromq

	/* res_y = S^2 */
	movq	0+0(%rsp),%rax
	movq	8+0(%rsp),%r14
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r15
	movq	24+0(%rsp),%r8
.byte	102,72,15,126,207		/* movq %xmm1,%rdi  (res_y) */
	call	__ecp_nistz256_sqr_montq

	/*
	 * res_y = res_y / 2 (mod p).
	 * The square is in r12,r13,r14,r15; rsi and rbp hold modulus limbs 1
	 * and 3 (left there by the squaring helper). Form value + p with the
	 * carry in r9, keep the plain value if it is even (bit 0 of rax
	 * clear), then shift the five-limb quantity right by one bit.
	 */
	xorq	%r9,%r9
	movq	%r12,%rax
	addq	$-1,%r12
	movq	%r13,%r10
	adcq	%rsi,%r13
	movq	%r14,%rcx
	adcq	$0,%r14
	movq	%r15,%r8
	adcq	%rbp,%r15
	adcq	$0,%r9
	xorq	%rsi,%rsi
	testq	$1,%rax

	cmovzq	%rax,%r12
	cmovzq	%r10,%r13
	cmovzq	%rcx,%r14
	cmovzq	%r8,%r15
	cmovzq	%rsi,%r9

	movq	%r13,%rax
	shrq	$1,%r12
	shlq	$63,%rax
	movq	%r14,%r10
	shrq	$1,%r13
	orq	%rax,%r12
	shlq	$63,%r10
	movq	%r15,%rcx
	shrq	$1,%r14
	orq	%r10,%r13
	shlq	$63,%rcx
	movq	%r12,0(%rdi)
	shrq	$1,%r15
	movq	%r13,8(%rdi)
	shlq	$63,%r9
	orq	%rcx,%r14
	orq	%r9,%r15
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)

	/* M = M * Zsqr */
	movq	64(%rsp),%rax
	leaq	64(%rsp),%rbx
	movq	0+32(%rsp),%r9
	movq	8+32(%rsp),%r10
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r11
	movq	24+32(%rsp),%r12
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	/* tmp0 = 2 * M */
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	/* M = tmp0 + M  (= 3 * M) */
	leaq	32(%rsp),%rbx
	leaq	32(%rsp),%rdi
	call	__ecp_nistz256_add_toq

	/* S = S * in_x */
	movq	96(%rsp),%rax
	leaq	96(%rsp),%rbx
	movq	0+0(%rsp),%r9
	movq	8+0(%rsp),%r10
	leaq	0+0(%rsp),%rsi
	movq	16+0(%rsp),%r11
	movq	24+0(%rsp),%r12
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	/* tmp0 = 2 * S */
	leaq	128(%rsp),%rdi
	call	__ecp_nistz256_mul_by_2q

	/* res_x = M^2 */
	movq	0+32(%rsp),%rax
	movq	8+32(%rsp),%r14
	leaq	0+32(%rsp),%rsi
	movq	16+32(%rsp),%r15
	movq	24+32(%rsp),%r8
.byte	102,72,15,126,199		/* movq %xmm0,%rdi  (res_x) */
	call	__ecp_nistz256_sqr_montq

	/*
	 * res_x = res_x - tmp0.
	 * Move the square from r12,r13,r14,r15 into the r12,r13,r8,r9 layout
	 * and put the modulus limbs back in r14/r15 for the helper.
	 */
	leaq	128(%rsp),%rbx
	movq	%r14,%r8
	movq	%r15,%r9
	movq	%rsi,%r14
	movq	%rbp,%r15
	call	__ecp_nistz256_sub_fromq

	/* r12,r13,r8,r9 = S - res_x (register result only) */
	movq	0+0(%rsp),%rax
	movq	0+8(%rsp),%rbp
	movq	0+16(%rsp),%rcx
	movq	0+24(%rsp),%r10
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_subq

	/*
	 * S = (S - res_x) * M.
	 * The difference is stored to the S slot and shuffled into r9..r12
	 * for the multiply. xorl sets ZF and the interleaved mov/lea leave
	 * flags alone, so both cmovz instructions always perform the move.
	 */
	movq	32(%rsp),%rax
	leaq	32(%rsp),%rbx
	movq	%r12,%r14
	xorl	%ecx,%ecx
	movq	%r12,0+0(%rsp)
	movq	%r13,%r10
	movq	%r13,0+8(%rsp)
	cmovzq	%r8,%r11
	movq	%r8,0+16(%rsp)
	leaq	0-0(%rsp),%rsi
	cmovzq	%r9,%r12
	movq	%r9,0+24(%rsp)
	movq	%r14,%r9
	leaq	0(%rsp),%rdi
	call	__ecp_nistz256_mul_montq

	/* res_y = S - res_y */
.byte	102,72,15,126,203		/* movq %xmm1,%rbx  (res_y) */
.byte	102,72,15,126,207		/* movq %xmm1,%rdi  (res_y) */
	call	__ecp_nistz256_sub_fromq

	/* rsi = address just above the saved registers; restore and unwind */
	leaq	160+56(%rsp),%rsi
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbx
	movq	-8(%rsi),%rbp
	leaq	(%rsi),%rsp
L$point_doubleq_epilogue:
	.byte	0xf3,0xc3		/* rep ret */

Adam Langleyfad63272015-11-12 12:15:39 -08001160.globl _ecp_nistz256_point_add
1161.private_extern _ecp_nistz256_point_add
1162
1163.p2align 5
1164_ecp_nistz256_point_add:
Robert Sloanab8b8882018-03-26 11:39:51 -07001165
Adam Langleyfad63272015-11-12 12:15:39 -08001166 pushq %rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07001167
Adam Langleyfad63272015-11-12 12:15:39 -08001168 pushq %rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07001169
Adam Langleyfad63272015-11-12 12:15:39 -08001170 pushq %r12
Robert Sloanab8b8882018-03-26 11:39:51 -07001171
Adam Langleyfad63272015-11-12 12:15:39 -08001172 pushq %r13
Robert Sloanab8b8882018-03-26 11:39:51 -07001173
Adam Langleyfad63272015-11-12 12:15:39 -08001174 pushq %r14
Robert Sloanab8b8882018-03-26 11:39:51 -07001175
Adam Langleyfad63272015-11-12 12:15:39 -08001176 pushq %r15
Robert Sloanab8b8882018-03-26 11:39:51 -07001177
Adam Langleyfad63272015-11-12 12:15:39 -08001178 subq $576+8,%rsp
1179
Robert Sloanab8b8882018-03-26 11:39:51 -07001180L$point_addq_body:
1181
Adam Langleyfad63272015-11-12 12:15:39 -08001182 movdqu 0(%rsi),%xmm0
1183 movdqu 16(%rsi),%xmm1
1184 movdqu 32(%rsi),%xmm2
1185 movdqu 48(%rsi),%xmm3
1186 movdqu 64(%rsi),%xmm4
1187 movdqu 80(%rsi),%xmm5
1188 movq %rsi,%rbx
1189 movq %rdx,%rsi
1190 movdqa %xmm0,384(%rsp)
1191 movdqa %xmm1,384+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08001192 movdqa %xmm2,416(%rsp)
1193 movdqa %xmm3,416+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08001194 movdqa %xmm4,448(%rsp)
1195 movdqa %xmm5,448+16(%rsp)
Steven Valdez909b19f2016-11-21 15:35:44 -05001196 por %xmm4,%xmm5
Adam Langleyfad63272015-11-12 12:15:39 -08001197
1198 movdqu 0(%rsi),%xmm0
Steven Valdez909b19f2016-11-21 15:35:44 -05001199 pshufd $0xb1,%xmm5,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08001200 movdqu 16(%rsi),%xmm1
1201 movdqu 32(%rsi),%xmm2
1202 por %xmm3,%xmm5
1203 movdqu 48(%rsi),%xmm3
1204 movq 64+0(%rsi),%rax
1205 movq 64+8(%rsi),%r14
1206 movq 64+16(%rsi),%r15
1207 movq 64+24(%rsi),%r8
1208 movdqa %xmm0,480(%rsp)
David Benjamin4969cc92016-04-22 15:02:23 -04001209 pshufd $0x1e,%xmm5,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08001210 movdqa %xmm1,480+16(%rsp)
Steven Valdez909b19f2016-11-21 15:35:44 -05001211 movdqu 64(%rsi),%xmm0
1212 movdqu 80(%rsi),%xmm1
Adam Langleyfad63272015-11-12 12:15:39 -08001213 movdqa %xmm2,512(%rsp)
1214 movdqa %xmm3,512+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08001215 por %xmm4,%xmm5
1216 pxor %xmm4,%xmm4
Steven Valdez909b19f2016-11-21 15:35:44 -05001217 por %xmm0,%xmm1
1218.byte 102,72,15,110,199
Adam Langleyfad63272015-11-12 12:15:39 -08001219
1220 leaq 64-0(%rsi),%rsi
1221 movq %rax,544+0(%rsp)
1222 movq %r14,544+8(%rsp)
1223 movq %r15,544+16(%rsp)
1224 movq %r8,544+24(%rsp)
1225 leaq 96(%rsp),%rdi
1226 call __ecp_nistz256_sqr_montq
1227
1228 pcmpeqd %xmm4,%xmm5
Steven Valdez909b19f2016-11-21 15:35:44 -05001229 pshufd $0xb1,%xmm1,%xmm4
1230 por %xmm1,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08001231 pshufd $0,%xmm5,%xmm5
David Benjamin4969cc92016-04-22 15:02:23 -04001232 pshufd $0x1e,%xmm4,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08001233 por %xmm3,%xmm4
1234 pxor %xmm3,%xmm3
1235 pcmpeqd %xmm3,%xmm4
1236 pshufd $0,%xmm4,%xmm4
1237 movq 64+0(%rbx),%rax
1238 movq 64+8(%rbx),%r14
1239 movq 64+16(%rbx),%r15
1240 movq 64+24(%rbx),%r8
David Benjamin4969cc92016-04-22 15:02:23 -04001241.byte 102,72,15,110,203
Adam Langleyfad63272015-11-12 12:15:39 -08001242
1243 leaq 64-0(%rbx),%rsi
1244 leaq 32(%rsp),%rdi
1245 call __ecp_nistz256_sqr_montq
1246
1247 movq 544(%rsp),%rax
1248 leaq 544(%rsp),%rbx
1249 movq 0+96(%rsp),%r9
1250 movq 8+96(%rsp),%r10
1251 leaq 0+96(%rsp),%rsi
1252 movq 16+96(%rsp),%r11
1253 movq 24+96(%rsp),%r12
1254 leaq 224(%rsp),%rdi
1255 call __ecp_nistz256_mul_montq
1256
1257 movq 448(%rsp),%rax
1258 leaq 448(%rsp),%rbx
1259 movq 0+32(%rsp),%r9
1260 movq 8+32(%rsp),%r10
1261 leaq 0+32(%rsp),%rsi
1262 movq 16+32(%rsp),%r11
1263 movq 24+32(%rsp),%r12
1264 leaq 256(%rsp),%rdi
1265 call __ecp_nistz256_mul_montq
1266
1267 movq 416(%rsp),%rax
1268 leaq 416(%rsp),%rbx
1269 movq 0+224(%rsp),%r9
1270 movq 8+224(%rsp),%r10
1271 leaq 0+224(%rsp),%rsi
1272 movq 16+224(%rsp),%r11
1273 movq 24+224(%rsp),%r12
1274 leaq 224(%rsp),%rdi
1275 call __ecp_nistz256_mul_montq
1276
1277 movq 512(%rsp),%rax
1278 leaq 512(%rsp),%rbx
1279 movq 0+256(%rsp),%r9
1280 movq 8+256(%rsp),%r10
1281 leaq 0+256(%rsp),%rsi
1282 movq 16+256(%rsp),%r11
1283 movq 24+256(%rsp),%r12
1284 leaq 256(%rsp),%rdi
1285 call __ecp_nistz256_mul_montq
1286
1287 leaq 224(%rsp),%rbx
1288 leaq 64(%rsp),%rdi
1289 call __ecp_nistz256_sub_fromq
1290
1291 orq %r13,%r12
1292 movdqa %xmm4,%xmm2
1293 orq %r8,%r12
1294 orq %r9,%r12
1295 por %xmm5,%xmm2
1296.byte 102,73,15,110,220
1297
1298 movq 384(%rsp),%rax
1299 leaq 384(%rsp),%rbx
1300 movq 0+96(%rsp),%r9
1301 movq 8+96(%rsp),%r10
1302 leaq 0+96(%rsp),%rsi
1303 movq 16+96(%rsp),%r11
1304 movq 24+96(%rsp),%r12
1305 leaq 160(%rsp),%rdi
1306 call __ecp_nistz256_mul_montq
1307
1308 movq 480(%rsp),%rax
1309 leaq 480(%rsp),%rbx
1310 movq 0+32(%rsp),%r9
1311 movq 8+32(%rsp),%r10
1312 leaq 0+32(%rsp),%rsi
1313 movq 16+32(%rsp),%r11
1314 movq 24+32(%rsp),%r12
1315 leaq 192(%rsp),%rdi
1316 call __ecp_nistz256_mul_montq
1317
1318 leaq 160(%rsp),%rbx
1319 leaq 0(%rsp),%rdi
1320 call __ecp_nistz256_sub_fromq
1321
1322 orq %r13,%r12
1323 orq %r8,%r12
1324 orq %r9,%r12
1325
1326.byte 0x3e
1327 jnz L$add_proceedq
1328.byte 102,73,15,126,208
1329.byte 102,73,15,126,217
1330 testq %r8,%r8
1331 jnz L$add_proceedq
1332 testq %r9,%r9
David Benjamin4969cc92016-04-22 15:02:23 -04001333 jz L$add_doubleq
Adam Langleyfad63272015-11-12 12:15:39 -08001334
1335.byte 102,72,15,126,199
1336 pxor %xmm0,%xmm0
1337 movdqu %xmm0,0(%rdi)
1338 movdqu %xmm0,16(%rdi)
1339 movdqu %xmm0,32(%rdi)
1340 movdqu %xmm0,48(%rdi)
1341 movdqu %xmm0,64(%rdi)
1342 movdqu %xmm0,80(%rdi)
1343 jmp L$add_doneq
1344
1345.p2align 5
David Benjamin4969cc92016-04-22 15:02:23 -04001346L$add_doubleq:
1347.byte 102,72,15,126,206
1348.byte 102,72,15,126,199
1349 addq $416,%rsp
1350 jmp L$point_double_shortcutq
1351
1352.p2align 5
Adam Langleyfad63272015-11-12 12:15:39 -08001353L$add_proceedq:
1354 movq 0+64(%rsp),%rax
1355 movq 8+64(%rsp),%r14
1356 leaq 0+64(%rsp),%rsi
1357 movq 16+64(%rsp),%r15
1358 movq 24+64(%rsp),%r8
1359 leaq 96(%rsp),%rdi
1360 call __ecp_nistz256_sqr_montq
1361
1362 movq 448(%rsp),%rax
1363 leaq 448(%rsp),%rbx
1364 movq 0+0(%rsp),%r9
1365 movq 8+0(%rsp),%r10
1366 leaq 0+0(%rsp),%rsi
1367 movq 16+0(%rsp),%r11
1368 movq 24+0(%rsp),%r12
1369 leaq 352(%rsp),%rdi
1370 call __ecp_nistz256_mul_montq
1371
1372 movq 0+0(%rsp),%rax
1373 movq 8+0(%rsp),%r14
1374 leaq 0+0(%rsp),%rsi
1375 movq 16+0(%rsp),%r15
1376 movq 24+0(%rsp),%r8
1377 leaq 32(%rsp),%rdi
1378 call __ecp_nistz256_sqr_montq
1379
1380 movq 544(%rsp),%rax
1381 leaq 544(%rsp),%rbx
1382 movq 0+352(%rsp),%r9
1383 movq 8+352(%rsp),%r10
1384 leaq 0+352(%rsp),%rsi
1385 movq 16+352(%rsp),%r11
1386 movq 24+352(%rsp),%r12
1387 leaq 352(%rsp),%rdi
1388 call __ecp_nistz256_mul_montq
1389
1390 movq 0(%rsp),%rax
1391 leaq 0(%rsp),%rbx
1392 movq 0+32(%rsp),%r9
1393 movq 8+32(%rsp),%r10
1394 leaq 0+32(%rsp),%rsi
1395 movq 16+32(%rsp),%r11
1396 movq 24+32(%rsp),%r12
1397 leaq 128(%rsp),%rdi
1398 call __ecp_nistz256_mul_montq
1399
1400 movq 160(%rsp),%rax
1401 leaq 160(%rsp),%rbx
1402 movq 0+32(%rsp),%r9
1403 movq 8+32(%rsp),%r10
1404 leaq 0+32(%rsp),%rsi
1405 movq 16+32(%rsp),%r11
1406 movq 24+32(%rsp),%r12
1407 leaq 192(%rsp),%rdi
1408 call __ecp_nistz256_mul_montq
1409
1410
1411
1412
Steven Valdez909b19f2016-11-21 15:35:44 -05001413 xorq %r11,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08001414 addq %r12,%r12
1415 leaq 96(%rsp),%rsi
1416 adcq %r13,%r13
1417 movq %r12,%rax
1418 adcq %r8,%r8
1419 adcq %r9,%r9
1420 movq %r13,%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05001421 adcq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08001422
1423 subq $-1,%r12
1424 movq %r8,%rcx
1425 sbbq %r14,%r13
1426 sbbq $0,%r8
1427 movq %r9,%r10
1428 sbbq %r15,%r9
Steven Valdez909b19f2016-11-21 15:35:44 -05001429 sbbq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08001430
Steven Valdez909b19f2016-11-21 15:35:44 -05001431 cmovcq %rax,%r12
Adam Langleyfad63272015-11-12 12:15:39 -08001432 movq 0(%rsi),%rax
Steven Valdez909b19f2016-11-21 15:35:44 -05001433 cmovcq %rbp,%r13
Adam Langleyfad63272015-11-12 12:15:39 -08001434 movq 8(%rsi),%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05001435 cmovcq %rcx,%r8
Adam Langleyfad63272015-11-12 12:15:39 -08001436 movq 16(%rsi),%rcx
Steven Valdez909b19f2016-11-21 15:35:44 -05001437 cmovcq %r10,%r9
Adam Langleyfad63272015-11-12 12:15:39 -08001438 movq 24(%rsi),%r10
1439
1440 call __ecp_nistz256_subq
1441
1442 leaq 128(%rsp),%rbx
1443 leaq 288(%rsp),%rdi
1444 call __ecp_nistz256_sub_fromq
1445
1446 movq 192+0(%rsp),%rax
1447 movq 192+8(%rsp),%rbp
1448 movq 192+16(%rsp),%rcx
1449 movq 192+24(%rsp),%r10
1450 leaq 320(%rsp),%rdi
1451
1452 call __ecp_nistz256_subq
1453
1454 movq %r12,0(%rdi)
1455 movq %r13,8(%rdi)
1456 movq %r8,16(%rdi)
1457 movq %r9,24(%rdi)
1458 movq 128(%rsp),%rax
1459 leaq 128(%rsp),%rbx
1460 movq 0+224(%rsp),%r9
1461 movq 8+224(%rsp),%r10
1462 leaq 0+224(%rsp),%rsi
1463 movq 16+224(%rsp),%r11
1464 movq 24+224(%rsp),%r12
1465 leaq 256(%rsp),%rdi
1466 call __ecp_nistz256_mul_montq
1467
1468 movq 320(%rsp),%rax
1469 leaq 320(%rsp),%rbx
1470 movq 0+64(%rsp),%r9
1471 movq 8+64(%rsp),%r10
1472 leaq 0+64(%rsp),%rsi
1473 movq 16+64(%rsp),%r11
1474 movq 24+64(%rsp),%r12
1475 leaq 320(%rsp),%rdi
1476 call __ecp_nistz256_mul_montq
1477
1478 leaq 256(%rsp),%rbx
1479 leaq 320(%rsp),%rdi
1480 call __ecp_nistz256_sub_fromq
1481
1482.byte 102,72,15,126,199
1483
1484 movdqa %xmm5,%xmm0
1485 movdqa %xmm5,%xmm1
1486 pandn 352(%rsp),%xmm0
1487 movdqa %xmm5,%xmm2
1488 pandn 352+16(%rsp),%xmm1
1489 movdqa %xmm5,%xmm3
1490 pand 544(%rsp),%xmm2
1491 pand 544+16(%rsp),%xmm3
1492 por %xmm0,%xmm2
1493 por %xmm1,%xmm3
1494
1495 movdqa %xmm4,%xmm0
1496 movdqa %xmm4,%xmm1
1497 pandn %xmm2,%xmm0
1498 movdqa %xmm4,%xmm2
1499 pandn %xmm3,%xmm1
1500 movdqa %xmm4,%xmm3
1501 pand 448(%rsp),%xmm2
1502 pand 448+16(%rsp),%xmm3
1503 por %xmm0,%xmm2
1504 por %xmm1,%xmm3
1505 movdqu %xmm2,64(%rdi)
1506 movdqu %xmm3,80(%rdi)
1507
1508 movdqa %xmm5,%xmm0
1509 movdqa %xmm5,%xmm1
1510 pandn 288(%rsp),%xmm0
1511 movdqa %xmm5,%xmm2
1512 pandn 288+16(%rsp),%xmm1
1513 movdqa %xmm5,%xmm3
1514 pand 480(%rsp),%xmm2
1515 pand 480+16(%rsp),%xmm3
1516 por %xmm0,%xmm2
1517 por %xmm1,%xmm3
1518
1519 movdqa %xmm4,%xmm0
1520 movdqa %xmm4,%xmm1
1521 pandn %xmm2,%xmm0
1522 movdqa %xmm4,%xmm2
1523 pandn %xmm3,%xmm1
1524 movdqa %xmm4,%xmm3
1525 pand 384(%rsp),%xmm2
1526 pand 384+16(%rsp),%xmm3
1527 por %xmm0,%xmm2
1528 por %xmm1,%xmm3
1529 movdqu %xmm2,0(%rdi)
1530 movdqu %xmm3,16(%rdi)
1531
1532 movdqa %xmm5,%xmm0
1533 movdqa %xmm5,%xmm1
1534 pandn 320(%rsp),%xmm0
1535 movdqa %xmm5,%xmm2
1536 pandn 320+16(%rsp),%xmm1
1537 movdqa %xmm5,%xmm3
1538 pand 512(%rsp),%xmm2
1539 pand 512+16(%rsp),%xmm3
1540 por %xmm0,%xmm2
1541 por %xmm1,%xmm3
1542
1543 movdqa %xmm4,%xmm0
1544 movdqa %xmm4,%xmm1
1545 pandn %xmm2,%xmm0
1546 movdqa %xmm4,%xmm2
1547 pandn %xmm3,%xmm1
1548 movdqa %xmm4,%xmm3
1549 pand 416(%rsp),%xmm2
1550 pand 416+16(%rsp),%xmm3
1551 por %xmm0,%xmm2
1552 por %xmm1,%xmm3
1553 movdqu %xmm2,32(%rdi)
1554 movdqu %xmm3,48(%rdi)
1555
1556L$add_doneq:
Robert Sloanab8b8882018-03-26 11:39:51 -07001557 leaq 576+56(%rsp),%rsi
1558
1559 movq -48(%rsi),%r15
1560
1561 movq -40(%rsi),%r14
1562
1563 movq -32(%rsi),%r13
1564
1565 movq -24(%rsi),%r12
1566
1567 movq -16(%rsi),%rbx
1568
1569 movq -8(%rsi),%rbp
1570
1571 leaq (%rsi),%rsp
1572
1573L$point_addq_epilogue:
Adam Langleyfad63272015-11-12 12:15:39 -08001574 .byte 0xf3,0xc3
1575
Robert Sloanab8b8882018-03-26 11:39:51 -07001576
// ---------------------------------------------------------------------
// _ecp_nistz256_point_add_affine
//
// Register arguments, as used by the code below:
//   %rdi  destination; 96 bytes are stored at offsets 0..95
//   %rsi  first input; 96 bytes are read (three 32-byte fields)
//   %rdx  second input; 64 bytes are read (two 32-byte fields)
// NOTE(review): going by the name, this adds a Jacobian point (X, Y, Z
// at offsets 0/32/64) and an affine point (X, Y at offsets 0/32). The
// field helpers it calls (__ecp_nistz256_{sqr_mont,mul_mont,sub_from,
// sub}q) are defined outside this excerpt, so every statement about
// what a call computes is an assumption to confirm against them.
//
// Saves and restores %rbp, %rbx, %r12-%r15 and builds a 488-byte frame.
// Frame offsets from %rsp, as used below:
//   320 / 352 / 384   copy of the three fields of the first input
//   416 / 448         copy of the two fields of the second input
//   224 / 256 / 288   candidates for output offsets 0 / 32 / 64
//   0 .. 223          32-byte scratch slots handed to the helpers
// The value written to the destination is chosen with pand/pandn/por
// masks held in %xmm4 and %xmm5; there is no branch in this routine.
// ---------------------------------------------------------------------
Adam Langleyfad63272015-11-12 12:15:39 -08001577.globl _ecp_nistz256_point_add_affine
1578.private_extern _ecp_nistz256_point_add_affine
1579
1580.p2align 5
1581_ecp_nistz256_point_add_affine:
Robert Sloanab8b8882018-03-26 11:39:51 -07001582
// Save the six callee-saved registers, then reserve the frame. With the
// usual SysV entry alignment (rsp = 8 mod 16) the 48 pushed bytes plus
// 488 leave %rsp 16-byte aligned, which the movdqa stores below need.
Adam Langleyfad63272015-11-12 12:15:39 -08001583 pushq %rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07001584
Adam Langleyfad63272015-11-12 12:15:39 -08001585 pushq %rbx
Robert Sloanab8b8882018-03-26 11:39:51 -07001586
Adam Langleyfad63272015-11-12 12:15:39 -08001587 pushq %r12
Robert Sloanab8b8882018-03-26 11:39:51 -07001588
Adam Langleyfad63272015-11-12 12:15:39 -08001589 pushq %r13
Robert Sloanab8b8882018-03-26 11:39:51 -07001590
Adam Langleyfad63272015-11-12 12:15:39 -08001591 pushq %r14
Robert Sloanab8b8882018-03-26 11:39:51 -07001592
Adam Langleyfad63272015-11-12 12:15:39 -08001593 pushq %r15
Robert Sloanab8b8882018-03-26 11:39:51 -07001594
Adam Langleyfad63272015-11-12 12:15:39 -08001595 subq $480+8,%rsp
1596
Robert Sloanab8b8882018-03-26 11:39:51 -07001597L$add_affineq_body:
1598
// Copy the 96-byte first input to frame[320..415] and keep the second
// input pointer in %rbx. The four limbs at offset 64 of the first input
// go to %rax, %r14, %r15, %r8, the same register set that is filled
// before every __ecp_nistz256_sqr_montq call in this routine.
Adam Langleyfad63272015-11-12 12:15:39 -08001599 movdqu 0(%rsi),%xmm0
1600 movq %rdx,%rbx
1601 movdqu 16(%rsi),%xmm1
1602 movdqu 32(%rsi),%xmm2
1603 movdqu 48(%rsi),%xmm3
1604 movdqu 64(%rsi),%xmm4
1605 movdqu 80(%rsi),%xmm5
1606 movq 64+0(%rsi),%rax
1607 movq 64+8(%rsi),%r14
1608 movq 64+16(%rsi),%r15
1609 movq 64+24(%rsi),%r8
1610 movdqa %xmm0,320(%rsp)
1611 movdqa %xmm1,320+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08001612 movdqa %xmm2,352(%rsp)
1613 movdqa %xmm3,352+16(%rsp)
Adam Langleyfad63272015-11-12 12:15:39 -08001614 movdqa %xmm4,384(%rsp)
1615 movdqa %xmm5,384+16(%rsp)
// %xmm5 = OR of both halves of the field at offset 64 of the first input.
Steven Valdez909b19f2016-11-21 15:35:44 -05001616 por %xmm4,%xmm5
Adam Langleyfad63272015-11-12 12:15:39 -08001617
// Copy the 64-byte second input to frame[416..479]. Interleaved with the
// copy: the two pshufd/por pairs (0xb1 swaps adjacent dwords, 0x1e swaps
// the halves) leave the OR of all eight dwords of that field in every
// dword of %xmm5, and %xmm3 ends up as the OR of all 64 bytes of the
// second input. %xmm4 is cleared for the compare after the call.
1618 movdqu 0(%rbx),%xmm0
Steven Valdez909b19f2016-11-21 15:35:44 -05001619 pshufd $0xb1,%xmm5,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08001620 movdqu 16(%rbx),%xmm1
1621 movdqu 32(%rbx),%xmm2
1622 por %xmm3,%xmm5
1623 movdqu 48(%rbx),%xmm3
1624 movdqa %xmm0,416(%rsp)
David Benjamin4969cc92016-04-22 15:02:23 -04001625 pshufd $0x1e,%xmm5,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08001626 movdqa %xmm1,416+16(%rsp)
1627 por %xmm0,%xmm1
// The bytes below encode movq %rdi,%xmm0: the destination pointer is
// parked in %xmm0 because %rdi is reused as the helper destination.
1628.byte 102,72,15,110,199
1629 movdqa %xmm2,448(%rsp)
1630 movdqa %xmm3,448+16(%rsp)
1631 por %xmm2,%xmm3
1632 por %xmm4,%xmm5
1633 pxor %xmm4,%xmm4
1634 por %xmm1,%xmm3
1635
// sqr_montq: source pointer = first input + 64, destination frame[32].
// NOTE(review): presumably frame[32] = Z1^2 in the Montgomery domain.
1636 leaq 64-0(%rsi),%rsi
1637 leaq 32(%rsp),%rdi
1638 call __ecp_nistz256_sqr_montq
1639
// Finish both masks while setting up the next call:
//   %xmm5 = all-ones iff the 32 bytes at offset 64 of the first input
//           are all zero
//   %xmm4 = all-ones iff all 64 bytes of the second input are zero
// NOTE(review): presumably these are the point-at-infinity flags of the
// two operands. This relies on the helper above leaving %xmm3-%xmm5
// untouched - verify.
// %r12-%r15 are moved into %r9-%r12, the register set loaded from
// memory before the other mul_montq calls (presumably the limbs the
// squaring helper returned), and %rax gets the first limb of the second
// input.
1640 pcmpeqd %xmm4,%xmm5
David Benjamin4969cc92016-04-22 15:02:23 -04001641 pshufd $0xb1,%xmm3,%xmm4
Adam Langleyfad63272015-11-12 12:15:39 -08001642 movq 0(%rbx),%rax
1643
1644 movq %r12,%r9
1645 por %xmm3,%xmm4
1646 pshufd $0,%xmm5,%xmm5
David Benjamin4969cc92016-04-22 15:02:23 -04001647 pshufd $0x1e,%xmm4,%xmm3
Adam Langleyfad63272015-11-12 12:15:39 -08001648 movq %r13,%r10
1649 por %xmm3,%xmm4
1650 pxor %xmm3,%xmm3
1651 movq %r14,%r11
1652 pcmpeqd %xmm3,%xmm4
1653 pshufd $0,%xmm4,%xmm4
1654
// mul_montq: a = frame[32] (pointer in %rsi), b = second input field 0
// (pointer still in %rbx), destination frame[0].
1655 leaq 32-0(%rsp),%rsi
1656 movq %r15,%r12
1657 leaq 0(%rsp),%rdi
1658 call __ecp_nistz256_mul_montq
1659
// sub_fromq: operand pointer frame[320], destination frame[64].
// NOTE(review): the other operand is presumably the product still held
// in registers by the previous helper.
1660 leaq 320(%rsp),%rbx
1661 leaq 64(%rsp),%rdi
1662 call __ecp_nistz256_sub_fromq
1663
// mul_montq: a = frame[32], b = frame[384], destination frame[32].
1664 movq 384(%rsp),%rax
1665 leaq 384(%rsp),%rbx
1666 movq 0+32(%rsp),%r9
1667 movq 8+32(%rsp),%r10
1668 leaq 0+32(%rsp),%rsi
1669 movq 16+32(%rsp),%r11
1670 movq 24+32(%rsp),%r12
1671 leaq 32(%rsp),%rdi
1672 call __ecp_nistz256_mul_montq
1673
// mul_montq: a = frame[64], b = frame[384], destination frame[288]
// (frame[288] is later the candidate for output offset 64).
1674 movq 384(%rsp),%rax
1675 leaq 384(%rsp),%rbx
1676 movq 0+64(%rsp),%r9
1677 movq 8+64(%rsp),%r10
1678 leaq 0+64(%rsp),%rsi
1679 movq 16+64(%rsp),%r11
1680 movq 24+64(%rsp),%r12
1681 leaq 288(%rsp),%rdi
1682 call __ecp_nistz256_mul_montq
1683
// mul_montq: a = frame[32], b = frame[448], destination frame[32].
1684 movq 448(%rsp),%rax
1685 leaq 448(%rsp),%rbx
1686 movq 0+32(%rsp),%r9
1687 movq 8+32(%rsp),%r10
1688 leaq 0+32(%rsp),%rsi
1689 movq 16+32(%rsp),%r11
1690 movq 24+32(%rsp),%r12
1691 leaq 32(%rsp),%rdi
1692 call __ecp_nistz256_mul_montq
1693
// sub_fromq: operand pointer frame[352], destination frame[96].
1694 leaq 352(%rsp),%rbx
1695 leaq 96(%rsp),%rdi
1696 call __ecp_nistz256_sub_fromq
1697
// sqr_montq: source frame[64], destination frame[128].
1698 movq 0+64(%rsp),%rax
1699 movq 8+64(%rsp),%r14
1700 leaq 0+64(%rsp),%rsi
1701 movq 16+64(%rsp),%r15
1702 movq 24+64(%rsp),%r8
1703 leaq 128(%rsp),%rdi
1704 call __ecp_nistz256_sqr_montq
1705
// sqr_montq: source frame[96], destination frame[192].
1706 movq 0+96(%rsp),%rax
1707 movq 8+96(%rsp),%r14
1708 leaq 0+96(%rsp),%rsi
1709 movq 16+96(%rsp),%r15
1710 movq 24+96(%rsp),%r8
1711 leaq 192(%rsp),%rdi
1712 call __ecp_nistz256_sqr_montq
1713
// mul_montq: a = frame[64], b = frame[128], destination frame[160].
1714 movq 128(%rsp),%rax
1715 leaq 128(%rsp),%rbx
1716 movq 0+64(%rsp),%r9
1717 movq 8+64(%rsp),%r10
1718 leaq 0+64(%rsp),%rsi
1719 movq 16+64(%rsp),%r11
1720 movq 24+64(%rsp),%r12
1721 leaq 160(%rsp),%rdi
1722 call __ecp_nistz256_mul_montq
1723
// mul_montq: a = frame[128], b = frame[320], destination frame[0].
1724 movq 320(%rsp),%rax
1725 leaq 320(%rsp),%rbx
1726 movq 0+128(%rsp),%r9
1727 movq 8+128(%rsp),%r10
1728 leaq 0+128(%rsp),%rsi
1729 movq 16+128(%rsp),%r11
1730 movq 24+128(%rsp),%r12
1731 leaq 0(%rsp),%rdi
1732 call __ecp_nistz256_mul_montq
1733
1734
1735
1736
// Inline doubling with conditional reduction of the 256-bit value in
// %r12:%r13:%r8:%r9 (low limb first), with %r11 catching the carry out.
// NOTE(review): these registers presumably still hold the product from
// the call above, and %r14/%r15 presumably hold limbs 1 and 3 of L$poly
// as left by the helper; the immediates -1 and 0 match limbs 0 and 2.
// The unreduced copy is kept in %rax, %rbp, %rcx, %r10.
Steven Valdez909b19f2016-11-21 15:35:44 -05001737 xorq %r11,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08001738 addq %r12,%r12
1739 leaq 192(%rsp),%rsi
1740 adcq %r13,%r13
1741 movq %r12,%rax
1742 adcq %r8,%r8
1743 adcq %r9,%r9
1744 movq %r13,%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05001745 adcq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08001746
1747 subq $-1,%r12
1748 movq %r8,%rcx
1749 sbbq %r14,%r13
1750 sbbq $0,%r8
1751 movq %r9,%r10
1752 sbbq %r15,%r9
Steven Valdez909b19f2016-11-21 15:35:44 -05001753 sbbq $0,%r11
Adam Langleyfad63272015-11-12 12:15:39 -08001754
// If the subtraction borrowed (CF set), take back the unreduced value.
// The interleaved loads do not touch the flags; they fetch the four
// limbs of frame[192] into %rax, %rbp, %rcx, %r10 for the call below.
Steven Valdez909b19f2016-11-21 15:35:44 -05001755 cmovcq %rax,%r12
Adam Langleyfad63272015-11-12 12:15:39 -08001756 movq 0(%rsi),%rax
Steven Valdez909b19f2016-11-21 15:35:44 -05001757 cmovcq %rbp,%r13
Adam Langleyfad63272015-11-12 12:15:39 -08001758 movq 8(%rsi),%rbp
Steven Valdez909b19f2016-11-21 15:35:44 -05001759 cmovcq %rcx,%r8
Adam Langleyfad63272015-11-12 12:15:39 -08001760 movq 16(%rsi),%rcx
Steven Valdez909b19f2016-11-21 15:35:44 -05001761 cmovcq %r10,%r9
Adam Langleyfad63272015-11-12 12:15:39 -08001762 movq 24(%rsi),%r10
1763
// subq takes both operands in registers here (no pointer is set up).
// NOTE(review): presumably computes frame[192] - doubled value and
// leaves the result in registers.
1764 call __ecp_nistz256_subq
1765
// sub_fromq: operand pointer frame[160], destination frame[224]
// (frame[224] is later the candidate for output offset 0).
1766 leaq 160(%rsp),%rbx
1767 leaq 224(%rsp),%rdi
1768 call __ecp_nistz256_sub_fromq
1769
// Load the four limbs of frame[0] for subq and point %rdi at frame[64].
1770 movq 0+0(%rsp),%rax
1771 movq 0+8(%rsp),%rbp
1772 movq 0+16(%rsp),%rcx
1773 movq 0+24(%rsp),%r10
1774 leaq 64(%rsp),%rdi
1775
1776 call __ecp_nistz256_subq
1777
// Store %r12, %r13, %r8, %r9 to frame[64] (%rdi was set before the call;
// this assumes subq leaves its result in these registers and keeps
// %rdi - verify).
// Then mul_montq: a = frame[160], b = frame[352], destination frame[32].
1778 movq %r12,0(%rdi)
1779 movq %r13,8(%rdi)
1780 movq %r8,16(%rdi)
1781 movq %r9,24(%rdi)
1782 movq 352(%rsp),%rax
1783 leaq 352(%rsp),%rbx
1784 movq 0+160(%rsp),%r9
1785 movq 8+160(%rsp),%r10
1786 leaq 0+160(%rsp),%rsi
1787 movq 16+160(%rsp),%r11
1788 movq 24+160(%rsp),%r12
1789 leaq 32(%rsp),%rdi
1790 call __ecp_nistz256_mul_montq
1791
// mul_montq: a = frame[64], b = frame[96], destination frame[64].
1792 movq 96(%rsp),%rax
1793 leaq 96(%rsp),%rbx
1794 movq 0+64(%rsp),%r9
1795 movq 8+64(%rsp),%r10
1796 leaq 0+64(%rsp),%rsi
1797 movq 16+64(%rsp),%r11
1798 movq 24+64(%rsp),%r12
1799 leaq 64(%rsp),%rdi
1800 call __ecp_nistz256_mul_montq
1801
// sub_fromq: operand pointer frame[32], destination frame[256]
// (frame[256] is later the candidate for output offset 32).
1802 leaq 32(%rsp),%rbx
1803 leaq 256(%rsp),%rdi
1804 call __ecp_nistz256_sub_fromq
1805
// The bytes below encode movq %xmm0,%rdi: recover the destination
// pointer parked in %xmm0 near the top of the routine.
1806.byte 102,72,15,126,199
1807
// Output offset 64, step 1:
//   t = (%xmm5 & L$ONE_mont) | (~%xmm5 & frame[288])
1808 movdqa %xmm5,%xmm0
1809 movdqa %xmm5,%xmm1
1810 pandn 288(%rsp),%xmm0
1811 movdqa %xmm5,%xmm2
1812 pandn 288+16(%rsp),%xmm1
1813 movdqa %xmm5,%xmm3
1814 pand L$ONE_mont(%rip),%xmm2
1815 pand L$ONE_mont+16(%rip),%xmm3
1816 por %xmm0,%xmm2
1817 por %xmm1,%xmm3
1818
// Output offset 64, step 2:
//   out[64..95] = (%xmm4 & frame[384]) | (~%xmm4 & t)
1819 movdqa %xmm4,%xmm0
1820 movdqa %xmm4,%xmm1
1821 pandn %xmm2,%xmm0
1822 movdqa %xmm4,%xmm2
1823 pandn %xmm3,%xmm1
1824 movdqa %xmm4,%xmm3
1825 pand 384(%rsp),%xmm2
1826 pand 384+16(%rsp),%xmm3
1827 por %xmm0,%xmm2
1828 por %xmm1,%xmm3
1829 movdqu %xmm2,64(%rdi)
1830 movdqu %xmm3,80(%rdi)
1831
// Output offset 0, step 1:
//   t = (%xmm5 & frame[416]) | (~%xmm5 & frame[224])
1832 movdqa %xmm5,%xmm0
1833 movdqa %xmm5,%xmm1
1834 pandn 224(%rsp),%xmm0
1835 movdqa %xmm5,%xmm2
1836 pandn 224+16(%rsp),%xmm1
1837 movdqa %xmm5,%xmm3
1838 pand 416(%rsp),%xmm2
1839 pand 416+16(%rsp),%xmm3
1840 por %xmm0,%xmm2
1841 por %xmm1,%xmm3
1842
// Output offset 0, step 2:
//   out[0..31] = (%xmm4 & frame[320]) | (~%xmm4 & t)
1843 movdqa %xmm4,%xmm0
1844 movdqa %xmm4,%xmm1
1845 pandn %xmm2,%xmm0
1846 movdqa %xmm4,%xmm2
1847 pandn %xmm3,%xmm1
1848 movdqa %xmm4,%xmm3
1849 pand 320(%rsp),%xmm2
1850 pand 320+16(%rsp),%xmm3
1851 por %xmm0,%xmm2
1852 por %xmm1,%xmm3
1853 movdqu %xmm2,0(%rdi)
1854 movdqu %xmm3,16(%rdi)
1855
// Output offset 32, step 1:
//   t = (%xmm5 & frame[448]) | (~%xmm5 & frame[256])
1856 movdqa %xmm5,%xmm0
1857 movdqa %xmm5,%xmm1
1858 pandn 256(%rsp),%xmm0
1859 movdqa %xmm5,%xmm2
1860 pandn 256+16(%rsp),%xmm1
1861 movdqa %xmm5,%xmm3
1862 pand 448(%rsp),%xmm2
1863 pand 448+16(%rsp),%xmm3
1864 por %xmm0,%xmm2
1865 por %xmm1,%xmm3
1866
// Output offset 32, step 2:
//   out[32..63] = (%xmm4 & frame[352]) | (~%xmm4 & t)
1867 movdqa %xmm4,%xmm0
1868 movdqa %xmm4,%xmm1
1869 pandn %xmm2,%xmm0
1870 movdqa %xmm4,%xmm2
1871 pandn %xmm3,%xmm1
1872 movdqa %xmm4,%xmm3
1873 pand 352(%rsp),%xmm2
1874 pand 352+16(%rsp),%xmm3
1875 por %xmm0,%xmm2
1876 por %xmm1,%xmm3
1877 movdqu %xmm2,32(%rdi)
1878 movdqu %xmm3,48(%rdi)
1879
// Epilogue: %rsi = %rsp + 488 + 48, i.e. the stack pointer value at
// entry. The six saved registers sit just below it, most recently
// pushed (%r15) lowest. Reload them and restore %rsp.
Robert Sloanab8b8882018-03-26 11:39:51 -07001880 leaq 480+56(%rsp),%rsi
1881
1882 movq -48(%rsi),%r15
1883
1884 movq -40(%rsi),%r14
1885
1886 movq -32(%rsi),%r13
1887
1888 movq -24(%rsi),%r12
1889
1890 movq -16(%rsi),%rbx
1891
1892 movq -8(%rsi),%rbp
1893
1894 leaq (%rsi),%rsp
1895
1896L$add_affineq_epilogue:
// 0xf3,0xc3 is the two-byte "rep ret" encoding of the return.
Adam Langleyfad63272015-11-12 12:15:39 -08001897 .byte 0xf3,0xc3
1898
Robert Sloanab8b8882018-03-26 11:39:51 -07001899
Adam Langleyfad63272015-11-12 12:15:39 -08001900#endif