// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section .rodata

// 2 * p434. The full value is 7 64-bit limbs; limbs 1 and 2 are both
// 0xFFFFFFFFFFFFFFFF, so only 6 distinct quads are stored and the
// consumers (sike_fpadd/sike_fpsub) reuse the all-ones word twice.
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688

// p434 + 1 (used by the reduction in sike_fprdc; the low 3 limbs of
// p434+1 are zero and are therefore not stored).
.Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
//-----------------------------------------------------------------------
// void sike_mpmul(const uint64_t *a, const uint64_t *b, uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a (7 limbs read: [x0,#0..#48])
//        x1 = b (7 limbs read: [x1,#0..#48])
// Out:   x2 = c, the 14-limb (896-bit) product, written at [x2,#0..#104]
// Uses one level of Karatsuba: the 7-limb operands are split into
// AL/AH and BL/BH halves; (AH+AL)*(BH+BL), AL*BL and AH*BH are combined.
// x2 doubles as scratch space for intermediate limbs before the final
// write-back. Callee-saved x19-x28 are saved/restored on the stack.
// NOTE(review): generated constant-time code — instruction order and
// the adds/adcs carry chains are load-bearing; do not reorder.
//-----------------------------------------------------------------------
.globl sike_mpmul
.hidden sike_mpmul
.align 4
sike_mpmul:
    stp x29, x30, [sp,#-96]!
    add x29, sp, #0
    stp x19, x20, [sp,#16]
    stp x21, x22, [sp,#32]
    stp x23, x24, [sp,#48]
    stp x25, x26, [sp,#64]
    stp x27, x28, [sp,#80]

    ldp x3, x4, [x0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x10, x11, [x1,#0]
    ldp x12, x13, [x1,#16]
    ldp x14, x15, [x1,#32]
    ldr x16, [x1,#48]

    // x3-x7 <- AH + AL, x7 <- carry
    adds x3, x3, x7
    adcs x4, x4, x8
    adcs x5, x5, x9
    adcs x6, x6, xzr
    adc x7, xzr, xzr

    // x10-x13 <- BH + BL, x8 <- carry
    adds x10, x10, x14
    adcs x11, x11, x15
    adcs x12, x12, x16
    adcs x13, x13, xzr
    adc x8, xzr, xzr

    // x9 <- combined carry
    and x9, x7, x8
    // x7-x8 <- mask (all-ones if the corresponding sum carried out)
    sub x7, xzr, x7
    sub x8, xzr, x8

    // x15-x19 <- masked (BH + BL)
    and x14, x10, x7
    and x15, x11, x7
    and x16, x12, x7
    and x17, x13, x7

    // x20-x23 <- masked (AH + AL)
    and x20, x3, x8
    and x21, x4, x8
    and x22, x5, x8
    and x23, x6, x8

    // x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
    adds x14, x14, x20
    adcs x15, x15, x21
    adcs x16, x16, x22
    adcs x17, x17, x23
    adc x7, x9, xzr

    // x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
    stp x3, x4, [x2,#0]
    // A0-A1 <- AH + AL, T0 <- mask
    adds x3, x3, x5
    adcs x4, x4, x6
    adc x25, xzr, xzr

    // C6, T1 <- BH + BL, C7 <- mask
    adds x23, x10, x12
    adcs x26, x11, x13
    adc x24, xzr, xzr

    // C0-C1 <- masked (BH + BL)
    sub x19, xzr, x25
    sub x20, xzr, x24
    and x8, x23, x19
    and x9, x26, x19

    // C4-C5 <- masked (AH + AL), T0 <- combined carry
    and x21, x3, x20
    and x22, x4, x20
    mul x19, x3, x23
    mul x20, x3, x26
    and x25, x25, x24

    // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
    adds x8, x21, x8
    umulh x21, x3, x26
    adcs x9, x22, x9
    umulh x22, x3, x23
    adc x25, x25, xzr

    // C2-C5 <- (AH+AL) x (BH+BL), low part
    mul x3, x4, x23
    umulh x23, x4, x23
    adds x20, x20, x22
    adc x21, x21, xzr

    mul x24, x4, x26
    umulh x26, x4, x26
    adds x20, x20, x3
    adcs x21, x21, x23
    adc x22, xzr, xzr

    adds x21, x21, x24
    adc x22, x22, x26

    ldp x3, x4, [x2,#0]

    // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
    adds x21, x8, x21
    umulh x24, x3, x10
    umulh x26, x3, x11
    adcs x22, x9, x22
    mul x8, x3, x10
    mul x9, x3, x11
    adc x25, x25, xzr

    // C0-C1, T1, C7 <- AL x BL
    mul x3, x4, x10
    umulh x10, x4, x10
    adds x9, x9, x24
    adc x26, x26, xzr

    mul x23, x4, x11
    umulh x11, x4, x11
    adds x9, x9, x3
    adcs x26, x26, x10
    adc x24, xzr, xzr

    adds x26, x26, x23
    adc x24, x24, x11


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
    mul x3, x5, x12
    umulh x10, x5, x12
    subs x19, x19, x8
    sbcs x20, x20, x9
    sbcs x21, x21, x26
    mul x4, x5, x13
    umulh x23, x5, x13
    sbcs x22, x22, x24
    sbc x25, x25, xzr

    // A0, A1, C6, B0 <- AH x BH
    mul x5, x6, x12
    umulh x12, x6, x12
    adds x4, x4, x10
    adc x23, x23, xzr

    mul x11, x6, x13
    umulh x13, x6, x13
    adds x4, x4, x5
    adcs x23, x23, x12
    adc x10, xzr, xzr

    adds x23, x23, x11
    adc x10, x10, x13


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
    subs x19, x19, x3
    sbcs x20, x20, x4
    sbcs x21, x21, x23
    sbcs x22, x22, x10
    sbc x25, x25, xzr

    adds x19, x19, x26
    adcs x20, x20, x24
    adcs x21, x21, x3
    adcs x22, x22, x4
    adcs x23, x25, x23
    adc x24, x10, xzr


    // x15-x19, x7 <- (AH+AL) x (BH+BL), final step
    adds x14, x14, x21
    adcs x15, x15, x22
    adcs x16, x16, x23
    adcs x17, x17, x24
    adc x7, x7, xzr

    // Load AL
    ldp x3, x4, [x0]
    ldp x5, x6, [x0,#16]
    // Load BL
    ldp x10, x11, [x1,#0]
    ldp x12, x13, [x1,#16]

    // Temporarily store x8 in x2
    stp x8, x9, [x2,#0]
    // x21-x28 <- AL x BL
    // A0-A1 <- AH + AL, T0 <- mask
    adds x3, x3, x5
    adcs x4, x4, x6
    adc x8, xzr, xzr

    // C6, T1 <- BH + BL, C7 <- mask
    adds x27, x10, x12
    adcs x9, x11, x13
    adc x28, xzr, xzr

    // C0-C1 <- masked (BH + BL)
    sub x23, xzr, x8
    sub x24, xzr, x28
    and x21, x27, x23
    and x22, x9, x23

    // C4-C5 <- masked (AH + AL), T0 <- combined carry
    and x25, x3, x24
    and x26, x4, x24
    mul x23, x3, x27
    mul x24, x3, x9
    and x8, x8, x28

    // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
    adds x21, x25, x21
    umulh x25, x3, x9
    adcs x22, x26, x22
    umulh x26, x3, x27
    adc x8, x8, xzr

    // C2-C5 <- (AH+AL) x (BH+BL), low part
    mul x3, x4, x27
    umulh x27, x4, x27
    adds x24, x24, x26
    adc x25, x25, xzr

    mul x28, x4, x9
    umulh x9, x4, x9
    adds x24, x24, x3
    adcs x25, x25, x27
    adc x26, xzr, xzr

    adds x25, x25, x28
    adc x26, x26, x9

    ldp x3, x4, [x0,#0]

    // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
    adds x25, x21, x25
    umulh x28, x3, x10
    umulh x9, x3, x11
    adcs x26, x22, x26
    mul x21, x3, x10
    mul x22, x3, x11
    adc x8, x8, xzr

    // C0-C1, T1, C7 <- AL x BL
    mul x3, x4, x10
    umulh x10, x4, x10
    adds x22, x22, x28
    adc x9, x9, xzr

    mul x27, x4, x11
    umulh x11, x4, x11
    adds x22, x22, x3
    adcs x9, x9, x10
    adc x28, xzr, xzr

    adds x9, x9, x27
    adc x28, x28, x11


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
    mul x3, x5, x12
    umulh x10, x5, x12
    subs x23, x23, x21
    sbcs x24, x24, x22
    sbcs x25, x25, x9
    mul x4, x5, x13
    umulh x27, x5, x13
    sbcs x26, x26, x28
    sbc x8, x8, xzr

    // A0, A1, C6, B0 <- AH x BH
    mul x5, x6, x12
    umulh x12, x6, x12
    adds x4, x4, x10
    adc x27, x27, xzr

    mul x11, x6, x13
    umulh x13, x6, x13
    adds x4, x4, x5
    adcs x27, x27, x12
    adc x10, xzr, xzr

    adds x27, x27, x11
    adc x10, x10, x13


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
    subs x23, x23, x3
    sbcs x24, x24, x4
    sbcs x25, x25, x27
    sbcs x26, x26, x10
    sbc x8, x8, xzr

    adds x23, x23, x9
    adcs x24, x24, x28
    adcs x25, x25, x3
    adcs x26, x26, x4
    adcs x27, x8, x27
    adc x28, x10, xzr

    // Restore x8
    ldp x8, x9, [x2,#0]

    // x8-x10,x20,x15-x17,x19 <- masked (AH+AL) x (BH+BL) - ALxBL
    subs x8, x8, x21
    sbcs x9, x9, x22
    sbcs x19, x19, x23
    sbcs x20, x20, x24
    sbcs x14, x14, x25
    sbcs x15, x15, x26
    sbcs x16, x16, x27
    sbcs x17, x17, x28
    sbc x7, x7, xzr

    // Store ALxBL, low
    stp x21, x22, [x2]
    stp x23, x24, [x2,#16]

    // Load AH
    ldp x3, x4, [x0,#32]
    ldr x5, [x0,#48]
    // Load BH
    ldp x10, x11, [x1,#32]
    ldr x12, [x1,#48]

    adds x8, x8, x25
    adcs x9, x9, x26
    adcs x19, x19, x27
    adcs x20, x20, x28
    adc x1, xzr, xzr

    add x0, x0, #32
    // Temporarily store x8,x9 in x2
    stp x8,x9, [x2,#32]
    // x21-x28 <- AH x BH (3x3-limb schoolbook product)

    // A0 * B0
    mul x21, x3, x10 // C0
    umulh x24, x3, x10

    // A0 * B1
    mul x22, x3, x11
    umulh x23, x3, x11

    // A1 * B0
    mul x8, x4, x10
    umulh x9, x4, x10
    adds x22, x22, x24
    adc x23, x23, xzr

    // A0 * B2
    mul x27, x3, x12
    umulh x28, x3, x12
    adds x22, x22, x8 // C1
    adcs x23, x23, x9
    adc x24, xzr, xzr

    // A2 * B0
    mul x8, x5, x10
    umulh x25, x5, x10
    adds x23, x23, x27
    adcs x24, x24, x25
    adc x25, xzr, xzr

    // A1 * B1
    mul x27, x4, x11
    umulh x9, x4, x11
    adds x23, x23, x8
    adcs x24, x24, x28
    adc x25, x25, xzr

    // A1 * B2
    mul x8, x4, x12
    umulh x28, x4, x12
    adds x23, x23, x27 // C2
    adcs x24, x24, x9
    adc x25, x25, xzr

    // A2 * B1
    mul x27, x5, x11
    umulh x9, x5, x11
    adds x24, x24, x8
    adcs x25, x25, x28
    adc x26, xzr, xzr

    // A2 * B2
    mul x8, x5, x12
    umulh x28, x5, x12
    adds x24, x24, x27 // C3
    adcs x25, x25, x9
    adc x26, x26, xzr

    adds x25, x25, x8 // C4
    adc x26, x26, x28 // C5

    // Restore x8,x9
    ldp x8,x9, [x2,#32]

    neg x1, x1

    // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
    subs x8, x8, x21
    sbcs x9, x9, x22
    sbcs x19, x19, x23
    sbcs x20, x20, x24
    sbcs x14, x14, x25
    sbcs x15, x15, x26
    sbcs x16, x16, xzr
    sbcs x17, x17, xzr
    sbc x7, x7, xzr

    // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
    stp x8, x9, [x2,#32]
    stp x19, x20, [x2,#48]

    // x1 holds -borrow; adding 1 re-creates the carry-in for the chain
    adds x1, x1, #1
    adcs x14, x14, x21
    adcs x15, x15, x22
    adcs x16, x16, x23
    adcs x17, x17, x24
    adcs x25, x7, x25
    adc x26, x26, xzr

    stp x14, x15, [x2,#64]
    stp x16, x17, [x2,#80]
    stp x25, x26, [x2,#96]

    ldp x19, x20, [x29,#16]
    ldp x21, x22, [x29,#32]
    ldp x23, x24, [x29,#48]
    ldp x25, x26, [x29,#64]
    ldp x27, x28, [x29,#80]
    ldp x29, x30, [sp],#96
    ret
//-----------------------------------------------------------------------
// void sike_fprdc(const uint64_t *a, uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a, double-width input (limbs read up to [x0,#0x68])
// Out:   x1 = c, 7-limb reduced result ([x1,#0..#0x30])
// Folds the high half into the low half by multiplying limb groups by
// the constant p434+1 (.Lp434p1) and accumulating — a Montgomery-style
// reduction for p434 (per the "a[0-1] * p434+1" steps below).
// Callee-saved x19-x28 are saved/restored; x0 is reused as scratch
// after the final input limb has been loaded.
// NOTE(review): generated constant-time code — do not reorder the
// mul/umulh/adds chains.
//-----------------------------------------------------------------------
.globl sike_fprdc
.hidden sike_fprdc
.align 4
sike_fprdc:
    stp x29, x30, [sp, #-96]!
    add x29, sp, xzr
    stp x19, x20, [sp,#16]
    stp x21, x22, [sp,#32]
    stp x23, x24, [sp,#48]
    stp x25, x26, [sp,#64]
    stp x27, x28, [sp,#80]

    ldp x2, x3, [x0,#0] // a[0-1]

    // Load the prime constant p434+1 into x23-x26
    adrp x26, .Lp434p1
    add x26, x26, :lo12:.Lp434p1
    ldp x23, x24, [x26, #0x0]
    ldp x25, x26, [x26,#0x10]

    // a[0-1] * p434+1
    mul x4, x2, x23 // C0
    umulh x7, x2, x23

    mul x5, x2, x24
    umulh x6, x2, x24

    mul x10, x3, x23
    umulh x11, x3, x23
    adds x5, x5, x7
    adc x6, x6, xzr

    mul x27, x2, x25
    umulh x28, x2, x25
    adds x5, x5, x10 // C1
    adcs x6, x6, x11
    adc x7, xzr, xzr

    mul x10, x3, x24
    umulh x11, x3, x24
    adds x6, x6, x27
    adcs x7, x7, x28
    adc x8, xzr, xzr

    mul x27, x2, x26
    umulh x28, x2, x26
    adds x6, x6, x10 // C2
    adcs x7, x7, x11
    adc x8, x8, xzr

    mul x10, x3, x25
    umulh x11, x3, x25
    adds x7, x7, x27
    adcs x8, x8, x28
    adc x9, xzr, xzr

    mul x27, x3, x26
    umulh x28, x3, x26
    adds x7, x7, x10 // C3
    adcs x8, x8, x11
    adc x9, x9, xzr
    adds x8, x8, x27 // C4
    adc x9, x9, x28 // C5


    // Accumulate the partial product into the upper input limbs
    ldp x10, x11, [x0, #0x18]
    ldp x12, x13, [x0, #0x28]
    ldp x14, x15, [x0, #0x38]
    ldp x16, x17, [x0, #0x48]
    ldp x19, x20, [x0, #0x58]
    ldr x21, [x0, #0x68]

    adds x10, x10, x4
    adcs x11, x11, x5
    adcs x12, x12, x6
    adcs x13, x13, x7
    adcs x14, x14, x8
    adcs x15, x15, x9
    adcs x22, x16, xzr
    adcs x17, x17, xzr
    adcs x19, x19, xzr
    adcs x20, x20, xzr
    adc x21, x21, xzr

    ldr x2, [x0,#0x10] // a[2]
    // a[2-3] * p434+1
    mul x4, x2, x23 // C0
    umulh x7, x2, x23

    mul x5, x2, x24
    umulh x6, x2, x24

    mul x0, x10, x23
    umulh x3, x10, x23
    adds x5, x5, x7
    adc x6, x6, xzr

    mul x27, x2, x25
    umulh x28, x2, x25
    adds x5, x5, x0 // C1
    adcs x6, x6, x3
    adc x7, xzr, xzr

    mul x0, x10, x24
    umulh x3, x10, x24
    adds x6, x6, x27
    adcs x7, x7, x28
    adc x8, xzr, xzr

    mul x27, x2, x26
    umulh x28, x2, x26
    adds x6, x6, x0 // C2
    adcs x7, x7, x3
    adc x8, x8, xzr

    mul x0, x10, x25
    umulh x3, x10, x25
    adds x7, x7, x27
    adcs x8, x8, x28
    adc x9, xzr, xzr

    mul x27, x10, x26
    umulh x28, x10, x26
    adds x7, x7, x0 // C3
    adcs x8, x8, x3
    adc x9, x9, xzr
    adds x8, x8, x27 // C4
    adc x9, x9, x28 // C5


    adds x12, x12, x4
    adcs x13, x13, x5
    adcs x14, x14, x6
    adcs x15, x15, x7
    adcs x16, x22, x8
    adcs x17, x17, x9
    adcs x22, x19, xzr
    adcs x20, x20, xzr
    adc x21, x21, xzr

    // a[4-5] * p434+1
    mul x4, x11, x23 // C0
    umulh x7, x11, x23

    mul x5, x11, x24
    umulh x6, x11, x24

    mul x10, x12, x23
    umulh x3, x12, x23
    adds x5, x5, x7
    adc x6, x6, xzr

    mul x27, x11, x25
    umulh x28, x11, x25
    adds x5, x5, x10 // C1
    adcs x6, x6, x3
    adc x7, xzr, xzr

    mul x10, x12, x24
    umulh x3, x12, x24
    adds x6, x6, x27
    adcs x7, x7, x28
    adc x8, xzr, xzr

    mul x27, x11, x26
    umulh x28, x11, x26
    adds x6, x6, x10 // C2
    adcs x7, x7, x3
    adc x8, x8, xzr

    mul x10, x12, x25
    umulh x3, x12, x25
    adds x7, x7, x27
    adcs x8, x8, x28
    adc x9, xzr, xzr

    mul x27, x12, x26
    umulh x28, x12, x26
    adds x7, x7, x10 // C3
    adcs x8, x8, x3
    adc x9, x9, xzr
    adds x8, x8, x27 // C4
    adc x9, x9, x28 // C5


    adds x14, x14, x4
    adcs x15, x15, x5
    adcs x16, x16, x6
    adcs x17, x17, x7
    adcs x19, x22, x8
    adcs x20, x20, x9
    adc x22, x21, xzr

    stp x14, x15, [x1, #0x0] // C0, C1

    // a[6] * p434+1 (final limb)
    mul x4, x13, x23 // C0
    umulh x10, x13, x23

    mul x5, x13, x24
    umulh x27, x13, x24
    adds x5, x5, x10 // C1
    adc x10, xzr, xzr

    mul x6, x13, x25
    umulh x28, x13, x25
    adds x27, x10, x27
    adcs x6, x6, x27 // C2
    adc x10, xzr, xzr

    mul x7, x13, x26
    umulh x8, x13, x26
    adds x28, x10, x28
    adcs x7, x7, x28 // C3
    adc x8, x8, xzr // C4

    adds x16, x16, x4
    adcs x17, x17, x5
    adcs x19, x19, x6
    adcs x20, x20, x7
    adc x21, x22, x8

    str x16, [x1, #0x10]
    stp x17, x19, [x1, #0x18]
    stp x20, x21, [x1, #0x28]

    ldp x19, x20, [x29,#16]
    ldp x21, x22, [x29,#32]
    ldp x23, x24, [x29,#48]
    ldp x25, x26, [x29,#64]
    ldp x27, x28, [x29,#80]
    ldp x29, x30, [sp],#96
    ret
//-----------------------------------------------------------------------
// void sike_fpadd(const uint64_t *a, const uint64_t *b, uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a, x1 = b (7 limbs each)
// Out:   x2 = c = a + b, kept in range by conditionally re-adding
//        2*p434 after an unconditional subtraction of it (constant-time:
//        the correction is applied via an all-ones/zero mask, no branch).
// Note: 2*p434 limbs 1 and 2 are both all-ones, so x12 is deliberately
// used for both positions; .Lp434x2 stores only 6 distinct quads.
//-----------------------------------------------------------------------
.globl sike_fpadd
.hidden sike_fpadd
.align 4
sike_fpadd:
    stp x29,x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    ldr x17, [x1,#48]

    // Add a + b
    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x13
    adcs x6, x6, x14
    adcs x7, x7, x15
    adcs x8, x8, x16
    adc x9, x9, x17

    // Subtract 2xp434
    adrp x17, .Lp434x2
    add x17, x17, :lo12:.Lp434x2
    ldp x11, x12, [x17, #0]
    ldp x13, x14, [x17, #16]
    ldp x15, x16, [x17, #32]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x12     // x12 reused: limbs 1 and 2 of 2*p434 are equal
    sbcs x6, x6, x13
    sbcs x7, x7, x14
    sbcs x8, x8, x15
    sbcs x9, x9, x16
    sbc x0, xzr, xzr // x0 = all-ones iff the subtraction borrowed; x0 reusable now

    // Add 2xp434 anded with the mask in x0
    and x11, x11, x0
    and x12, x12, x0
    and x13, x13, x0
    and x14, x14, x0
    and x15, x15, x0
    and x16, x16, x0

    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x12     // x12 reused as above
    adcs x6, x6, x13
    adcs x7, x7, x14
    adcs x8, x8, x15
    adc x9, x9, x16

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    str x9, [x2,#48]

    ldp x29, x30, [sp],#16
    ret
//-----------------------------------------------------------------------
// void sike_fpsub(const uint64_t *a, const uint64_t *b, uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a, x1 = b (7 limbs each)
// Out:   x2 = c = a - b, corrected by adding 2*p434 masked with the
//        borrow (constant-time, branch-free correction).
// Note: as in sike_fpadd, x12 serves for both all-ones limbs of 2*p434.
//-----------------------------------------------------------------------
.globl sike_fpsub
.hidden sike_fpsub
.align 4
sike_fpsub:
    stp x29, x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    ldr x17, [x1,#48]

    // Subtract a - b
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    sbcs x9, x9, x17
    sbc x0, xzr, xzr // x0 = all-ones iff the subtraction borrowed

    // Load 2xp434
    adrp x17, .Lp434x2
    add x17, x17, :lo12:.Lp434x2

    ldp x11, x12, [x17, #0]
    ldp x13, x14, [x17, #16]
    ldp x15, x16, [x17, #32]

    // Add 2xp434 anded with the mask in x0
    and x11, x11, x0
    and x12, x12, x0
    and x13, x13, x0
    and x14, x14, x0
    and x15, x15, x0
    and x16, x16, x0

    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x12     // x12 reused: limbs 1 and 2 of 2*p434 are equal
    adcs x6, x6, x13
    adcs x7, x7, x14
    adcs x8, x8, x15
    adc x9, x9, x16

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    str x9, [x2,#48]

    ldp x29, x30, [sp],#16
    ret
//-----------------------------------------------------------------------
// void sike_mpadd_asm(const uint64_t *a, const uint64_t *b, uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a, x1 = b (7 limbs each)
// Out:   x2 = c = a + b, plain multiprecision add with carry
//        propagation; no modular reduction and no carry-out returned.
//-----------------------------------------------------------------------
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.align 4
sike_mpadd_asm:
    stp x29, x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    ldr x17, [x1,#48]

    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x13
    adcs x6, x6, x14
    adcs x7, x7, x15
    adcs x8, x8, x16
    adc x9, x9, x17

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    str x9, [x2,#48]

    ldp x29, x30, [sp],#16
    ret
//-----------------------------------------------------------------------
// uint64_t sike_mpsubx2_asm(const uint64_t *a, const uint64_t *b,
//                           uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a, x1 = b (14 limbs each, double-width)
// Out:   x2 = c = a - b (14 limbs);
//        x0 = all-ones mask if the final subtraction borrowed, else 0.
// The borrow chain is kept alive across the two load/store batches:
// only ldp/stp (which do not touch flags) separate the sbcs groups.
//-----------------------------------------------------------------------
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.align 4
sike_mpsubx2_asm:
    stp x29, x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    ldp x7, x8, [x0,#32]
    ldp x9, x10, [x0,#48]
    ldp x11, x12, [x1,#32]
    ldp x13, x14, [x1,#48]
    sbcs x7, x7, x11
    sbcs x8, x8, x12
    sbcs x9, x9, x13
    sbcs x10, x10, x14

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    stp x9, x10, [x2,#48]

    ldp x3, x4, [x0,#64]
    ldp x5, x6, [x0,#80]
    ldp x11, x12, [x1,#64]
    ldp x13, x14, [x1,#80]
    sbcs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    ldp x7, x8, [x0,#96]
    ldp x11, x12, [x1,#96]
    sbcs x7, x7, x11
    sbcs x8, x8, x12
    sbc x0, xzr, xzr // return borrow mask in x0

    stp x3, x4, [x2,#64]
    stp x5, x6, [x2,#80]
    stp x7, x8, [x2,#96]

    ldp x29, x30, [sp],#16
    ret
//-----------------------------------------------------------------------
// void sike_mpdblsubx2_asm(const uint64_t *a, const uint64_t *b,
//                          uint64_t *c)
// ABI:   AAPCS64
// In:    x0 = a, x1 = b, x2 = c (14 limbs each, double-width)
// Out:   c = c - a - b, processed in 6-limb batches.
// Between batches the two pending borrows are accumulated in x9 as a
// count of carries (adc after each subs chain), then converted with
// x9 = 2 - x9 into the number of borrows and subtracted from the next
// batch's lowest limb, so the chains link up correctly.
//-----------------------------------------------------------------------
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.align 4
sike_mpdblsubx2_asm:
    stp x29, x30, [sp, #-16]!
    add x29, sp, #0

    ldp x3, x4, [x2, #0]
    ldp x5, x6, [x2,#16]
    ldp x7, x8, [x2,#32]

    ldp x11, x12, [x0, #0]
    ldp x13, x14, [x0,#16]
    ldp x15, x16, [x0,#32]

    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16

    // x9 stores carry
    adc x9, xzr, xzr

    ldp x11, x12, [x1, #0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    adc x9, x9, xzr

    stp x3, x4, [x2, #0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]

    ldp x3, x4, [x2,#48]
    ldp x5, x6, [x2,#64]
    ldp x7, x8, [x2,#80]

    ldp x11, x12, [x0,#48]
    ldp x13, x14, [x0,#64]
    ldp x15, x16, [x0,#80]

    // x9 = 2 - x9: convert accumulated carries into pending borrows
    neg x9, x9
    add x9, x9, #2

    subs x3, x3, x9
    sbcs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    adc x9, xzr, xzr

    ldp x11, x12, [x1,#48]
    ldp x13, x14, [x1,#64]
    ldp x15, x16, [x1,#80]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    adc x9, x9, xzr

    stp x3, x4, [x2,#48]
    stp x5, x6, [x2,#64]
    stp x7, x8, [x2,#80]

    ldp x3, x4, [x2,#96]
    ldp x11, x12, [x0,#96]
    ldp x13, x14, [x1,#96]

    // x9 = 2 - x9: convert accumulated carries into pending borrows
    neg x9, x9
    add x9, x9, #2

    subs x3, x3, x9
    sbcs x3, x3, x11
    sbcs x4, x4, x12
    subs x3, x3, x13
    sbc x4, x4, x14
    stp x3, x4, [x2,#96]

    ldp x29, x30, [sp],#16
    ret
#endif  // __aarch64__
#endif  // !OPENSSL_NO_ASM