// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const

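# Note (added commentary): these are the field constants for SIKEp434, whose
# prime is p434 = 2^216 * 3^137 - 1. Lp434x2 holds 2*p434, and Lp434p1 holds
# the words of p434 + 1 starting at word 3; the low three 64-bit words of
# p434 + 1 are zero and are not stored.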
# p434 x 2
Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688

# p434 + 1
Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344

.text
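// Note (added commentary): _sike_mpmul appears to compute the full 896-bit
// product of two 7-limb (448-bit) operands using Karatsuba-style splitting:
// x0 = a, x1 = b, x2 = c (14 limbs). The carries of AH+AL and BH+BL are
// folded in with masks rather than branches, keeping the routine
// constant-time.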
.globl _sike_mpmul
.private_extern _sike_mpmul
.align 4
_sike_mpmul:
    stp x29, x30, [sp,#-96]!
    add x29, sp, #0
    stp x19, x20, [sp,#16]
    stp x21, x22, [sp,#32]
    stp x23, x24, [sp,#48]
    stp x25, x26, [sp,#64]
    stp x27, x28, [sp,#80]

    ldp x3, x4, [x0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x10, x11, [x1,#0]
    ldp x12, x13, [x1,#16]
    ldp x14, x15, [x1,#32]
    ldr x16, [x1,#48]

    // x3-x6 <- AH + AL, x7 <- carry
    adds x3, x3, x7
    adcs x4, x4, x8
    adcs x5, x5, x9
    adcs x6, x6, xzr
    adc x7, xzr, xzr

    // x10-x13 <- BH + BL, x8 <- carry
    adds x10, x10, x14
    adcs x11, x11, x15
    adcs x12, x12, x16
    adcs x13, x13, xzr
    adc x8, xzr, xzr

    // x9 <- combined carry
    and x9, x7, x8
    // x7-x8 <- mask
    sub x7, xzr, x7
    sub x8, xzr, x8

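    // Note (added commentary): the carry bits of AH+AL and BH+BL are turned
    // into all-zero/all-one masks (x7, x8) so that carry_A*(BH+BL) and
    // carry_B*(AH+AL) can be added into the middle product without any
    // data-dependent branches.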
    // x14-x17 <- masked (BH + BL)
    and x14, x10, x7
    and x15, x11, x7
    and x16, x12, x7
    and x17, x13, x7

    // x20-x23 <- masked (AH + AL)
    and x20, x3, x8
    and x21, x4, x8
    and x22, x5, x8
    and x23, x6, x8

    // x14-x17, x7 <- masked (AH+AL) + masked (BH+BL), step 1
    adds x14, x14, x20
    adcs x15, x15, x21
    adcs x16, x16, x22
    adcs x17, x17, x23
    adc x7, x9, xzr

    // x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
    stp x3, x4, [x2,#0]
    // A0-A1 <- AH + AL, T0 <- mask
    adds x3, x3, x5
    adcs x4, x4, x6
    adc x25, xzr, xzr

    // C6, T1 <- BH + BL, C7 <- mask
    adds x23, x10, x12
    adcs x26, x11, x13
    adc x24, xzr, xzr

    // C0-C1 <- masked (BH + BL)
    sub x19, xzr, x25
    sub x20, xzr, x24
    and x8, x23, x19
    and x9, x26, x19

    // C4-C5 <- masked (AH + AL), T0 <- combined carry
    and x21, x3, x20
    and x22, x4, x20
    mul x19, x3, x23
    mul x20, x3, x26
    and x25, x25, x24

    // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
    adds x8, x21, x8
    umulh x21, x3, x26
    adcs x9, x22, x9
    umulh x22, x3, x23
    adc x25, x25, xzr

    // C2-C5 <- (AH+AL) x (BH+BL), low part
    mul x3, x4, x23
    umulh x23, x4, x23
    adds x20, x20, x22
    adc x21, x21, xzr

    mul x24, x4, x26
    umulh x26, x4, x26
    adds x20, x20, x3
    adcs x21, x21, x23
    adc x22, xzr, xzr

    adds x21, x21, x24
    adc x22, x22, x26

    ldp x3, x4, [x2,#0]

    // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
    adds x21, x8, x21
    umulh x24, x3, x10
    umulh x26, x3, x11
    adcs x22, x9, x22
    mul x8, x3, x10
    mul x9, x3, x11
    adc x25, x25, xzr

    // C0-C1, T1, C7 <- AL x BL
    mul x3, x4, x10
    umulh x10, x4, x10
    adds x9, x9, x24
    adc x26, x26, xzr

    mul x23, x4, x11
    umulh x11, x4, x11
    adds x9, x9, x3
    adcs x26, x26, x10
    adc x24, xzr, xzr

    adds x26, x26, x23
    adc x24, x24, x11


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
    mul x3, x5, x12
    umulh x10, x5, x12
    subs x19, x19, x8
    sbcs x20, x20, x9
    sbcs x21, x21, x26
    mul x4, x5, x13
    umulh x23, x5, x13
    sbcs x22, x22, x24
    sbc x25, x25, xzr

    // A0, A1, C6, B0 <- AH x BH
    mul x5, x6, x12
    umulh x12, x6, x12
    adds x4, x4, x10
    adc x23, x23, xzr

    mul x11, x6, x13
    umulh x13, x6, x13
    adds x4, x4, x5
    adcs x23, x23, x12
    adc x10, xzr, xzr

    adds x23, x23, x11
    adc x10, x10, x13


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
    subs x19, x19, x3
    sbcs x20, x20, x4
    sbcs x21, x21, x23
    sbcs x22, x22, x10
    sbc x25, x25, xzr

    adds x19, x19, x26
    adcs x20, x20, x24
    adcs x21, x21, x3
    adcs x22, x22, x4
    adcs x23, x25, x23
    adc x24, x10, xzr


    // x14-x17, x7 <- (AH+AL) x (BH+BL), final step
    adds x14, x14, x21
    adcs x15, x15, x22
    adcs x16, x16, x23
    adcs x17, x17, x24
    adc x7, x7, xzr

    // Load AL
    ldp x3, x4, [x0]
    ldp x5, x6, [x0,#16]
    // Load BL
    ldp x10, x11, [x1,#0]
    ldp x12, x13, [x1,#16]

    // Temporarily store x8, x9 in x2
    stp x8, x9, [x2,#0]
    // x21-x28 <- AL x BL
    // A0-A1 <- AH + AL, T0 <- mask
    adds x3, x3, x5
    adcs x4, x4, x6
    adc x8, xzr, xzr

    // C6, T1 <- BH + BL, C7 <- mask
    adds x27, x10, x12
    adcs x9, x11, x13
    adc x28, xzr, xzr

    // C0-C1 <- masked (BH + BL)
    sub x23, xzr, x8
    sub x24, xzr, x28
    and x21, x27, x23
    and x22, x9, x23

    // C4-C5 <- masked (AH + AL), T0 <- combined carry
    and x25, x3, x24
    and x26, x4, x24
    mul x23, x3, x27
    mul x24, x3, x9
    and x8, x8, x28

    // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
    adds x21, x25, x21
    umulh x25, x3, x9
    adcs x22, x26, x22
    umulh x26, x3, x27
    adc x8, x8, xzr

    // C2-C5 <- (AH+AL) x (BH+BL), low part
    mul x3, x4, x27
    umulh x27, x4, x27
    adds x24, x24, x26
    adc x25, x25, xzr

    mul x28, x4, x9
    umulh x9, x4, x9
    adds x24, x24, x3
    adcs x25, x25, x27
    adc x26, xzr, xzr

    adds x25, x25, x28
    adc x26, x26, x9

    ldp x3, x4, [x0,#0]

    // C2-C5, T0 <- (AH+AL) x (BH+BL), final part
    adds x25, x21, x25
    umulh x28, x3, x10
    umulh x9, x3, x11
    adcs x26, x22, x26
    mul x21, x3, x10
    mul x22, x3, x11
    adc x8, x8, xzr

    // C0-C1, T1, C7 <- AL x BL
    mul x3, x4, x10
    umulh x10, x4, x10
    adds x22, x22, x28
    adc x9, x9, xzr

    mul x27, x4, x11
    umulh x11, x4, x11
    adds x22, x22, x3
    adcs x9, x9, x10
    adc x28, xzr, xzr

    adds x9, x9, x27
    adc x28, x28, x11


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
    mul x3, x5, x12
    umulh x10, x5, x12
    subs x23, x23, x21
    sbcs x24, x24, x22
    sbcs x25, x25, x9
    mul x4, x5, x13
    umulh x27, x5, x13
    sbcs x26, x26, x28
    sbc x8, x8, xzr

    // A0, A1, C6, B0 <- AH x BH
    mul x5, x6, x12
    umulh x12, x6, x12
    adds x4, x4, x10
    adc x27, x27, xzr

    mul x11, x6, x13
    umulh x13, x6, x13
    adds x4, x4, x5
    adcs x27, x27, x12
    adc x10, xzr, xzr

    adds x27, x27, x11
    adc x10, x10, x13


    // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
    subs x23, x23, x3
    sbcs x24, x24, x4
    sbcs x25, x25, x27
    sbcs x26, x26, x10
    sbc x8, x8, xzr

    adds x23, x23, x9
    adcs x24, x24, x28
    adcs x25, x25, x3
    adcs x26, x26, x4
    adcs x27, x8, x27
    adc x28, x10, xzr

    // Restore x8, x9
    ldp x8, x9, [x2,#0]

    // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL
    subs x8, x8, x21
    sbcs x9, x9, x22
    sbcs x19, x19, x23
    sbcs x20, x20, x24
    sbcs x14, x14, x25
    sbcs x15, x15, x26
    sbcs x16, x16, x27
    sbcs x17, x17, x28
    sbc x7, x7, xzr

    // Store ALxBL, low
    stp x21, x22, [x2]
    stp x23, x24, [x2,#16]

    // Load AH
    ldp x3, x4, [x0,#32]
    ldr x5, [x0,#48]
    // Load BH
    ldp x10, x11, [x1,#32]
    ldr x12, [x1,#48]

    adds x8, x8, x25
    adcs x9, x9, x26
    adcs x19, x19, x27
    adcs x20, x20, x28
    adc x1, xzr, xzr

    add x0, x0, #32
    // Temporarily store x8,x9 in x2
    stp x8,x9, [x2,#32]
    // x21-x28 <- AH x BH

    // A0 * B0
    mul x21, x3, x10 // C0
    umulh x24, x3, x10

    // A0 * B1
    mul x22, x3, x11
    umulh x23, x3, x11

    // A1 * B0
    mul x8, x4, x10
    umulh x9, x4, x10
    adds x22, x22, x24
    adc x23, x23, xzr

    // A0 * B2
    mul x27, x3, x12
    umulh x28, x3, x12
    adds x22, x22, x8 // C1
    adcs x23, x23, x9
    adc x24, xzr, xzr

    // A2 * B0
    mul x8, x5, x10
    umulh x25, x5, x10
    adds x23, x23, x27
    adcs x24, x24, x25
    adc x25, xzr, xzr

    // A1 * B1
    mul x27, x4, x11
    umulh x9, x4, x11
    adds x23, x23, x8
    adcs x24, x24, x28
    adc x25, x25, xzr

    // A1 * B2
    mul x8, x4, x12
    umulh x28, x4, x12
    adds x23, x23, x27 // C2
    adcs x24, x24, x9
    adc x25, x25, xzr

    // A2 * B1
    mul x27, x5, x11
    umulh x9, x5, x11
    adds x24, x24, x8
    adcs x25, x25, x28
    adc x26, xzr, xzr

    // A2 * B2
    mul x8, x5, x12
    umulh x28, x5, x12
    adds x24, x24, x27 // C3
    adcs x25, x25, x9
    adc x26, x26, xzr

    adds x25, x25, x8 // C4
    adc x26, x26, x28 // C5

    // Restore x8,x9
    ldp x8,x9, [x2,#32]

    neg x1, x1

    // x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
    subs x8, x8, x21
    sbcs x9, x9, x22
    sbcs x19, x19, x23
    sbcs x20, x20, x24
    sbcs x14, x14, x25
    sbcs x15, x15, x26
    sbcs x16, x16, xzr
    sbcs x17, x17, xzr
    sbc x7, x7, xzr

    // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
    stp x8, x9, [x2,#32]
    stp x19, x20, [x2,#48]

    adds x1, x1, #1
    adcs x14, x14, x21
    adcs x15, x15, x22
    adcs x16, x16, x23
    adcs x17, x17, x24
    adcs x25, x7, x25
    adc x26, x26, xzr

    stp x14, x15, [x2,#64]
    stp x16, x17, [x2,#80]
    stp x25, x26, [x2,#96]

    ldp x19, x20, [x29,#16]
    ldp x21, x22, [x29,#32]
    ldp x23, x24, [x29,#48]
    ldp x25, x26, [x29,#64]
    ldp x27, x28, [x29,#80]
    ldp x29, x30, [sp],#96
    ret
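// Note (added commentary): _sike_fprdc appears to perform a Montgomery-style
// reduction of a 14-limb value at x0 into a 7-limb result at x1, multiplying
// by the stored words of p434 + 1 (Lp434p1) and relying on the low words of
// p434 + 1 being zero to skip partial products.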
.globl _sike_fprdc
.private_extern _sike_fprdc
.align 4
_sike_fprdc:
    stp x29, x30, [sp, #-96]!
    add x29, sp, xzr
    stp x19, x20, [sp,#16]
    stp x21, x22, [sp,#32]
    stp x23, x24, [sp,#48]
    stp x25, x26, [sp,#64]
    stp x27, x28, [sp,#80]

    ldp x2, x3, [x0,#0] // a[0-1]

    // Load the prime constant
    adrp x26, Lp434p1@PAGE
    add x26, x26, Lp434p1@PAGEOFF
    ldp x23, x24, [x26, #0x0]
    ldp x25, x26, [x26,#0x10]

    // a[0-1] * p434+1
    mul x4, x2, x23 // C0
    umulh x7, x2, x23

    mul x5, x2, x24
    umulh x6, x2, x24

    mul x10, x3, x23
    umulh x11, x3, x23
    adds x5, x5, x7
    adc x6, x6, xzr

    mul x27, x2, x25
    umulh x28, x2, x25
    adds x5, x5, x10 // C1
    adcs x6, x6, x11
    adc x7, xzr, xzr

    mul x10, x3, x24
    umulh x11, x3, x24
    adds x6, x6, x27
    adcs x7, x7, x28
    adc x8, xzr, xzr

    mul x27, x2, x26
    umulh x28, x2, x26
    adds x6, x6, x10 // C2
    adcs x7, x7, x11
    adc x8, x8, xzr

    mul x10, x3, x25
    umulh x11, x3, x25
    adds x7, x7, x27
    adcs x8, x8, x28
    adc x9, xzr, xzr

    mul x27, x3, x26
    umulh x28, x3, x26
    adds x7, x7, x10 // C3
    adcs x8, x8, x11
    adc x9, x9, xzr
    adds x8, x8, x27 // C4
    adc x9, x9, x28 // C5


    ldp x10, x11, [x0, #0x18]
    ldp x12, x13, [x0, #0x28]
    ldp x14, x15, [x0, #0x38]
    ldp x16, x17, [x0, #0x48]
    ldp x19, x20, [x0, #0x58]
    ldr x21, [x0, #0x68]

    adds x10, x10, x4
    adcs x11, x11, x5
    adcs x12, x12, x6
    adcs x13, x13, x7
    adcs x14, x14, x8
    adcs x15, x15, x9
    adcs x22, x16, xzr
    adcs x17, x17, xzr
    adcs x19, x19, xzr
    adcs x20, x20, xzr
    adc x21, x21, xzr

    ldr x2, [x0,#0x10] // a[2]
    // a[2-3] * p434+1
    mul x4, x2, x23 // C0
    umulh x7, x2, x23

    mul x5, x2, x24
    umulh x6, x2, x24

    mul x0, x10, x23
    umulh x3, x10, x23
    adds x5, x5, x7
    adc x6, x6, xzr

    mul x27, x2, x25
    umulh x28, x2, x25
    adds x5, x5, x0 // C1
    adcs x6, x6, x3
    adc x7, xzr, xzr

    mul x0, x10, x24
    umulh x3, x10, x24
    adds x6, x6, x27
    adcs x7, x7, x28
    adc x8, xzr, xzr

    mul x27, x2, x26
    umulh x28, x2, x26
    adds x6, x6, x0 // C2
    adcs x7, x7, x3
    adc x8, x8, xzr

    mul x0, x10, x25
    umulh x3, x10, x25
    adds x7, x7, x27
    adcs x8, x8, x28
    adc x9, xzr, xzr

    mul x27, x10, x26
    umulh x28, x10, x26
    adds x7, x7, x0 // C3
    adcs x8, x8, x3
    adc x9, x9, xzr
    adds x8, x8, x27 // C4
    adc x9, x9, x28 // C5


    adds x12, x12, x4
    adcs x13, x13, x5
    adcs x14, x14, x6
    adcs x15, x15, x7
    adcs x16, x22, x8
    adcs x17, x17, x9
    adcs x22, x19, xzr
    adcs x20, x20, xzr
    adc x21, x21, xzr

    mul x4, x11, x23 // C0
    umulh x7, x11, x23

    mul x5, x11, x24
    umulh x6, x11, x24

    mul x10, x12, x23
    umulh x3, x12, x23
    adds x5, x5, x7
    adc x6, x6, xzr

    mul x27, x11, x25
    umulh x28, x11, x25
    adds x5, x5, x10 // C1
    adcs x6, x6, x3
    adc x7, xzr, xzr

    mul x10, x12, x24
    umulh x3, x12, x24
    adds x6, x6, x27
    adcs x7, x7, x28
    adc x8, xzr, xzr

    mul x27, x11, x26
    umulh x28, x11, x26
    adds x6, x6, x10 // C2
    adcs x7, x7, x3
    adc x8, x8, xzr

    mul x10, x12, x25
    umulh x3, x12, x25
    adds x7, x7, x27
    adcs x8, x8, x28
    adc x9, xzr, xzr

    mul x27, x12, x26
    umulh x28, x12, x26
    adds x7, x7, x10 // C3
    adcs x8, x8, x3
    adc x9, x9, xzr
    adds x8, x8, x27 // C4
    adc x9, x9, x28 // C5


    adds x14, x14, x4
    adcs x15, x15, x5
    adcs x16, x16, x6
    adcs x17, x17, x7
    adcs x19, x22, x8
    adcs x20, x20, x9
    adc x22, x21, xzr

    stp x14, x15, [x1, #0x0] // C0, C1

    mul x4, x13, x23 // C0
    umulh x10, x13, x23

    mul x5, x13, x24
    umulh x27, x13, x24
    adds x5, x5, x10 // C1
    adc x10, xzr, xzr

    mul x6, x13, x25
    umulh x28, x13, x25
    adds x27, x10, x27
    adcs x6, x6, x27 // C2
    adc x10, xzr, xzr

    mul x7, x13, x26
    umulh x8, x13, x26
    adds x28, x10, x28
    adcs x7, x7, x28 // C3
    adc x8, x8, xzr // C4

    adds x16, x16, x4
    adcs x17, x17, x5
    adcs x19, x19, x6
    adcs x20, x20, x7
    adc x21, x22, x8

    str x16, [x1, #0x10]
    stp x17, x19, [x1, #0x18]
    stp x20, x21, [x1, #0x28]

    ldp x19, x20, [x29,#16]
    ldp x21, x22, [x29,#32]
    ldp x23, x24, [x29,#48]
    ldp x25, x26, [x29,#64]
    ldp x27, x28, [x29,#80]
    ldp x29, x30, [sp],#96
    ret
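// Note (added commentary): _sike_fpadd appears to compute c = a + b mod 2*p434
// for 7-limb operands (x0 = a, x1 = b, x2 = c). The sum is reduced by
// subtracting 2*p434 and conditionally adding it back, with the borrow
// expanded into a mask so the correction is branchless.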
.globl _sike_fpadd
.private_extern _sike_fpadd
.align 4
_sike_fpadd:
    stp x29,x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    ldr x17, [x1,#48]

    // Add a + b
    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x13
    adcs x6, x6, x14
    adcs x7, x7, x15
    adcs x8, x8, x16
    adc x9, x9, x17

    // Subtract 2xp434
    adrp x17, Lp434x2@PAGE
    add x17, x17, Lp434x2@PAGEOFF
    ldp x11, x12, [x17, #0]
    ldp x13, x14, [x17, #16]
    ldp x15, x16, [x17, #32]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x12
    sbcs x6, x6, x13
    sbcs x7, x7, x14
    sbcs x8, x8, x15
    sbcs x9, x9, x16
    sbc x0, xzr, xzr // x0 can be reused now

    // Add 2xp434 anded with the mask in x0
    and x11, x11, x0
    and x12, x12, x0
    and x13, x13, x0
    and x14, x14, x0
    and x15, x15, x0
    and x16, x16, x0

    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x12
    adcs x6, x6, x13
    adcs x7, x7, x14
    adcs x8, x8, x15
    adc x9, x9, x16

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    str x9, [x2,#48]

    ldp x29, x30, [sp],#16
    ret
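// Note (added commentary): _sike_fpsub appears to compute c = a - b mod 2*p434
// (x0 = a, x1 = b, x2 = c); if the subtraction borrows, 2*p434 is added back
// using the same mask technique as _sike_fpadd.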
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
_sike_fpsub:
    stp x29, x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    ldr x17, [x1,#48]

    // Subtract a - b
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    sbcs x9, x9, x17
    sbc x0, xzr, xzr

    // Add 2xp434 anded with the mask in x0
    adrp x17, Lp434x2@PAGE
    add x17, x17, Lp434x2@PAGEOFF

    // First half
    ldp x11, x12, [x17, #0]
    ldp x13, x14, [x17, #16]
    ldp x15, x16, [x17, #32]

    // Add 2xp434 anded with the mask in x0
    and x11, x11, x0
    and x12, x12, x0
    and x13, x13, x0
    and x14, x14, x0
    and x15, x15, x0
    and x16, x16, x0

    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x12
    adcs x6, x6, x13
    adcs x7, x7, x14
    adcs x8, x8, x15
    adc x9, x9, x16

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    str x9, [x2,#48]

    ldp x29, x30, [sp],#16
    ret
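// Note (added commentary): _sike_mpadd_asm appears to be a plain 7-limb
// multi-precision addition c = a + b (x0 = a, x1 = b, x2 = c) with no modular
// reduction; the carry out of the top limb is not returned.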
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
_sike_mpadd_asm:
    stp x29, x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x7, x8, [x0,#32]
    ldr x9, [x0,#48]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    ldr x17, [x1,#48]

    adds x3, x3, x11
    adcs x4, x4, x12
    adcs x5, x5, x13
    adcs x6, x6, x14
    adcs x7, x7, x15
    adcs x8, x8, x16
    adc x9, x9, x17

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    str x9, [x2,#48]

    ldp x29, x30, [sp],#16
    ret
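// Note (added commentary): _sike_mpsubx2_asm appears to subtract two
// double-width (14-limb) values, c = a - b (x0 = a, x1 = b, x2 = c), and
// leaves 0 or all-ones in x0 depending on the final borrow (the sbc at the
// end), which serves as the return value.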
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
_sike_mpsubx2_asm:
    stp x29, x30, [sp,#-16]!
    add x29, sp, #0

    ldp x3, x4, [x0,#0]
    ldp x5, x6, [x0,#16]
    ldp x11, x12, [x1,#0]
    ldp x13, x14, [x1,#16]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    ldp x7, x8, [x0,#32]
    ldp x9, x10, [x0,#48]
    ldp x11, x12, [x1,#32]
    ldp x13, x14, [x1,#48]
    sbcs x7, x7, x11
    sbcs x8, x8, x12
    sbcs x9, x9, x13
    sbcs x10, x10, x14

    stp x3, x4, [x2,#0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]
    stp x9, x10, [x2,#48]

    ldp x3, x4, [x0,#64]
    ldp x5, x6, [x0,#80]
    ldp x11, x12, [x1,#64]
    ldp x13, x14, [x1,#80]
    sbcs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    ldp x7, x8, [x0,#96]
    ldp x11, x12, [x1,#96]
    sbcs x7, x7, x11
    sbcs x8, x8, x12
    sbc x0, xzr, xzr

    stp x3, x4, [x2,#64]
    stp x5, x6, [x2,#80]
    stp x7, x8, [x2,#96]

    ldp x29, x30, [sp],#16
    ret
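// Note (added commentary): _sike_mpdblsubx2_asm appears to compute the
// double-width (14-limb) result c = c - a - b in place (x0 = a, x1 = b,
// x2 = c), working in three limb groups and carrying the combined borrow
// between groups in x9.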
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
_sike_mpdblsubx2_asm:
    stp x29, x30, [sp, #-16]!
    add x29, sp, #0

    ldp x3, x4, [x2, #0]
    ldp x5, x6, [x2,#16]
    ldp x7, x8, [x2,#32]

    ldp x11, x12, [x0, #0]
    ldp x13, x14, [x0,#16]
    ldp x15, x16, [x0,#32]

    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16

    // x9 stores carry
    adc x9, xzr, xzr

    ldp x11, x12, [x1, #0]
    ldp x13, x14, [x1,#16]
    ldp x15, x16, [x1,#32]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    adc x9, x9, xzr

    stp x3, x4, [x2, #0]
    stp x5, x6, [x2,#16]
    stp x7, x8, [x2,#32]

    ldp x3, x4, [x2,#48]
    ldp x5, x6, [x2,#64]
    ldp x7, x8, [x2,#80]

    ldp x11, x12, [x0,#48]
    ldp x13, x14, [x0,#64]
    ldp x15, x16, [x0,#80]

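    // Note (added commentary): x9 now holds the sum of the carry flags from
    // the two subtraction chains above (each is 1 when no borrow occurred),
    // so 2 - x9 is the number of borrows to propagate into this limb group.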
    // x9 = 2 - x9
    neg x9, x9
    add x9, x9, #2

    subs x3, x3, x9
    sbcs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    adc x9, xzr, xzr

    ldp x11, x12, [x1,#48]
    ldp x13, x14, [x1,#64]
    ldp x15, x16, [x1,#80]
    subs x3, x3, x11
    sbcs x4, x4, x12
    sbcs x5, x5, x13
    sbcs x6, x6, x14
    sbcs x7, x7, x15
    sbcs x8, x8, x16
    adc x9, x9, xzr

    stp x3, x4, [x2,#48]
    stp x5, x6, [x2,#64]
    stp x7, x8, [x2,#80]

    ldp x3, x4, [x2,#96]
    ldp x11, x12, [x0,#96]
    ldp x13, x14, [x1,#96]

    // x9 = 2 - x9
    neg x9, x9
    add x9, x9, #2

    subs x3, x3, x9
    sbcs x3, x3, x11
    sbcs x4, x4, x12
    subs x3, x3, x13
    sbc x4, x4, x14
    stp x3, x4, [x2,#96]

    ldp x29, x30, [sp],#16
    ret
#endif // !OPENSSL_NO_ASM