blob: b222040acae93edbe589764b2e1f375670c90686 [file] [log] [blame]
Adam Langleye9ada862015-05-11 17:20:37 -07001%ifidn __OUTPUT_FORMAT__,obj
2section code use32 class=code align=64
3%elifidn __OUTPUT_FORMAT__,win32
4%ifdef __YASM_VERSION_ID__
5%if __YASM_VERSION_ID__ < 01010000h
6%error yasm version 1.1.0 or later needed.
7%endif
8; Yasm automatically includes @feat.00 and complains about redefining it.
9; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
10%else
11$@feat.00 equ 1
12%endif
13section .text code align=64
14%else
15section .text code
16%endif
17;extern _OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _bn_mul_add_words: r[i] += a[i] * w for i in [0, num), carrying one
; 32-bit word between positions. Returns the final carry word in eax.
;
; Arguments are read from the stack (offsets relative to esp on entry):
;   [esp+4]  r    words that are read, accumulated into and written back
;   [esp+8]  a    source words
;   [esp+12] num  number of words
;   [esp+16] w    32-bit multiplier
; NOTE(review): presumably declared in C as
;   BN_ULONG bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num,
;                             BN_ULONG w)
; -- confirm against the C header.
;
; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
; path that keeps its state in MMX registers and uses pmuludq/paddq;
; when the bit is clear the integer mul/adc path is used instead.
;
; num == 0 returns 0 on both paths without touching r or a. The SSE2
; path needs an explicit check for this because its single-word loop
; runs its body before testing the counter.
;
; The "Round N" comments in the integer path use byte offsets (0,4,..,28).
;-----------------------------------------------------------------------
18global _bn_mul_add_words
19align 16
20_bn_mul_add_words:
21L$_bn_mul_add_words_begin:
22 lea eax,[_OPENSSL_ia32cap_P]
23 bt DWORD [eax],26
24 jnc NEAR L$000maw_non_sse2
 ; SSE2 path: eax = r, edx = a, ecx = words left, mm0 = w,
 ; mm1 = running 64-bit sum (low dword = next output word, rest = carry).
25 mov eax,DWORD [4+esp]
26 mov edx,DWORD [8+esp]
27 mov ecx,DWORD [12+esp]
28 movd mm0,DWORD [16+esp]
29 pxor mm1,mm1
 ; Nothing to do for num == 0: leave through the common exit, which
 ; returns the (zero) carry in mm1 and executes emms. Without this the
 ; sub ecx,1 / jnz loop below would wrap the counter.
 test ecx,ecx
 jz NEAR L$003maw_sse2_exit
30 jmp NEAR L$001maw_sse2_entry
31align 16
 ; Eight words per iteration; products for later words are started
 ; before the carry chain through mm1 reaches them.
32L$002maw_sse2_unrolled:
33 movd mm3,DWORD [eax]
34 paddq mm1,mm3
35 movd mm2,DWORD [edx]
36 pmuludq mm2,mm0
37 movd mm4,DWORD [4+edx]
38 pmuludq mm4,mm0
39 movd mm6,DWORD [8+edx]
40 pmuludq mm6,mm0
41 movd mm7,DWORD [12+edx]
42 pmuludq mm7,mm0
43 paddq mm1,mm2
44 movd mm3,DWORD [4+eax]
45 paddq mm3,mm4
46 movd mm5,DWORD [8+eax]
47 paddq mm5,mm6
48 movd mm4,DWORD [12+eax]
49 paddq mm7,mm4
50 movd DWORD [eax],mm1
51 movd mm2,DWORD [16+edx]
52 pmuludq mm2,mm0
53 psrlq mm1,32
54 movd mm4,DWORD [20+edx]
55 pmuludq mm4,mm0
56 paddq mm1,mm3
57 movd mm6,DWORD [24+edx]
58 pmuludq mm6,mm0
59 movd DWORD [4+eax],mm1
60 psrlq mm1,32
61 movd mm3,DWORD [28+edx]
62 add edx,32
63 pmuludq mm3,mm0
64 paddq mm1,mm5
65 movd mm5,DWORD [16+eax]
66 paddq mm2,mm5
67 movd DWORD [8+eax],mm1
68 psrlq mm1,32
69 paddq mm1,mm7
70 movd mm5,DWORD [20+eax]
71 paddq mm4,mm5
72 movd DWORD [12+eax],mm1
73 psrlq mm1,32
74 paddq mm1,mm2
75 movd mm5,DWORD [24+eax]
76 paddq mm6,mm5
77 movd DWORD [16+eax],mm1
78 psrlq mm1,32
79 paddq mm1,mm4
80 movd mm5,DWORD [28+eax]
81 paddq mm3,mm5
82 movd DWORD [20+eax],mm1
83 psrlq mm1,32
84 paddq mm1,mm6
85 movd DWORD [24+eax],mm1
86 psrlq mm1,32
87 paddq mm1,mm3
88 movd DWORD [28+eax],mm1
89 lea eax,[32+eax]
90 psrlq mm1,32
91 sub ecx,8
92 jz NEAR L$003maw_sse2_exit
 ; Here ecx != 0: at least 8 words left -> unrolled loop, otherwise the
 ; 1..7 remaining words are handled one at a time.
93L$001maw_sse2_entry:
94 test ecx,4294967288
95 jnz NEAR L$002maw_sse2_unrolled
96align 4
97L$004maw_sse2_loop:
98 movd mm2,DWORD [edx]
99 movd mm3,DWORD [eax]
100 pmuludq mm2,mm0
101 lea edx,[4+edx]
102 paddq mm1,mm3
103 paddq mm1,mm2
104 movd DWORD [eax],mm1
 ; psrlq and lea leave EFLAGS alone, so jnz below tests this sub.
105 sub ecx,1
106 psrlq mm1,32
107 lea eax,[4+eax]
108 jnz NEAR L$004maw_sse2_loop
109L$003maw_sse2_exit:
 ; Return the carry and leave MMX state before returning.
110 movd eax,mm1
111 emms
112 ret
113align 16
 ; Integer path: edi = r, ebx = a, ebp = w, esi = carry,
 ; ecx = num with the low three bits cleared (words done 8 at a time).
114L$000maw_non_sse2:
115 push ebp
116 push ebx
117 push esi
118 push edi
119 ;
120 xor esi,esi
121 mov edi,DWORD [20+esp]
122 mov ecx,DWORD [28+esp]
123 mov ebx,DWORD [24+esp]
124 and ecx,4294967288
125 mov ebp,DWORD [32+esp]
 ; mov and push do not modify flags: jz still tests the and above.
 ; The extra push moves num to [32+esp] (see L$005maw_finish).
126 push ecx
127 jz NEAR L$005maw_finish
128align 16
129L$006maw_loop:
130 ; Round 0
131 mov eax,DWORD [ebx]
132 mul ebp
133 add eax,esi
134 adc edx,0
135 add eax,DWORD [edi]
136 adc edx,0
137 mov DWORD [edi],eax
138 mov esi,edx
139 ; Round 4
140 mov eax,DWORD [4+ebx]
141 mul ebp
142 add eax,esi
143 adc edx,0
144 add eax,DWORD [4+edi]
145 adc edx,0
146 mov DWORD [4+edi],eax
147 mov esi,edx
148 ; Round 8
149 mov eax,DWORD [8+ebx]
150 mul ebp
151 add eax,esi
152 adc edx,0
153 add eax,DWORD [8+edi]
154 adc edx,0
155 mov DWORD [8+edi],eax
156 mov esi,edx
157 ; Round 12
158 mov eax,DWORD [12+ebx]
159 mul ebp
160 add eax,esi
161 adc edx,0
162 add eax,DWORD [12+edi]
163 adc edx,0
164 mov DWORD [12+edi],eax
165 mov esi,edx
166 ; Round 16
167 mov eax,DWORD [16+ebx]
168 mul ebp
169 add eax,esi
170 adc edx,0
171 add eax,DWORD [16+edi]
172 adc edx,0
173 mov DWORD [16+edi],eax
174 mov esi,edx
175 ; Round 20
176 mov eax,DWORD [20+ebx]
177 mul ebp
178 add eax,esi
179 adc edx,0
180 add eax,DWORD [20+edi]
181 adc edx,0
182 mov DWORD [20+edi],eax
183 mov esi,edx
184 ; Round 24
185 mov eax,DWORD [24+ebx]
186 mul ebp
187 add eax,esi
188 adc edx,0
189 add eax,DWORD [24+edi]
190 adc edx,0
191 mov DWORD [24+edi],eax
192 mov esi,edx
193 ; Round 28
194 mov eax,DWORD [28+ebx]
195 mul ebp
196 add eax,esi
197 adc edx,0
198 add eax,DWORD [28+edi]
199 adc edx,0
200 mov DWORD [28+edi],eax
201 mov esi,edx
202 ;
 ; lea advances the pointers without disturbing the flags from sub.
203 sub ecx,8
204 lea ebx,[32+ebx]
205 lea edi,[32+edi]
206 jnz NEAR L$006maw_loop
207L$005maw_finish:
 ; Remaining num & 7 words (0..7), one tail round each.
208 mov ecx,DWORD [32+esp]
209 and ecx,7
210 jnz NEAR L$007maw_finish2
211 jmp NEAR L$008maw_end
212L$007maw_finish2:
213 ; Tail Round 0
214 mov eax,DWORD [ebx]
215 mul ebp
216 add eax,esi
217 adc edx,0
218 add eax,DWORD [edi]
219 adc edx,0
 ; dec sets ZF for the jz below; the two movs in between keep it.
220 dec ecx
221 mov DWORD [edi],eax
222 mov esi,edx
223 jz NEAR L$008maw_end
224 ; Tail Round 1
225 mov eax,DWORD [4+ebx]
226 mul ebp
227 add eax,esi
228 adc edx,0
229 add eax,DWORD [4+edi]
230 adc edx,0
231 dec ecx
232 mov DWORD [4+edi],eax
233 mov esi,edx
234 jz NEAR L$008maw_end
235 ; Tail Round 2
236 mov eax,DWORD [8+ebx]
237 mul ebp
238 add eax,esi
239 adc edx,0
240 add eax,DWORD [8+edi]
241 adc edx,0
242 dec ecx
243 mov DWORD [8+edi],eax
244 mov esi,edx
245 jz NEAR L$008maw_end
246 ; Tail Round 3
247 mov eax,DWORD [12+ebx]
248 mul ebp
249 add eax,esi
250 adc edx,0
251 add eax,DWORD [12+edi]
252 adc edx,0
253 dec ecx
254 mov DWORD [12+edi],eax
255 mov esi,edx
256 jz NEAR L$008maw_end
257 ; Tail Round 4
258 mov eax,DWORD [16+ebx]
259 mul ebp
260 add eax,esi
261 adc edx,0
262 add eax,DWORD [16+edi]
263 adc edx,0
264 dec ecx
265 mov DWORD [16+edi],eax
266 mov esi,edx
267 jz NEAR L$008maw_end
268 ; Tail Round 5
269 mov eax,DWORD [20+ebx]
270 mul ebp
271 add eax,esi
272 adc edx,0
273 add eax,DWORD [20+edi]
274 adc edx,0
275 dec ecx
276 mov DWORD [20+edi],eax
277 mov esi,edx
278 jz NEAR L$008maw_end
279 ; Tail Round 6
280 mov eax,DWORD [24+ebx]
281 mul ebp
282 add eax,esi
283 adc edx,0
284 add eax,DWORD [24+edi]
285 adc edx,0
286 mov DWORD [24+edi],eax
287 mov esi,edx
288L$008maw_end:
 ; Return the carry; the first pop discards the temporary pushed above.
289 mov eax,esi
290 pop ecx
291 pop edi
292 pop esi
293 pop ebx
294 pop ebp
295 ret
;-----------------------------------------------------------------------
; _bn_mul_words: r[i] = low word of (a[i] * w + carry) for i in
; [0, num); returns the final carry word in eax.
;
; Arguments are read from the stack (offsets relative to esp on entry):
;   [esp+4]  r    destination words (written only)
;   [esp+8]  a    source words
;   [esp+12] num  number of words
;   [esp+16] w    32-bit multiplier
; NOTE(review): presumably declared in C as
;   BN_ULONG bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num,
;                         BN_ULONG w)
; -- confirm against the C header.
;
; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
; pmuludq/paddq path; otherwise the integer mul/adc path is used.
;
; num == 0 returns 0 on both paths without touching r or a. The SSE2
; path checks for it explicitly because its loop runs the body before
; testing the counter.
;
; The "Round N" comments in the integer path use byte offsets (0,4,..,28).
;-----------------------------------------------------------------------
296global _bn_mul_words
297align 16
298_bn_mul_words:
299L$_bn_mul_words_begin:
300 lea eax,[_OPENSSL_ia32cap_P]
301 bt DWORD [eax],26
302 jnc NEAR L$009mw_non_sse2
 ; SSE2 path: eax = r, edx = a, ecx = words left, mm0 = w,
 ; mm1 = running 64-bit sum (low dword = output word, rest = carry).
303 mov eax,DWORD [4+esp]
304 mov edx,DWORD [8+esp]
305 mov ecx,DWORD [12+esp]
306 movd mm0,DWORD [16+esp]
307 pxor mm1,mm1
 ; Nothing to do for num == 0: return the zero carry held in mm1.
 ; Without this the sub ecx,1 / jnz loop below would wrap the counter.
 test ecx,ecx
 jz NEAR L$052mw_sse2_exit
308align 16
309L$010mw_sse2_loop:
310 movd mm2,DWORD [edx]
311 pmuludq mm2,mm0
312 lea edx,[4+edx]
313 paddq mm1,mm2
314 movd DWORD [eax],mm1
 ; psrlq and lea leave EFLAGS alone, so jnz below tests this sub.
315 sub ecx,1
316 psrlq mm1,32
317 lea eax,[4+eax]
318 jnz NEAR L$010mw_sse2_loop
L$052mw_sse2_exit:
 ; Return the carry and leave MMX state before returning.
319 movd eax,mm1
320 emms
321 ret
322align 16
 ; Integer path: edi = r, ebx = a, ecx = w, esi = carry,
 ; ebp = num with the low three bits cleared (words done 8 at a time).
323L$009mw_non_sse2:
324 push ebp
325 push ebx
326 push esi
327 push edi
328 ;
329 xor esi,esi
330 mov edi,DWORD [20+esp]
331 mov ebx,DWORD [24+esp]
332 mov ebp,DWORD [28+esp]
333 mov ecx,DWORD [32+esp]
334 and ebp,4294967288
335 jz NEAR L$011mw_finish
336L$012mw_loop:
337 ; Round 0
338 mov eax,DWORD [ebx]
339 mul ecx
340 add eax,esi
341 adc edx,0
342 mov DWORD [edi],eax
343 mov esi,edx
344 ; Round 4
345 mov eax,DWORD [4+ebx]
346 mul ecx
347 add eax,esi
348 adc edx,0
349 mov DWORD [4+edi],eax
350 mov esi,edx
351 ; Round 8
352 mov eax,DWORD [8+ebx]
353 mul ecx
354 add eax,esi
355 adc edx,0
356 mov DWORD [8+edi],eax
357 mov esi,edx
358 ; Round 12
359 mov eax,DWORD [12+ebx]
360 mul ecx
361 add eax,esi
362 adc edx,0
363 mov DWORD [12+edi],eax
364 mov esi,edx
365 ; Round 16
366 mov eax,DWORD [16+ebx]
367 mul ecx
368 add eax,esi
369 adc edx,0
370 mov DWORD [16+edi],eax
371 mov esi,edx
372 ; Round 20
373 mov eax,DWORD [20+ebx]
374 mul ecx
375 add eax,esi
376 adc edx,0
377 mov DWORD [20+edi],eax
378 mov esi,edx
379 ; Round 24
380 mov eax,DWORD [24+ebx]
381 mul ecx
382 add eax,esi
383 adc edx,0
384 mov DWORD [24+edi],eax
385 mov esi,edx
386 ; Round 28
387 mov eax,DWORD [28+ebx]
388 mul ecx
389 add eax,esi
390 adc edx,0
391 mov DWORD [28+edi],eax
392 mov esi,edx
393 ;
394 add ebx,32
395 add edi,32
396 sub ebp,8
397 jz NEAR L$011mw_finish
398 jmp NEAR L$012mw_loop
399L$011mw_finish:
 ; Remaining num & 7 words (0..7), one tail round each.
400 mov ebp,DWORD [28+esp]
401 and ebp,7
402 jnz NEAR L$013mw_finish2
403 jmp NEAR L$014mw_end
404L$013mw_finish2:
405 ; Tail Round 0
406 mov eax,DWORD [ebx]
407 mul ecx
408 add eax,esi
409 adc edx,0
410 mov DWORD [edi],eax
411 mov esi,edx
412 dec ebp
413 jz NEAR L$014mw_end
414 ; Tail Round 1
415 mov eax,DWORD [4+ebx]
416 mul ecx
417 add eax,esi
418 adc edx,0
419 mov DWORD [4+edi],eax
420 mov esi,edx
421 dec ebp
422 jz NEAR L$014mw_end
423 ; Tail Round 2
424 mov eax,DWORD [8+ebx]
425 mul ecx
426 add eax,esi
427 adc edx,0
428 mov DWORD [8+edi],eax
429 mov esi,edx
430 dec ebp
431 jz NEAR L$014mw_end
432 ; Tail Round 3
433 mov eax,DWORD [12+ebx]
434 mul ecx
435 add eax,esi
436 adc edx,0
437 mov DWORD [12+edi],eax
438 mov esi,edx
439 dec ebp
440 jz NEAR L$014mw_end
441 ; Tail Round 4
442 mov eax,DWORD [16+ebx]
443 mul ecx
444 add eax,esi
445 adc edx,0
446 mov DWORD [16+edi],eax
447 mov esi,edx
448 dec ebp
449 jz NEAR L$014mw_end
450 ; Tail Round 5
451 mov eax,DWORD [20+ebx]
452 mul ecx
453 add eax,esi
454 adc edx,0
455 mov DWORD [20+edi],eax
456 mov esi,edx
457 dec ebp
458 jz NEAR L$014mw_end
459 ; Tail Round 6
460 mov eax,DWORD [24+ebx]
461 mul ecx
462 add eax,esi
463 adc edx,0
464 mov DWORD [24+edi],eax
465 mov esi,edx
466L$014mw_end:
 ; Return the carry.
467 mov eax,esi
468 pop edi
469 pop esi
470 pop ebx
471 pop ebp
472 ret
;-----------------------------------------------------------------------
; _bn_sqr_words: for i in [0, num) store the 64-bit square of a[i] as
; two consecutive words r[2*i] (low) and r[2*i+1] (high). Squares are
; independent; no carry is propagated and eax is not set to a result.
;
; Arguments are read from the stack (offsets relative to esp on entry):
;   [esp+4]  r    destination, 2*num words are written
;   [esp+8]  a    source words
;   [esp+12] num  number of source words
; NOTE(review): presumably declared in C as
;   void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int num)
; -- confirm against the C header.
;
; Dispatch: bit 26 of the first dword of _OPENSSL_ia32cap_P selects the
; pmuludq path; otherwise the integer mul path is used.
;
; num == 0 writes nothing on both paths. The SSE2 path checks for it
; explicitly because its loop runs the body before testing the counter.
;
; The "Round N" comments in the integer path use source byte offsets.
;-----------------------------------------------------------------------
473global _bn_sqr_words
474align 16
475_bn_sqr_words:
476L$_bn_sqr_words_begin:
477 lea eax,[_OPENSSL_ia32cap_P]
478 bt DWORD [eax],26
479 jnc NEAR L$015sqr_non_sse2
 ; SSE2 path: eax = r, edx = a, ecx = words left.
480 mov eax,DWORD [4+esp]
481 mov edx,DWORD [8+esp]
482 mov ecx,DWORD [12+esp]
 ; Nothing to do for num == 0. Without this the sub ecx,1 / jnz loop
 ; below would wrap the counter.
 test ecx,ecx
 jz NEAR L$053sqr_sse2_exit
483align 16
484L$016sqr_sse2_loop:
485 movd mm0,DWORD [edx]
486 pmuludq mm0,mm0
487 lea edx,[4+edx]
 ; One 8-byte store writes both halves of the square.
488 movq [eax],mm0
 ; lea leaves EFLAGS alone, so jnz below tests this sub.
489 sub ecx,1
490 lea eax,[8+eax]
491 jnz NEAR L$016sqr_sse2_loop
L$053sqr_sse2_exit:
492 emms
493 ret
494align 16
 ; Integer path: esi = r, edi = a,
 ; ebx = num with the low three bits cleared (words done 8 at a time).
495L$015sqr_non_sse2:
496 push ebp
497 push ebx
498 push esi
499 push edi
500 ;
501 mov esi,DWORD [20+esp]
502 mov edi,DWORD [24+esp]
503 mov ebx,DWORD [28+esp]
504 and ebx,4294967288
505 jz NEAR L$017sw_finish
506L$018sw_loop:
507 ; Round 0
508 mov eax,DWORD [edi]
509 mul eax
510 mov DWORD [esi],eax
511 mov DWORD [4+esi],edx
512 ; Round 4
513 mov eax,DWORD [4+edi]
514 mul eax
515 mov DWORD [8+esi],eax
516 mov DWORD [12+esi],edx
517 ; Round 8
518 mov eax,DWORD [8+edi]
519 mul eax
520 mov DWORD [16+esi],eax
521 mov DWORD [20+esi],edx
522 ; Round 12
523 mov eax,DWORD [12+edi]
524 mul eax
525 mov DWORD [24+esi],eax
526 mov DWORD [28+esi],edx
527 ; Round 16
528 mov eax,DWORD [16+edi]
529 mul eax
530 mov DWORD [32+esi],eax
531 mov DWORD [36+esi],edx
532 ; Round 20
533 mov eax,DWORD [20+edi]
534 mul eax
535 mov DWORD [40+esi],eax
536 mov DWORD [44+esi],edx
537 ; Round 24
538 mov eax,DWORD [24+edi]
539 mul eax
540 mov DWORD [48+esi],eax
541 mov DWORD [52+esi],edx
542 ; Round 28
543 mov eax,DWORD [28+edi]
544 mul eax
545 mov DWORD [56+esi],eax
546 mov DWORD [60+esi],edx
547 ;
 ; 8 source words consumed, 16 destination words produced.
548 add edi,32
549 add esi,64
550 sub ebx,8
551 jnz NEAR L$018sw_loop
552L$017sw_finish:
 ; Remaining num & 7 words (0..7), one tail round each.
553 mov ebx,DWORD [28+esp]
554 and ebx,7
555 jz NEAR L$019sw_end
556 ; Tail Round 0
557 mov eax,DWORD [edi]
558 mul eax
559 mov DWORD [esi],eax
 ; dec sets ZF for the jz below; the mov in between keeps it.
560 dec ebx
561 mov DWORD [4+esi],edx
562 jz NEAR L$019sw_end
563 ; Tail Round 1
564 mov eax,DWORD [4+edi]
565 mul eax
566 mov DWORD [8+esi],eax
567 dec ebx
568 mov DWORD [12+esi],edx
569 jz NEAR L$019sw_end
570 ; Tail Round 2
571 mov eax,DWORD [8+edi]
572 mul eax
573 mov DWORD [16+esi],eax
574 dec ebx
575 mov DWORD [20+esi],edx
576 jz NEAR L$019sw_end
577 ; Tail Round 3
578 mov eax,DWORD [12+edi]
579 mul eax
580 mov DWORD [24+esi],eax
581 dec ebx
582 mov DWORD [28+esi],edx
583 jz NEAR L$019sw_end
584 ; Tail Round 4
585 mov eax,DWORD [16+edi]
586 mul eax
587 mov DWORD [32+esi],eax
588 dec ebx
589 mov DWORD [36+esi],edx
590 jz NEAR L$019sw_end
591 ; Tail Round 5
592 mov eax,DWORD [20+edi]
593 mul eax
594 mov DWORD [40+esi],eax
595 dec ebx
596 mov DWORD [44+esi],edx
597 jz NEAR L$019sw_end
598 ; Tail Round 6
599 mov eax,DWORD [24+edi]
600 mul eax
601 mov DWORD [48+esi],eax
602 mov DWORD [52+esi],edx
603L$019sw_end:
604 pop edi
605 pop esi
606 pop ebx
607 pop ebp
608 ret
;-----------------------------------------------------------------------
; _bn_div_words: unsigned 64-by-32-bit division; returns the 32-bit
; quotient in eax (the remainder is left in edx by div).
;
; Arguments are read from the stack (offsets relative to esp on entry):
;   [esp+4]  high dword of the dividend (loaded into edx)
;   [esp+8]  low dword of the dividend  (loaded into eax)
;   [esp+12] divisor                    (loaded into ecx)
;
; No checks are made here: div raises a divide error when the divisor
; is zero or the quotient does not fit in 32 bits (high >= divisor).
; NOTE(review): callers are presumably required to guarantee
; high < divisor -- confirm against the C declaration.
;-----------------------------------------------------------------------
609global _bn_div_words
610align 16
611_bn_div_words:
612L$_bn_div_words_begin:
613 mov edx,DWORD [4+esp]
614 mov eax,DWORD [8+esp]
615 mov ecx,DWORD [12+esp]
 ; edx:eax / ecx -> quotient in eax, remainder in edx.
616 div ecx
617 ret
;-----------------------------------------------------------------------
; _bn_add_words: r[i] = a[i] + b[i] + carry for i in [0, num);
; returns the final carry (0 or 1) in eax.
;
; Arguments, read after the four register pushes:
;   [esp+20] r    destination words (ebx)
;   [esp+24] a    first source      (esi)
;   [esp+28] b    second source     (edi)
;   [esp+32] num  number of words   (ebp)
; NOTE(review): presumably declared in C as
;   BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a,
;                         const BN_ULONG *b, int num)
; -- confirm against the C header.
;
; Words are processed eight at a time, then num & 7 tail rounds.
; num == 0 returns 0 without touching memory.
;-----------------------------------------------------------------------
618global _bn_add_words
619align 16
620_bn_add_words:
621L$_bn_add_words_begin:
622 push ebp
623 push ebx
624 push esi
625 push edi
626 ;
627 mov ebx,DWORD [20+esp]
628 mov esi,DWORD [24+esp]
629 mov edi,DWORD [28+esp]
630 mov ebp,DWORD [32+esp]
 ; eax = carry between words, initially 0.
631 xor eax,eax
632 and ebp,4294967288
633 jz NEAR L$020aw_finish
634L$021aw_loop:
635 ; Round 0
636 mov ecx,DWORD [esi]
637 mov edx,DWORD [edi]
 ; Add the incoming carry first, then capture CF in eax. mov eax,0 is
 ; used (not xor) because it leaves CF intact for the adc that follows.
638 add ecx,eax
639 mov eax,0
640 adc eax,eax
 ; Add the second operand; a carry here is folded into eax as well.
641 add ecx,edx
642 adc eax,0
643 mov DWORD [ebx],ecx
644 ; Round 1
645 mov ecx,DWORD [4+esi]
646 mov edx,DWORD [4+edi]
647 add ecx,eax
648 mov eax,0
649 adc eax,eax
650 add ecx,edx
651 adc eax,0
652 mov DWORD [4+ebx],ecx
653 ; Round 2
654 mov ecx,DWORD [8+esi]
655 mov edx,DWORD [8+edi]
656 add ecx,eax
657 mov eax,0
658 adc eax,eax
659 add ecx,edx
660 adc eax,0
661 mov DWORD [8+ebx],ecx
662 ; Round 3
663 mov ecx,DWORD [12+esi]
664 mov edx,DWORD [12+edi]
665 add ecx,eax
666 mov eax,0
667 adc eax,eax
668 add ecx,edx
669 adc eax,0
670 mov DWORD [12+ebx],ecx
671 ; Round 4
672 mov ecx,DWORD [16+esi]
673 mov edx,DWORD [16+edi]
674 add ecx,eax
675 mov eax,0
676 adc eax,eax
677 add ecx,edx
678 adc eax,0
679 mov DWORD [16+ebx],ecx
680 ; Round 5
681 mov ecx,DWORD [20+esi]
682 mov edx,DWORD [20+edi]
683 add ecx,eax
684 mov eax,0
685 adc eax,eax
686 add ecx,edx
687 adc eax,0
688 mov DWORD [20+ebx],ecx
689 ; Round 6
690 mov ecx,DWORD [24+esi]
691 mov edx,DWORD [24+edi]
692 add ecx,eax
693 mov eax,0
694 adc eax,eax
695 add ecx,edx
696 adc eax,0
697 mov DWORD [24+ebx],ecx
698 ; Round 7
699 mov ecx,DWORD [28+esi]
700 mov edx,DWORD [28+edi]
701 add ecx,eax
702 mov eax,0
703 adc eax,eax
704 add ecx,edx
705 adc eax,0
706 mov DWORD [28+ebx],ecx
707 ;
 ; The carry lives in eax, so these adds may clobber the flags.
708 add esi,32
709 add edi,32
710 add ebx,32
711 sub ebp,8
712 jnz NEAR L$021aw_loop
713L$020aw_finish:
 ; Remaining num & 7 words (0..7), one tail round each.
714 mov ebp,DWORD [32+esp]
715 and ebp,7
716 jz NEAR L$022aw_end
717 ; Tail Round 0
718 mov ecx,DWORD [esi]
719 mov edx,DWORD [edi]
720 add ecx,eax
721 mov eax,0
722 adc eax,eax
723 add ecx,edx
724 adc eax,0
 ; dec sets ZF for the jz below; the store in between keeps it.
725 dec ebp
726 mov DWORD [ebx],ecx
727 jz NEAR L$022aw_end
728 ; Tail Round 1
729 mov ecx,DWORD [4+esi]
730 mov edx,DWORD [4+edi]
731 add ecx,eax
732 mov eax,0
733 adc eax,eax
734 add ecx,edx
735 adc eax,0
736 dec ebp
737 mov DWORD [4+ebx],ecx
738 jz NEAR L$022aw_end
739 ; Tail Round 2
740 mov ecx,DWORD [8+esi]
741 mov edx,DWORD [8+edi]
742 add ecx,eax
743 mov eax,0
744 adc eax,eax
745 add ecx,edx
746 adc eax,0
747 dec ebp
748 mov DWORD [8+ebx],ecx
749 jz NEAR L$022aw_end
750 ; Tail Round 3
751 mov ecx,DWORD [12+esi]
752 mov edx,DWORD [12+edi]
753 add ecx,eax
754 mov eax,0
755 adc eax,eax
756 add ecx,edx
757 adc eax,0
758 dec ebp
759 mov DWORD [12+ebx],ecx
760 jz NEAR L$022aw_end
761 ; Tail Round 4
762 mov ecx,DWORD [16+esi]
763 mov edx,DWORD [16+edi]
764 add ecx,eax
765 mov eax,0
766 adc eax,eax
767 add ecx,edx
768 adc eax,0
769 dec ebp
770 mov DWORD [16+ebx],ecx
771 jz NEAR L$022aw_end
772 ; Tail Round 5
773 mov ecx,DWORD [20+esi]
774 mov edx,DWORD [20+edi]
775 add ecx,eax
776 mov eax,0
777 adc eax,eax
778 add ecx,edx
779 adc eax,0
780 dec ebp
781 mov DWORD [20+ebx],ecx
782 jz NEAR L$022aw_end
783 ; Tail Round 6
784 mov ecx,DWORD [24+esi]
785 mov edx,DWORD [24+edi]
786 add ecx,eax
787 mov eax,0
788 adc eax,eax
789 add ecx,edx
790 adc eax,0
791 mov DWORD [24+ebx],ecx
792L$022aw_end:
 ; eax already holds the final carry.
793 pop edi
794 pop esi
795 pop ebx
796 pop ebp
797 ret
;-----------------------------------------------------------------------
; _bn_sub_words: r[i] = a[i] - b[i] - borrow for i in [0, num);
; returns the final borrow (0 or 1) in eax.
;
; Arguments, read after the four register pushes:
;   [esp+20] r    destination words (ebx)
;   [esp+24] a    minuend words     (esi)
;   [esp+28] b    subtrahend words  (edi)
;   [esp+32] num  number of words   (ebp)
; NOTE(review): presumably declared in C as
;   BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a,
;                         const BN_ULONG *b, int num)
; -- confirm against the C header.
;
; Words are processed eight at a time, then num & 7 tail rounds.
; num == 0 returns 0 without touching memory.
;-----------------------------------------------------------------------
798global _bn_sub_words
799align 16
800_bn_sub_words:
801L$_bn_sub_words_begin:
802 push ebp
803 push ebx
804 push esi
805 push edi
806 ;
807 mov ebx,DWORD [20+esp]
808 mov esi,DWORD [24+esp]
809 mov edi,DWORD [28+esp]
810 mov ebp,DWORD [32+esp]
 ; eax = borrow between words, initially 0.
811 xor eax,eax
812 and ebp,4294967288
813 jz NEAR L$023aw_finish
814L$024aw_loop:
815 ; Round 0
816 mov ecx,DWORD [esi]
817 mov edx,DWORD [edi]
 ; Subtract the incoming borrow first, then capture CF in eax. mov eax,0
 ; is used (not xor) because it leaves CF intact for the adc that follows.
818 sub ecx,eax
819 mov eax,0
820 adc eax,eax
 ; Subtract the second operand; a borrow here is folded into eax too.
821 sub ecx,edx
822 adc eax,0
823 mov DWORD [ebx],ecx
824 ; Round 1
825 mov ecx,DWORD [4+esi]
826 mov edx,DWORD [4+edi]
827 sub ecx,eax
828 mov eax,0
829 adc eax,eax
830 sub ecx,edx
831 adc eax,0
832 mov DWORD [4+ebx],ecx
833 ; Round 2
834 mov ecx,DWORD [8+esi]
835 mov edx,DWORD [8+edi]
836 sub ecx,eax
837 mov eax,0
838 adc eax,eax
839 sub ecx,edx
840 adc eax,0
841 mov DWORD [8+ebx],ecx
842 ; Round 3
843 mov ecx,DWORD [12+esi]
844 mov edx,DWORD [12+edi]
845 sub ecx,eax
846 mov eax,0
847 adc eax,eax
848 sub ecx,edx
849 adc eax,0
850 mov DWORD [12+ebx],ecx
851 ; Round 4
852 mov ecx,DWORD [16+esi]
853 mov edx,DWORD [16+edi]
854 sub ecx,eax
855 mov eax,0
856 adc eax,eax
857 sub ecx,edx
858 adc eax,0
859 mov DWORD [16+ebx],ecx
860 ; Round 5
861 mov ecx,DWORD [20+esi]
862 mov edx,DWORD [20+edi]
863 sub ecx,eax
864 mov eax,0
865 adc eax,eax
866 sub ecx,edx
867 adc eax,0
868 mov DWORD [20+ebx],ecx
869 ; Round 6
870 mov ecx,DWORD [24+esi]
871 mov edx,DWORD [24+edi]
872 sub ecx,eax
873 mov eax,0
874 adc eax,eax
875 sub ecx,edx
876 adc eax,0
877 mov DWORD [24+ebx],ecx
878 ; Round 7
879 mov ecx,DWORD [28+esi]
880 mov edx,DWORD [28+edi]
881 sub ecx,eax
882 mov eax,0
883 adc eax,eax
884 sub ecx,edx
885 adc eax,0
886 mov DWORD [28+ebx],ecx
887 ;
 ; The borrow lives in eax, so these adds may clobber the flags.
888 add esi,32
889 add edi,32
890 add ebx,32
891 sub ebp,8
892 jnz NEAR L$024aw_loop
893L$023aw_finish:
 ; Remaining num & 7 words (0..7), one tail round each.
894 mov ebp,DWORD [32+esp]
895 and ebp,7
896 jz NEAR L$025aw_end
897 ; Tail Round 0
898 mov ecx,DWORD [esi]
899 mov edx,DWORD [edi]
900 sub ecx,eax
901 mov eax,0
902 adc eax,eax
903 sub ecx,edx
904 adc eax,0
 ; dec sets ZF for the jz below; the store in between keeps it.
905 dec ebp
906 mov DWORD [ebx],ecx
907 jz NEAR L$025aw_end
908 ; Tail Round 1
909 mov ecx,DWORD [4+esi]
910 mov edx,DWORD [4+edi]
911 sub ecx,eax
912 mov eax,0
913 adc eax,eax
914 sub ecx,edx
915 adc eax,0
916 dec ebp
917 mov DWORD [4+ebx],ecx
918 jz NEAR L$025aw_end
919 ; Tail Round 2
920 mov ecx,DWORD [8+esi]
921 mov edx,DWORD [8+edi]
922 sub ecx,eax
923 mov eax,0
924 adc eax,eax
925 sub ecx,edx
926 adc eax,0
927 dec ebp
928 mov DWORD [8+ebx],ecx
929 jz NEAR L$025aw_end
930 ; Tail Round 3
931 mov ecx,DWORD [12+esi]
932 mov edx,DWORD [12+edi]
933 sub ecx,eax
934 mov eax,0
935 adc eax,eax
936 sub ecx,edx
937 adc eax,0
938 dec ebp
939 mov DWORD [12+ebx],ecx
940 jz NEAR L$025aw_end
941 ; Tail Round 4
942 mov ecx,DWORD [16+esi]
943 mov edx,DWORD [16+edi]
944 sub ecx,eax
945 mov eax,0
946 adc eax,eax
947 sub ecx,edx
948 adc eax,0
949 dec ebp
950 mov DWORD [16+ebx],ecx
951 jz NEAR L$025aw_end
952 ; Tail Round 5
953 mov ecx,DWORD [20+esi]
954 mov edx,DWORD [20+edi]
955 sub ecx,eax
956 mov eax,0
957 adc eax,eax
958 sub ecx,edx
959 adc eax,0
960 dec ebp
961 mov DWORD [20+ebx],ecx
962 jz NEAR L$025aw_end
963 ; Tail Round 6
964 mov ecx,DWORD [24+esi]
965 mov edx,DWORD [24+edi]
966 sub ecx,eax
967 mov eax,0
968 adc eax,eax
969 sub ecx,edx
970 adc eax,0
971 mov DWORD [24+ebx],ecx
972L$025aw_end:
 ; eax already holds the final borrow.
973 pop edi
974 pop esi
975 pop ebx
976 pop ebp
977 ret
;-----------------------------------------------------------------------
; _bn_sub_part_words: subtraction of two word arrays whose lengths may
; differ; returns the final borrow (0 or 1) in eax.
;
; Arguments, read after the four register pushes:
;   [esp+20] r   destination words              (ebx)
;   [esp+24] a   minuend words                  (esi)
;   [esp+28] b   subtrahend words               (edi)
;   [esp+32] n   words present in both a and b  (ebp, phase 1)
;   [esp+36] dl  signed length difference       (ebp, phase 2)
; NOTE(review): the name "dl" is taken from the comments in the body; the
; fourth argument is presumably called "cl" in the C prototype
;   BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
;                              const BN_ULONG *b, int cl, int dl)
; -- confirm against the C header.
;
; Phase 1: r[i] = a[i] - b[i] - borrow for the n common words.
; Phase 2, continuing at the word after phase 1:
;   dl == 0: nothing more to do.
;   dl <  0: -dl more words of b only:  r[i] = 0 - b[i] - borrow.
;   dl >  0:  dl more words of a only:  r[i] = a[i] - borrow. As soon as
;             a word produces no borrow, the rest of a is copied
;             unchanged and 0 is returned.
;-----------------------------------------------------------------------
978global _bn_sub_part_words
979align 16
980_bn_sub_part_words:
981L$_bn_sub_part_words_begin:
982 push ebp
983 push ebx
984 push esi
985 push edi
986 ;
987 mov ebx,DWORD [20+esp]
988 mov esi,DWORD [24+esp]
989 mov edi,DWORD [28+esp]
990 mov ebp,DWORD [32+esp]
 ; eax = borrow between words, initially 0.
991 xor eax,eax
992 and ebp,4294967288
993 jz NEAR L$026aw_finish
994L$027aw_loop:
995 ; Round 0
996 mov ecx,DWORD [esi]
997 mov edx,DWORD [edi]
 ; Subtract the incoming borrow, capture CF in eax (mov eax,0 leaves CF
 ; intact), then subtract b[i] and fold that borrow into eax as well.
998 sub ecx,eax
999 mov eax,0
1000 adc eax,eax
1001 sub ecx,edx
1002 adc eax,0
1003 mov DWORD [ebx],ecx
1004 ; Round 1
1005 mov ecx,DWORD [4+esi]
1006 mov edx,DWORD [4+edi]
1007 sub ecx,eax
1008 mov eax,0
1009 adc eax,eax
1010 sub ecx,edx
1011 adc eax,0
1012 mov DWORD [4+ebx],ecx
1013 ; Round 2
1014 mov ecx,DWORD [8+esi]
1015 mov edx,DWORD [8+edi]
1016 sub ecx,eax
1017 mov eax,0
1018 adc eax,eax
1019 sub ecx,edx
1020 adc eax,0
1021 mov DWORD [8+ebx],ecx
1022 ; Round 3
1023 mov ecx,DWORD [12+esi]
1024 mov edx,DWORD [12+edi]
1025 sub ecx,eax
1026 mov eax,0
1027 adc eax,eax
1028 sub ecx,edx
1029 adc eax,0
1030 mov DWORD [12+ebx],ecx
1031 ; Round 4
1032 mov ecx,DWORD [16+esi]
1033 mov edx,DWORD [16+edi]
1034 sub ecx,eax
1035 mov eax,0
1036 adc eax,eax
1037 sub ecx,edx
1038 adc eax,0
1039 mov DWORD [16+ebx],ecx
1040 ; Round 5
1041 mov ecx,DWORD [20+esi]
1042 mov edx,DWORD [20+edi]
1043 sub ecx,eax
1044 mov eax,0
1045 adc eax,eax
1046 sub ecx,edx
1047 adc eax,0
1048 mov DWORD [20+ebx],ecx
1049 ; Round 6
1050 mov ecx,DWORD [24+esi]
1051 mov edx,DWORD [24+edi]
1052 sub ecx,eax
1053 mov eax,0
1054 adc eax,eax
1055 sub ecx,edx
1056 adc eax,0
1057 mov DWORD [24+ebx],ecx
1058 ; Round 7
1059 mov ecx,DWORD [28+esi]
1060 mov edx,DWORD [28+edi]
1061 sub ecx,eax
1062 mov eax,0
1063 adc eax,eax
1064 sub ecx,edx
1065 adc eax,0
1066 mov DWORD [28+ebx],ecx
1067 ;
1068 add esi,32
1069 add edi,32
1070 add ebx,32
1071 sub ebp,8
1072 jnz NEAR L$027aw_loop
1073L$026aw_finish:
 ; Remaining n & 7 common words. Each tail round advances all three
 ; pointers by one word so that phase 2 can continue from esi/edi/ebx.
1074 mov ebp,DWORD [32+esp]
1075 and ebp,7
1076 jz NEAR L$028aw_end
1077 ; Tail Round 0
1078 mov ecx,DWORD [esi]
1079 mov edx,DWORD [edi]
1080 sub ecx,eax
1081 mov eax,0
1082 adc eax,eax
1083 sub ecx,edx
1084 adc eax,0
1085 mov DWORD [ebx],ecx
1086 add esi,4
1087 add edi,4
1088 add ebx,4
1089 dec ebp
1090 jz NEAR L$028aw_end
1091 ; Tail Round 1
1092 mov ecx,DWORD [esi]
1093 mov edx,DWORD [edi]
1094 sub ecx,eax
1095 mov eax,0
1096 adc eax,eax
1097 sub ecx,edx
1098 adc eax,0
1099 mov DWORD [ebx],ecx
1100 add esi,4
1101 add edi,4
1102 add ebx,4
1103 dec ebp
1104 jz NEAR L$028aw_end
1105 ; Tail Round 2
1106 mov ecx,DWORD [esi]
1107 mov edx,DWORD [edi]
1108 sub ecx,eax
1109 mov eax,0
1110 adc eax,eax
1111 sub ecx,edx
1112 adc eax,0
1113 mov DWORD [ebx],ecx
1114 add esi,4
1115 add edi,4
1116 add ebx,4
1117 dec ebp
1118 jz NEAR L$028aw_end
1119 ; Tail Round 3
1120 mov ecx,DWORD [esi]
1121 mov edx,DWORD [edi]
1122 sub ecx,eax
1123 mov eax,0
1124 adc eax,eax
1125 sub ecx,edx
1126 adc eax,0
1127 mov DWORD [ebx],ecx
1128 add esi,4
1129 add edi,4
1130 add ebx,4
1131 dec ebp
1132 jz NEAR L$028aw_end
1133 ; Tail Round 4
1134 mov ecx,DWORD [esi]
1135 mov edx,DWORD [edi]
1136 sub ecx,eax
1137 mov eax,0
1138 adc eax,eax
1139 sub ecx,edx
1140 adc eax,0
1141 mov DWORD [ebx],ecx
1142 add esi,4
1143 add edi,4
1144 add ebx,4
1145 dec ebp
1146 jz NEAR L$028aw_end
1147 ; Tail Round 5
1148 mov ecx,DWORD [esi]
1149 mov edx,DWORD [edi]
1150 sub ecx,eax
1151 mov eax,0
1152 adc eax,eax
1153 sub ecx,edx
1154 adc eax,0
1155 mov DWORD [ebx],ecx
1156 add esi,4
1157 add edi,4
1158 add ebx,4
1159 dec ebp
1160 jz NEAR L$028aw_end
1161 ; Tail Round 6
1162 mov ecx,DWORD [esi]
1163 mov edx,DWORD [edi]
1164 sub ecx,eax
1165 mov eax,0
1166 adc eax,eax
1167 sub ecx,edx
1168 adc eax,0
1169 mov DWORD [ebx],ecx
1170 add esi,4
1171 add edi,4
1172 add ebx,4
1173L$028aw_end:
 ; Phase 2: dispatch on the sign of dl. The second zero test repeats the
 ; first; jge is a signed comparison, so negative dl falls through.
1174 cmp DWORD [36+esp],0
1175 je NEAR L$029pw_end
1176 mov ebp,DWORD [36+esp]
1177 cmp ebp,0
1178 je NEAR L$029pw_end
1179 jge NEAR L$030pw_pos
1180 ; pw_neg
 ; ebp = -dl, then masked down to a multiple of 8 for the unrolled loop.
1181 mov edx,0
1182 sub edx,ebp
1183 mov ebp,edx
1184 and ebp,4294967288
1185 jz NEAR L$031pw_neg_finish
1186L$032pw_neg_loop:
1187 ; dl<0 Round 0
 ; Same borrow chain as phase 1 with the minuend word fixed at 0.
1188 mov ecx,0
1189 mov edx,DWORD [edi]
1190 sub ecx,eax
1191 mov eax,0
1192 adc eax,eax
1193 sub ecx,edx
1194 adc eax,0
1195 mov DWORD [ebx],ecx
1196 ; dl<0 Round 1
1197 mov ecx,0
1198 mov edx,DWORD [4+edi]
1199 sub ecx,eax
1200 mov eax,0
1201 adc eax,eax
1202 sub ecx,edx
1203 adc eax,0
1204 mov DWORD [4+ebx],ecx
1205 ; dl<0 Round 2
1206 mov ecx,0
1207 mov edx,DWORD [8+edi]
1208 sub ecx,eax
1209 mov eax,0
1210 adc eax,eax
1211 sub ecx,edx
1212 adc eax,0
1213 mov DWORD [8+ebx],ecx
1214 ; dl<0 Round 3
1215 mov ecx,0
1216 mov edx,DWORD [12+edi]
1217 sub ecx,eax
1218 mov eax,0
1219 adc eax,eax
1220 sub ecx,edx
1221 adc eax,0
1222 mov DWORD [12+ebx],ecx
1223 ; dl<0 Round 4
1224 mov ecx,0
1225 mov edx,DWORD [16+edi]
1226 sub ecx,eax
1227 mov eax,0
1228 adc eax,eax
1229 sub ecx,edx
1230 adc eax,0
1231 mov DWORD [16+ebx],ecx
1232 ; dl<0 Round 5
1233 mov ecx,0
1234 mov edx,DWORD [20+edi]
1235 sub ecx,eax
1236 mov eax,0
1237 adc eax,eax
1238 sub ecx,edx
1239 adc eax,0
1240 mov DWORD [20+ebx],ecx
1241 ; dl<0 Round 6
1242 mov ecx,0
1243 mov edx,DWORD [24+edi]
1244 sub ecx,eax
1245 mov eax,0
1246 adc eax,eax
1247 sub ecx,edx
1248 adc eax,0
1249 mov DWORD [24+ebx],ecx
1250 ; dl<0 Round 7
1251 mov ecx,0
1252 mov edx,DWORD [28+edi]
1253 sub ecx,eax
1254 mov eax,0
1255 adc eax,eax
1256 sub ecx,edx
1257 adc eax,0
1258 mov DWORD [28+ebx],ecx
1259 ;
1260 add edi,32
1261 add ebx,32
1262 sub ebp,8
1263 jnz NEAR L$032pw_neg_loop
1264L$031pw_neg_finish:
 ; Remaining (-dl) & 7 words of b.
1265 mov edx,DWORD [36+esp]
1266 mov ebp,0
1267 sub ebp,edx
1268 and ebp,7
1269 jz NEAR L$029pw_end
1270 ; dl<0 Tail Round 0
1271 mov ecx,0
1272 mov edx,DWORD [edi]
1273 sub ecx,eax
1274 mov eax,0
1275 adc eax,eax
1276 sub ecx,edx
1277 adc eax,0
1278 dec ebp
1279 mov DWORD [ebx],ecx
1280 jz NEAR L$029pw_end
1281 ; dl<0 Tail Round 1
1282 mov ecx,0
1283 mov edx,DWORD [4+edi]
1284 sub ecx,eax
1285 mov eax,0
1286 adc eax,eax
1287 sub ecx,edx
1288 adc eax,0
1289 dec ebp
1290 mov DWORD [4+ebx],ecx
1291 jz NEAR L$029pw_end
1292 ; dl<0 Tail Round 2
1293 mov ecx,0
1294 mov edx,DWORD [8+edi]
1295 sub ecx,eax
1296 mov eax,0
1297 adc eax,eax
1298 sub ecx,edx
1299 adc eax,0
1300 dec ebp
1301 mov DWORD [8+ebx],ecx
1302 jz NEAR L$029pw_end
1303 ; dl<0 Tail Round 3
1304 mov ecx,0
1305 mov edx,DWORD [12+edi]
1306 sub ecx,eax
1307 mov eax,0
1308 adc eax,eax
1309 sub ecx,edx
1310 adc eax,0
1311 dec ebp
1312 mov DWORD [12+ebx],ecx
1313 jz NEAR L$029pw_end
1314 ; dl<0 Tail Round 4
1315 mov ecx,0
1316 mov edx,DWORD [16+edi]
1317 sub ecx,eax
1318 mov eax,0
1319 adc eax,eax
1320 sub ecx,edx
1321 adc eax,0
1322 dec ebp
1323 mov DWORD [16+ebx],ecx
1324 jz NEAR L$029pw_end
1325 ; dl<0 Tail Round 5
1326 mov ecx,0
1327 mov edx,DWORD [20+edi]
1328 sub ecx,eax
1329 mov eax,0
1330 adc eax,eax
1331 sub ecx,edx
1332 adc eax,0
1333 dec ebp
1334 mov DWORD [20+ebx],ecx
1335 jz NEAR L$029pw_end
1336 ; dl<0 Tail Round 6
1337 mov ecx,0
1338 mov edx,DWORD [24+edi]
1339 sub ecx,eax
1340 mov eax,0
1341 adc eax,eax
1342 sub ecx,edx
1343 adc eax,0
1344 mov DWORD [24+ebx],ecx
1345 jmp NEAR L$029pw_end
1346L$030pw_pos:
 ; dl > 0: ebp = dl masked down to a multiple of 8.
1347 and ebp,4294967288
1348 jz NEAR L$033pw_pos_finish
1349L$034pw_pos_loop:
1350 ; dl>0 Round 0
 ; r[i] = a[i] - borrow. The store keeps CF, so jnc tests the sub: with
 ; no borrow out, control moves to the copy ladder at the next word.
 ; While the branch is not taken, eax is not rewritten and is reused
 ; as the borrow for the next round.
1351 mov ecx,DWORD [esi]
1352 sub ecx,eax
1353 mov DWORD [ebx],ecx
1354 jnc NEAR L$035pw_nc0
1355 ; dl>0 Round 1
1356 mov ecx,DWORD [4+esi]
1357 sub ecx,eax
1358 mov DWORD [4+ebx],ecx
1359 jnc NEAR L$036pw_nc1
1360 ; dl>0 Round 2
1361 mov ecx,DWORD [8+esi]
1362 sub ecx,eax
1363 mov DWORD [8+ebx],ecx
1364 jnc NEAR L$037pw_nc2
1365 ; dl>0 Round 3
1366 mov ecx,DWORD [12+esi]
1367 sub ecx,eax
1368 mov DWORD [12+ebx],ecx
1369 jnc NEAR L$038pw_nc3
1370 ; dl>0 Round 4
1371 mov ecx,DWORD [16+esi]
1372 sub ecx,eax
1373 mov DWORD [16+ebx],ecx
1374 jnc NEAR L$039pw_nc4
1375 ; dl>0 Round 5
1376 mov ecx,DWORD [20+esi]
1377 sub ecx,eax
1378 mov DWORD [20+ebx],ecx
1379 jnc NEAR L$040pw_nc5
1380 ; dl>0 Round 6
1381 mov ecx,DWORD [24+esi]
1382 sub ecx,eax
1383 mov DWORD [24+ebx],ecx
1384 jnc NEAR L$041pw_nc6
1385 ; dl>0 Round 7
1386 mov ecx,DWORD [28+esi]
1387 sub ecx,eax
1388 mov DWORD [28+ebx],ecx
1389 jnc NEAR L$042pw_nc7
1390 ;
1391 add esi,32
1392 add ebx,32
1393 sub ebp,8
1394 jnz NEAR L$034pw_pos_loop
1395L$033pw_pos_finish:
 ; Remaining dl & 7 words of a, still with a possible pending borrow.
1396 mov ebp,DWORD [36+esp]
1397 and ebp,7
1398 jz NEAR L$029pw_end
1399 ; dl>0 Tail Round 0
1400 mov ecx,DWORD [esi]
1401 sub ecx,eax
1402 mov DWORD [ebx],ecx
1403 jnc NEAR L$043pw_tail_nc0
1404 dec ebp
1405 jz NEAR L$029pw_end
1406 ; dl>0 Tail Round 1
1407 mov ecx,DWORD [4+esi]
1408 sub ecx,eax
1409 mov DWORD [4+ebx],ecx
1410 jnc NEAR L$044pw_tail_nc1
1411 dec ebp
1412 jz NEAR L$029pw_end
1413 ; dl>0 Tail Round 2
1414 mov ecx,DWORD [8+esi]
1415 sub ecx,eax
1416 mov DWORD [8+ebx],ecx
1417 jnc NEAR L$045pw_tail_nc2
1418 dec ebp
1419 jz NEAR L$029pw_end
1420 ; dl>0 Tail Round 3
1421 mov ecx,DWORD [12+esi]
1422 sub ecx,eax
1423 mov DWORD [12+ebx],ecx
1424 jnc NEAR L$046pw_tail_nc3
1425 dec ebp
1426 jz NEAR L$029pw_end
1427 ; dl>0 Tail Round 4
1428 mov ecx,DWORD [16+esi]
1429 sub ecx,eax
1430 mov DWORD [16+ebx],ecx
1431 jnc NEAR L$047pw_tail_nc4
1432 dec ebp
1433 jz NEAR L$029pw_end
1434 ; dl>0 Tail Round 5
1435 mov ecx,DWORD [20+esi]
1436 sub ecx,eax
1437 mov DWORD [20+ebx],ecx
1438 jnc NEAR L$048pw_tail_nc5
1439 dec ebp
1440 jz NEAR L$029pw_end
1441 ; dl>0 Tail Round 6
1442 mov ecx,DWORD [24+esi]
1443 sub ecx,eax
1444 mov DWORD [24+ebx],ecx
1445 jnc NEAR L$049pw_tail_nc6
 ; Last possible tail word still borrowed: return 1.
1446 mov eax,1
1447 jmp NEAR L$029pw_end
 ; Copy ladder for dl > 0 once no borrow is pending: r[i] = a[i].
 ; L$0xxpw_ncK is entered after word K of the current group of 8 has
 ; been stored, so copying resumes at word K+1.
1448L$050pw_nc_loop:
1449 mov ecx,DWORD [esi]
1450 mov DWORD [ebx],ecx
1451L$035pw_nc0:
1452 mov ecx,DWORD [4+esi]
1453 mov DWORD [4+ebx],ecx
1454L$036pw_nc1:
1455 mov ecx,DWORD [8+esi]
1456 mov DWORD [8+ebx],ecx
1457L$037pw_nc2:
1458 mov ecx,DWORD [12+esi]
1459 mov DWORD [12+ebx],ecx
1460L$038pw_nc3:
1461 mov ecx,DWORD [16+esi]
1462 mov DWORD [16+ebx],ecx
1463L$039pw_nc4:
1464 mov ecx,DWORD [20+esi]
1465 mov DWORD [20+ebx],ecx
1466L$040pw_nc5:
1467 mov ecx,DWORD [24+esi]
1468 mov DWORD [24+ebx],ecx
1469L$041pw_nc6:
1470 mov ecx,DWORD [28+esi]
1471 mov DWORD [28+ebx],ecx
1472L$042pw_nc7:
1473 ;
1474 add esi,32
1475 add ebx,32
1476 sub ebp,8
1477 jnz NEAR L$050pw_nc_loop
 ; Copy the remaining dl & 7 words. The L$0xxpw_tail_ncK labels are also
 ; entered from the borrow tail above, right after word K was stored;
 ; the dec at each label accounts for that word.
1478 mov ebp,DWORD [36+esp]
1479 and ebp,7
1480 jz NEAR L$051pw_nc_end
1481 mov ecx,DWORD [esi]
1482 mov DWORD [ebx],ecx
1483L$043pw_tail_nc0:
1484 dec ebp
1485 jz NEAR L$051pw_nc_end
1486 mov ecx,DWORD [4+esi]
1487 mov DWORD [4+ebx],ecx
1488L$044pw_tail_nc1:
1489 dec ebp
1490 jz NEAR L$051pw_nc_end
1491 mov ecx,DWORD [8+esi]
1492 mov DWORD [8+ebx],ecx
1493L$045pw_tail_nc2:
1494 dec ebp
1495 jz NEAR L$051pw_nc_end
1496 mov ecx,DWORD [12+esi]
1497 mov DWORD [12+ebx],ecx
1498L$046pw_tail_nc3:
1499 dec ebp
1500 jz NEAR L$051pw_nc_end
1501 mov ecx,DWORD [16+esi]
1502 mov DWORD [16+ebx],ecx
1503L$047pw_tail_nc4:
1504 dec ebp
1505 jz NEAR L$051pw_nc_end
1506 mov ecx,DWORD [20+esi]
1507 mov DWORD [20+ebx],ecx
1508L$048pw_tail_nc5:
1509 dec ebp
1510 jz NEAR L$051pw_nc_end
1511 mov ecx,DWORD [24+esi]
1512 mov DWORD [24+ebx],ecx
1513L$049pw_tail_nc6:
1514L$051pw_nc_end:
 ; Every path through the copy ladder ends with no borrow: return 0.
1515 mov eax,0
1516L$029pw_end:
 ; eax holds the final borrow.
1517 pop edi
1518 pop esi
1519 pop ebx
1520 pop ebp
1521 ret
1522segment .bss
1523common _OPENSSL_ia32cap_P 16