// blob: e291a88879ce0faa72fe65c1f6dab6d79f391244 (repository-viewer header, kept as a comment)
#if defined(__i386__)
.file	"src/crypto/bn/asm/x86-mont.S"
.text
// ---------------------------------------------------------------------------
// bn_mul_mont -- word-serial Montgomery multiplication for 32-bit x86
// (the identification string at the end of this file reads
// "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>").
//
// Syntax: AT&T / GNU as, run through the C preprocessor.
// ABI:    all six arguments are read from the stack (cdecl style).
//
// Stack arguments, offsets valid after the four register pushes below:
//   20(%esp)  arg0  result vector, written by the common tail
//   24(%esp)  arg1  first multiplicand vector
//   28(%esp)  arg2  second multiplicand vector
//   32(%esp)  arg3  modulus vector
//   36(%esp)  arg4  pointer to one word; only that first word is loaded
//   40(%esp)  arg5  word count ("num" in the comments below)
// NOTE(review): presumably this is the OpenSSL/BoringSSL prototype
//   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the C
//   header. The rp/ap/bp/np/n0/tp names below follow that assumption.
//
// Returns in %eax:
//   0  when num < 4; nothing is computed and nothing is written
//   1  after the result has been stored
//
// %ebp, %ebx, %esi and %edi are saved and restored. %eax, %ecx, %edx and
// the flags are clobbered. The SSE2 path uses %mm0-%mm7 and executes emms
// before joining the common tail.
//
// Private frame carved out below the caller stack (%esp relative):
//    0   scratch; the squaring path keeps num-1 here
//    4   rp
//    8   ap
//   12   bp; the integer paths reuse it as the outer-loop cursor
//   16   np
//   20   n0 (the word loaded through arg4)
//   24   stack pointer as it was before the frame was carved out
//   28   multiply path only: address one past the last word of bp
//   32   tp[0 .. num+1], the running accumulator
// ---------------------------------------------------------------------------
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			// return value if we bail out early
	movl	40(%esp),%edi			// edi = num
	cmpl	$4,%edi
	jl	.L000just_leave			// num < 4: return 0
	leal	20(%esp),%esi			// esi = address of the argument block
	leal	24(%esp),%edx			// edx = address of the arg1 slot
	addl	$2,%edi				// edi = num+2
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp		// ebp = esp - 32 - 4*(num+2), candidate frame base
	negl	%edi				// edi = num+2 again

	// Place the frame relative to the address held in edx:
	// first make ebp congruent to edx modulo 2048, then force bit 11 of
	// the two to differ, then round down to a 64-byte boundary.
	// NOTE(review): presumably done to limit cache aliasing between the
	// frame and the caller data -- confirm against the generator script.
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp

	// Move %esp down to ebp in steps of at most 4096 bytes, reading one
	// word at every step so each page in between is touched in order.
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx			// edx = stack pointer to restore on exit
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:

	// Copy the arguments into the private frame (layout in the header).
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	movl	(%esi),%esi			// load the word arg4 points at
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			// ebx = (num+2)-3 = num-1
	movl	%edx,24(%esp)			// remember the stack pointer to restore

	// Position-independent lookup of OPENSSL_ia32cap_P: call/pop yields
	// the address of the label, the leal adds the link-time distance.
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	btl	$26,(%eax)			// bit 26 of the first capability word
	jnc	.L004non_sse2			// clear: take the integer-only path

	// ------------------------------------------------------------------
	// SSE2 path (pmuludq on MMX registers).
	//   esi = ap, edi = bp, ebp = np, ebx = num-1
	//   edx = outer index i, ecx = inner index j
	//   mm7 = 0x00000000ffffffff, mask selecting the low 32 bits
	//   mm4 = bp[i], mm5 = per-row multiplier for np
	//   mm2 = running ap*bp column sum, mm3 = running np*m column sum
	// ------------------------------------------------------------------
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4			// mm4 = bp[0]
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5			// mm5 = ap[0]*bp[0]
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0			// mm0 = low word of the product
	pmuludq	20(%esp),%mm5			// mm5 = low word * n0; its low word is the multiplier m
	pmuludq	%mm5,%mm3			// mm3 = np[0]*m
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2			// keep only the carries
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	// First row, columns j = 1 .. num-2: result word goes to tp[j-1].
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	// Last column of the first row, then store both carry words.
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)		// 64-bit store: tp[num-1] and tp[num]
	incl	%edx
.L006outer:
	// Rows i = 1 .. num-1: same as the first row but tp is added in.
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4		// mm4 = bp[i]
	movd	(%esi),%mm5
	movd	32(%esp),%mm6			// mm6 = tp[0]
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5			// multiplier for this row
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6			// mm6 = tp[1]
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx				// ebx doubles as the inner countdown
.L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6		// mm6 = tp[j+1]
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)		// tp[j-1]
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx			// leal keeps the flags set by decl
	jnz	.L007inner
	movl	%ecx,%ebx			// ebx back to num-1
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6		// previous top word tp[num]
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)		// tp[num-1] and tp[num]
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	emms					// leave MMX state before returning to the caller
	jmp	.L008common_tail

	// ------------------------------------------------------------------
	// Integer-only path.
	//   esi = ap, edi = bp[0], ebx = num-1, ecx = column index
	// The squaring variant is chosen only when ap and bp are the same
	// pointer AND num is even (ebp = (num & 1) | (ap - bp) is zero).
	// ------------------------------------------------------------------
.align	16
.L004non_sse2:
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax		// eax = address one past the last word of bp
	orl	%edx,%ebp
	movl	(%edi),%edi			// mov does not disturb the flags from orl
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)			// end-of-bp sentinel for the outer loop
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	// tp[j] = ap[j]*bp[0] + carry, for j = 0 .. num-2
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi			// esi = np from here on
	adcl	$0,%edx
	imull	32(%esp),%edi			// edi = n0 * tp[0], the reduction multiplier
	movl	%eax,32(%esp,%ebx,4)		// tp[num-1]
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)		// tp[num]
	movl	%ecx,40(%esp,%ebx,4)		// tp[num+1] = 0
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax			// only the carry out of column 0 is kept
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	// tp[j] += ap[j]*bp[i] + carry, for j = 0 .. num-2
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi			// new reduction multiplier from tp[0]
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	// tp[j-1] = tp[j] + np[j]*m + carry: reduce and shift down one word
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)		// tp[num-2]
	xorl	%eax,%eax
	movl	12(%esp),%ecx			// ecx = current position in bp
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			// advance to the next word of bp
	movl	%edx,32(%esp,%ebx,4)		// tp[num-1]
	cmpl	28(%esp),%ecx			// reached the end of bp?
	movl	%eax,36(%esp,%ebx,4)		// tp[num]
	je	.L008common_tail
	movl	(%ecx),%edi			// edi = next word of bp
	movl	8(%esp),%esi			// esi = ap again
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd

	// ------------------------------------------------------------------
	// Squaring variant of the integer path (ap == bp, num even).
	//   (%esp)   = num-1
	//   12(%esp) = outer index i, starting at 0
	//   esi = ap, edi = ap[i]
	// Cross products are doubled on the fly: leal (%ebx,%eax,2) forms
	// 2*eax plus the bit carried in through ebx, and shrl $31 extracts
	// the bit that the doubling pushes out.
	// ------------------------------------------------------------------
.align	16
.L009bn_sqr_mont:
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi				// ap[0]*ap[0]
	movl	%eax,32(%esp)			// tp[0]
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi			// esi = np for the reduction pass
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi			// edi = n0 * tp[0]
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx			// ebx = num-1 again
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	// Reduction pass, two columns per iteration:
	// tp[j-1] = tp[j] + np[j]*m + carry
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx			// ecx = outer index i
	xorl	%eax,%eax
	movl	8(%esp),%esi			// esi = ap again
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx			// i == num-1: every row has been processed
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	movl	4(%esi,%ecx,4),%edi		// edi = ap[i+1]
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)			// store the incremented i
	mull	%edi				// diagonal term ap[i]*ap[i]
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			// leal keeps the flags from cmpl
	je	.L015sqrlast			// last row has no cross products
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	// tp[j] += 2*ap[j]*ap[i] with carries, for j = i+1 .. num-1
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi			// esi = np
	imull	32(%esp),%edi			// edi = n0 * tp[0]
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx			// ebx = ecx-1, loop bound for the reduction pass
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd

	// ------------------------------------------------------------------
	// Common tail: final subtraction and branch-free result selection.
	//   ebp = np, edi = rp, esi = address of tp[0], ebx = num-1
	// ------------------------------------------------------------------
.align	16
.L008common_tail:
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx			// index = 0; also clears the carry flag
.align	16
.L017sub:
	// rp[k] = tp[k] - np[k] - borrow. decl, movl and leal all leave the
	// carry flag alone, so the borrow survives from one sbbl to the next.
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L017sub
	sbbl	$0,%eax				// eax = tp[num] - borrow; expected to be 0 or all ones
	// Select the copy source without branching:
	// esi = (tp & eax) | (rp & ~eax)
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L018copy:
	// Copy the selected vector to rp, from the top word downwards, and
	// overwrite each tp word with ecx (-1 once the loop above has exited).
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L018copy
	movl	24(%esp),%esp			// drop the private frame
	movl	$1,%eax				// success
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
// NUL-terminated ASCII identification string:
// "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif