/*
 * Source blob: 3fb668826b5088d863a68ed9729991dbc5dc5aa2
 *
 * bn_mul_mont -- "Montgomery Multiplication for x86, CRYPTOGAMS by
 * <appro@openssl.org>" (that is the NUL-terminated identification string
 * emitted as .byte data at the end of this file).
 *
 * Target: 32-bit x86, GNU as AT&T syntax, ELF directives, arguments on the
 * stack.  The whole file is guarded by #if defined(__i386__).
 *
 * Stack arguments as the code reads them (after the four register pushes the
 * first argument is at 20(%esp)).  The short names are used in the comments
 * below.
 * NOTE(review): the names presumably match the C prototype
 * bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the header.
 *   arg0  rp   destination word vector (written in the common tail)
 *   arg1  ap   first multiplicand word vector
 *   arg2  bp   second multiplicand word vector
 *   arg3  np   modulus word vector
 *   arg4  n0   pointer; only the first 32-bit word it points to is loaded
 *   arg5  num  number of 32-bit words, compared as a signed value
 *
 * Returns (in %eax):
 *   0  when num < 4; nothing is computed and nothing is written.
 *   1  otherwise, after the result has been stored through rp.
 *
 * Registers: %ebp, %ebx, %esi and %edi are saved and restored.  %eax, %ecx,
 * %edx and the flags are clobbered.  The SSE2 path uses %mm0-%mm7 and
 * executes emms before leaving that path.
 *
 * Private frame built below the incoming stack pointer:
 *    0(%esp)  scratch; the squaring path keeps num-1 here
 *    4(%esp)  rp
 *    8(%esp)  ap
 *   12(%esp)  bp; the integer paths reuse it as the running bp pointer
 *             (multiply path) or the running index i (squaring path)
 *   16(%esp)  np
 *   20(%esp)  the word loaded through n0
 *   24(%esp)  stack pointer value to restore before the final pops
 *   28(%esp)  multiply path only: end pointer bp + 4*num
 *   32(%esp)  temporary vector tp[], num+2 words
 *
 * External symbol referenced: OPENSSL_ia32cap_P (bit 26 of its first dword
 * selects the SSE2 path).
 */
#if defined(__i386__)
.text
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Return value 0 for the early exit; leave if num < 4 (signed). */
	xorl	%eax,%eax
	movl	40(%esp),%edi
	cmpl	$4,%edi
	jl	.L000just_leave
	/* esi = address of arg0 slot, edx = address of arg1 slot. */
	leal	20(%esp),%esi
	leal	24(%esp),%edx
	/* ebp = esp - 32 - 4*(num+2): tentative bottom of the private frame. */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp
	negl	%edi
	/*
	 * Lower ebp so that (ebp - edx) is a multiple of 2048, then lower it by
	 * another 2048 when bit 11 of (ebp ^ edx) is clear, and finally round
	 * down to a 64-byte boundary.
	 * NOTE(review): presumably this keeps the frame from aliasing the
	 * argument area in cache -- confirm against the generator script.
	 */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp
	/*
	 * Move esp down to ebp one 4096-byte step at a time, loading one word
	 * from every intermediate position.  edx keeps the old esp.
	 * NOTE(review): presumably a stack probe so each page is touched in
	 * order -- confirm.
	 */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* Copy rp, ap, bp, np and the word at *n0 into the private frame. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	/* edi still holds num+2, so ebx = num-1. */
	leal	-3(%edi),%ebx
	/* Remember the pre-frame esp for the epilogue. */
	movl	%edx,24(%esp)
	/* Position-independent address of OPENSSL_ia32cap_P via call/pop. */
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	/* Bit 26 of the first capability dword clear: take the integer path. */
	btl	$26,(%eax)
	jnc	.L004non_sse2

	/*
	 * SSE2 path.  mm7 = 0x00000000ffffffff (low-word mask),
	 * esi = ap, edi = bp, ebp = np, edx = i (outer index), ecx = j.
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	/*
	 * First pass (i = 0): mm4 = bp[0]; mm5 = low32(ap[0]*bp[0]) times the
	 * n0 word (the per-pass multiplier for np); mm2 and mm3 carry the
	 * running 64-bit sums of the ap*bp and np*multiplier columns.
	 */
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	/* Column j: low word of the combined sum is stored to tp[j-1]. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	/* Last column of the first pass; the 64-bit top goes to tp[num-1..num]. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L006outer:
	/* Pass i: same as the first pass but tp[] is added in (via mm6). */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	/* ebx counts the inner loop down to zero; it is rebuilt from ecx after. */
	decl	%ebx
.L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	/* leal keeps the zero flag from decl intact for the jnz below. */
	leal	1(%ecx),%ecx
	jnz	.L007inner
	/* Restore ebx = num-1 and finish the last column of pass i. */
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	/* Leave MMX state before returning to integer code. */
	emms
	jmp	.L008common_tail

.align	16
.L004non_sse2:
	/*
	 * Integer path.  esi = ap, edi = bp.  ebp = (num & 1) | (ap - bp) is
	 * zero only when num is even and ap == bp; that case uses the
	 * dedicated squaring code.  The movl after orl does not touch flags.
	 */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	/* eax = bp + 4*num, the end-of-bp sentinel for the multiply path. */
	leal	4(%edi,%ebx,4),%eax
	orl	%edx,%ebp
	/* edi = bp[0] */
	movl	(%edi),%edi
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	/* tp[j] = low word of ap[j]*bp[0] + carry; edx carries the high word. */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	/*
	 * Last word of the first row, then switch to the reduction:
	 * esi = np, edi = n0 word * tp[0], and tp[num+1] is cleared.
	 */
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	/* Only the carry out of tp[0] + low word is kept (in edx). */
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	/* tp[j] += ap[j] * bp[i], carry propagated through edx. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	/* Last word of the row, then set up the reduction as above. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	/* tp[j-1] = low word of tp[j] + np[j]*edi + carry (shifts tp down). */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	/* Top words of the reduced row, then advance the bp pointer. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	/* Done once the bp pointer reaches the sentinel saved at 28(%esp). */
	cmpl	28(%esp),%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	/* Next row: edi = next bp word, esi = ap, j = 0, carry = 0. */
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd

.align	16
.L009bn_sqr_mont:
	/*
	 * Squaring path (ap == bp, num even).  0(%esp) = num-1,
	 * 12(%esp) = i = 0, esi = ap, edi = ap[0].  tp[0] = low(ap[0]^2);
	 * ebx holds the bit shifted out while cross products are doubled.
	 */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	/* tp[j] = 2 * low(ap[j]*ap[0] + carry) + previous shifted-out bit. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	/* Last word of the first row, then set up the reduction with np. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	/* Reduction row, two words per iteration; results shift tp down by one. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	/* Top words of the reduced row; finished when i == num-1. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	/* Next row: i += 1, edi = ap[i], add ap[i]^2 into tp[i]. */
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	/* leal preserves the flags of the compare for the je below. */
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	/* tp[j] += 2 * low(ap[j]*ap[i] + carry) + previous shifted-out bits. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	/* Double the final carry word; ebp receives the overflow. */
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	/* Store the top two words and restart the reduction with np. */
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd

.align	16
.L008common_tail:
	/*
	 * All paths arrive here with ebx = num-1.
	 * ebp = np, edi = rp, esi = tp.  xorl clears the carry flag that the
	 * sbbl chain below starts from.
	 */
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L017sub:
	/*
	 * rp[k] = tp[k] - np[k] - borrow for k = 0..num-1.  movl and leal do
	 * not modify flags and decl leaves the carry flag alone, so the borrow
	 * survives from one sbbl to the next.
	 */
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L017sub
	/*
	 * eax = tp[num] - borrow is used as a bit mask:
	 * esi = (tp & eax) | (rp & ~eax), a branch-free choice of copy source.
	 * NOTE(review): this is a clean select only if eax is 0 or all-ones
	 * here -- presumably guaranteed by the value range of tp[num]; confirm.
	 */
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L018copy:
	/*
	 * rp[k] = src[k] for k = num-1..0, and overwrite tp[k] with ecx so the
	 * temporary no longer holds intermediate data.
	 */
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L018copy
	/* Drop the private frame and report success. */
	movl	24(%esp),%esp
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif