/* Upstream blob: e7353ae2522904396bbffebe7330c6b59395f1f6 (repository-viewer header; navigation links removed). */
#if defined(__i386__)
/*
 * Montgomery multiplication for 32-bit x86.
 * Syntax: AT&T.  Object format: Mach-O (leading-underscore symbols).
 * The ident string stored after the function decodes to
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Entry point: _bn_mul_mont.  Six 32-bit arguments are read from the stack
 * (offsets relative to %esp at entry):
 *    4(%esp)  arg0  pointer; the result words are stored through it
 *    8(%esp)  arg1  pointer to the first multiplicand words
 *   12(%esp)  arg2  pointer to the second multiplicand words
 *   16(%esp)  arg3  pointer to the words used for reduction and for the
 *                   final subtraction
 *   20(%esp)  arg4  pointer; only the first 32-bit word behind it is read
 *   24(%esp)  arg5  signed word count ("num" below)
 * NOTE(review): this presumably matches the usual C prototype
 *   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 *                   const BN_ULONG *np, const BN_ULONG *n0, int num);
 * and the comments below use rp/ap/bp/np/n0 as shorthand for arg0..arg4.
 * Confirm against the C declaration.
 *
 * Return value (%eax):
 *   0  when num < 4; nothing is read or written besides the stack pushes.
 *   1  otherwise, after the result has been stored through rp.
 *
 * %ebp, %ebx, %esi and %edi are pushed on entry and popped on exit; %esp is
 * restored from the frame.  %eax, %ecx, %edx and the flags are clobbered.
 * The SSE2 path uses %mm0-%mm7 and executes emms before the common tail.
 *
 * Stack frame (offsets from the relocated %esp):
 *    0(%esp)  num-1 (written by the squaring path only)
 *    4(%esp)  rp
 *    8(%esp)  ap
 *   12(%esp)  bp (the non-SSE2 paths reuse the slot as a loop cursor)
 *   16(%esp)  np
 *   20(%esp)  n0 word (*arg4)
 *   24(%esp)  caller's %esp, reloaded on exit
 *   28(%esp)  &bp[num] (non-SSE2 multiply path only)
 *   32(%esp)  tp[0 .. num+1], the temporary vector
 */
.text
.globl _bn_mul_mont
.private_extern _bn_mul_mont
.align 4
_bn_mul_mont:
L_bn_mul_mont_begin:
    pushl   %ebp
    pushl   %ebx
    pushl   %esi
    pushl   %edi
    xorl    %eax,%eax                   /* return value if we bail out */
    movl    40(%esp),%edi               /* edi = num (arg5, after 4 pushes) */
    cmpl    $4,%edi
    jl      L000just_leave              /* signed: num < 4 -> return 0 */
    leal    20(%esp),%esi               /* esi = address of arg0 */
    leal    24(%esp),%edx               /* edx = address of arg1 */

    /*
     * Choose the frame address in %ebp.
     * Start from esp - 32 - 4*(num+2): room for the 32-byte header plus
     * num+2 words of tp.
     */
    addl    $2,%edi
    negl    %edi
    leal    -32(%esp,%edi,4),%ebp
    negl    %edi                        /* edi = num+2 again */
    /*
     * Lower the frame so that (frame - &arg1) is a multiple of 2048.  Then
     * lower it by another 2048 if bit 11 of the two addresses is equal, so
     * that they end up differing in bit 11.  Finally round down to a
     * 64-byte boundary.
     * NOTE(review): presumably chosen to reduce cache aliasing between the
     * frame and the argument area; the code itself does not say.
     */
    movl    %ebp,%eax
    subl    %edx,%eax
    andl    $2047,%eax
    subl    %eax,%ebp
    xorl    %ebp,%edx
    andl    $2048,%edx
    xorl    $2048,%edx
    subl    %edx,%ebp
    andl    $-64,%ebp

    /*
     * Move %esp down to the frame, reading one word from every 4096-byte
     * step on the way.  The first step lands on frame + k*4096 (the largest
     * such address not above the old %esp); the loop then descends 4096
     * bytes at a time until %esp equals the frame address.  The caller's
     * %esp is kept in %edx.
     */
    movl    %esp,%eax
    subl    %ebp,%eax
    andl    $-4096,%eax
    movl    %esp,%edx
    leal    (%ebp,%eax,1),%esp
    movl    (%esp),%eax                 /* touch the page */
    cmpl    %ebp,%esp
    ja      L001page_walk               /* unsigned: esp > frame */
    jmp     L002page_walk_done
.align 4,0x90
L001page_walk:
    leal    -4096(%esp),%esp
    movl    (%esp),%eax                 /* touch the page */
    cmpl    %ebp,%esp
    ja      L001page_walk
L002page_walk_done:
    /* Copy the arguments into the frame header (see layout above). */
    movl    (%esi),%eax
    movl    4(%esi),%ebx
    movl    8(%esi),%ecx
    movl    12(%esi),%ebp
    movl    16(%esi),%esi
    movl    (%esi),%esi                 /* dereference arg4: n0 word */
    movl    %eax,4(%esp)
    movl    %ebx,8(%esp)
    movl    %ecx,12(%esp)
    movl    %ebp,16(%esp)
    movl    %esi,20(%esp)
    leal    -3(%edi),%ebx               /* ebx = (num+2)-3 = num-1 */
    movl    %edx,24(%esp)               /* caller's esp */

    /*
     * Position-independent load of the address of _OPENSSL_ia32cap_P via
     * its non-lazy pointer slot, then test bit 26 of the first word.
     * The label name says that bit clear means "no SSE2".
     */
    call    L003PIC_me_up
L003PIC_me_up:
    popl    %eax                        /* eax = address of L003PIC_me_up */
    movl    L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax
    btl     $26,(%eax)
    jnc     L004non_sse2

    /*
     * ---- SSE2 path (pmuludq on MMX registers) ------------------------
     * esi = ap, edi = bp, ebp = np, edx = i (outer index), ecx = j.
     * mm7 = 0x00000000ffffffff mask, mm4 = bp[i], mm5 = per-row multiplier
     * m = low32(column 0) * n0, mm2 = running carry of the ap*bp[i] column
     * sum, mm3 = running carry of the np*m column sum.
     */
    movl    $-1,%eax
    movd    %eax,%mm7
    movl    8(%esp),%esi
    movl    12(%esp),%edi
    movl    16(%esp),%ebp
    xorl    %edx,%edx
    xorl    %ecx,%ecx
    movd    (%edi),%mm4                 /* bp[0] */
    movd    (%esi),%mm5                 /* ap[0] */
    movd    (%ebp),%mm3                 /* np[0] */
    pmuludq %mm4,%mm5                   /* ap[0]*bp[0] */
    movq    %mm5,%mm2
    movq    %mm5,%mm0
    pand    %mm7,%mm0                   /* low 32 bits of the product */
    pmuludq 20(%esp),%mm5               /* m = low32 * n0 (low dwords only) */
    pmuludq %mm5,%mm3                   /* np[0]*m */
    paddq   %mm0,%mm3
    movd    4(%ebp),%mm1                /* np[1] */
    movd    4(%esi),%mm0                /* ap[1] */
    psrlq   $32,%mm2
    psrlq   $32,%mm3
    incl    %ecx
.align 4,0x90
L0051st:
    /* First row, j = 1 .. num-2: tp[j-1] = low32(ap[j]*bp[0] + np[j]*m + carries). */
    pmuludq %mm4,%mm0
    pmuludq %mm5,%mm1
    paddq   %mm0,%mm2
    paddq   %mm1,%mm3
    movq    %mm2,%mm0
    pand    %mm7,%mm0
    movd    4(%ebp,%ecx,4),%mm1         /* np[j+1] */
    paddq   %mm0,%mm3
    movd    4(%esi,%ecx,4),%mm0         /* ap[j+1] */
    psrlq   $32,%mm2
    movd    %mm3,28(%esp,%ecx,4)        /* tp[j-1] */
    psrlq   $32,%mm3
    leal    1(%ecx),%ecx
    cmpl    %ebx,%ecx
    jl      L0051st
    /* Last column of the first row (j = num-1) and the two top words. */
    pmuludq %mm4,%mm0
    pmuludq %mm5,%mm1
    paddq   %mm0,%mm2
    paddq   %mm1,%mm3
    movq    %mm2,%mm0
    pand    %mm7,%mm0
    paddq   %mm0,%mm3
    movd    %mm3,28(%esp,%ecx,4)
    psrlq   $32,%mm2
    psrlq   $32,%mm3
    paddq   %mm2,%mm3
    movq    %mm3,32(%esp,%ebx,4)        /* tp[num-1] and tp[num] in one store */
    incl    %edx                        /* i = 1 */
L006outer:
    /* Rows i = 1 .. num-1: tp = (tp + ap*bp[i] + np*m) shifted down one word. */
    xorl    %ecx,%ecx
    movd    (%edi,%edx,4),%mm4          /* bp[i] */
    movd    (%esi),%mm5                 /* ap[0] */
    movd    32(%esp),%mm6               /* tp[0] */
    movd    (%ebp),%mm3                 /* np[0] */
    pmuludq %mm4,%mm5
    paddq   %mm6,%mm5
    movq    %mm5,%mm0
    movq    %mm5,%mm2
    pand    %mm7,%mm0
    pmuludq 20(%esp),%mm5               /* m for this row */
    pmuludq %mm5,%mm3
    paddq   %mm0,%mm3
    movd    36(%esp),%mm6               /* tp[1] */
    movd    4(%ebp),%mm1
    movd    4(%esi),%mm0
    psrlq   $32,%mm2
    psrlq   $32,%mm3
    paddq   %mm6,%mm2
    incl    %ecx
    decl    %ebx                        /* ebx doubles as the inner down-counter */
L007inner:
    pmuludq %mm4,%mm0
    pmuludq %mm5,%mm1
    paddq   %mm0,%mm2
    paddq   %mm1,%mm3
    movq    %mm2,%mm0
    movd    36(%esp,%ecx,4),%mm6        /* tp[j+1] */
    pand    %mm7,%mm0
    movd    4(%ebp,%ecx,4),%mm1
    paddq   %mm0,%mm3
    movd    4(%esi,%ecx,4),%mm0
    psrlq   $32,%mm2
    movd    %mm3,28(%esp,%ecx,4)        /* tp[j-1] */
    psrlq   $32,%mm3
    paddq   %mm6,%mm2
    decl    %ebx
    leal    1(%ecx),%ecx                /* lea keeps the flags from decl */
    jnz     L007inner
    movl    %ecx,%ebx                   /* restore ebx = num-1 */
    pmuludq %mm4,%mm0
    pmuludq %mm5,%mm1
    paddq   %mm0,%mm2
    paddq   %mm1,%mm3
    movq    %mm2,%mm0
    pand    %mm7,%mm0
    paddq   %mm0,%mm3
    movd    %mm3,28(%esp,%ecx,4)
    psrlq   $32,%mm2
    psrlq   $32,%mm3
    movd    36(%esp,%ebx,4),%mm6        /* previous tp[num] */
    paddq   %mm2,%mm3
    paddq   %mm6,%mm3
    movq    %mm3,32(%esp,%ebx,4)        /* tp[num-1] and tp[num] */
    leal    1(%edx),%edx
    cmpl    %ebx,%edx
    jle     L006outer                   /* while i <= num-1 */
    emms                                /* leave MMX state before the integer tail */
    jmp     L008common_tail
.align 4,0x90
L004non_sse2:
    /*
     * ---- Integer path -----------------------------------------------
     * esi = ap, edi = bp (then bp[0]), ebx = num-1, eax = &bp[num].
     * ebp = (num & 1) | (ap - bp) is zero only when ap == bp and num is
     * even; that case goes to the squaring code.  The load into %edi does
     * not alter the flags set by orl.
     */
    movl    8(%esp),%esi
    leal    1(%ebx),%ebp
    movl    12(%esp),%edi
    xorl    %ecx,%ecx
    movl    %esi,%edx
    andl    $1,%ebp
    subl    %edi,%edx
    leal    4(%edi,%ebx,4),%eax
    orl     %edx,%ebp
    movl    (%edi),%edi                 /* bp[0] */
    jz      L009bn_sqr_mont
    movl    %eax,28(%esp)               /* end-of-bp sentinel for the outer loop */
    movl    (%esi),%eax                 /* ap[0] */
    xorl    %edx,%edx
.align 4,0x90
L010mull:
    /* tp[0 .. num-2] = low words of ap[j]*bp[0] plus carry (carry kept in edx). */
    movl    %edx,%ebp
    mull    %edi
    addl    %eax,%ebp
    leal    1(%ecx),%ecx
    adcl    $0,%edx
    movl    (%esi,%ecx,4),%eax
    cmpl    %ebx,%ecx
    movl    %ebp,28(%esp,%ecx,4)
    jl      L010mull
    /* Last word of the row; then m = tp[0]*n0 and start the reduction row. */
    movl    %edx,%ebp
    mull    %edi
    movl    20(%esp),%edi               /* n0 */
    addl    %ebp,%eax
    movl    16(%esp),%esi               /* esi = np */
    adcl    $0,%edx
    imull   32(%esp),%edi               /* edi = m = tp[0]*n0 (low 32 bits) */
    movl    %eax,32(%esp,%ebx,4)        /* tp[num-1] */
    xorl    %ecx,%ecx
    movl    %edx,36(%esp,%ebx,4)        /* tp[num] */
    movl    %ecx,40(%esp,%ebx,4)        /* tp[num+1] = 0 */
    movl    (%esi),%eax
    mull    %edi
    addl    32(%esp),%eax               /* sum is overwritten next; only CF is used */
    movl    4(%esi),%eax
    adcl    $0,%edx
    incl    %ecx
    jmp     L0112ndmadd
.align 4,0x90
L0121stmadd:
    /* tp[j] += ap[j]*bp[i], in place, for j = 0 .. num-2. */
    movl    %edx,%ebp
    mull    %edi
    addl    32(%esp,%ecx,4),%ebp
    leal    1(%ecx),%ecx
    adcl    $0,%edx
    addl    %eax,%ebp
    movl    (%esi,%ecx,4),%eax
    adcl    $0,%edx
    cmpl    %ebx,%ecx
    movl    %ebp,28(%esp,%ecx,4)
    jl      L0121stmadd
    /* Last word, carry into tp[num]/tp[num+1], then compute m for this row. */
    movl    %edx,%ebp
    mull    %edi
    addl    32(%esp,%ebx,4),%eax
    movl    20(%esp),%edi               /* n0 */
    adcl    $0,%edx
    movl    16(%esp),%esi               /* esi = np */
    addl    %eax,%ebp
    adcl    $0,%edx
    imull   32(%esp),%edi               /* m = tp[0]*n0 */
    xorl    %ecx,%ecx
    addl    36(%esp,%ebx,4),%edx
    movl    %ebp,32(%esp,%ebx,4)
    adcl    $0,%ecx
    movl    (%esi),%eax
    movl    %edx,36(%esp,%ebx,4)
    movl    %ecx,40(%esp,%ebx,4)
    mull    %edi
    addl    32(%esp),%eax               /* sum is overwritten next; only CF is used */
    movl    4(%esi),%eax
    adcl    $0,%edx
    movl    $1,%ecx
.align 4,0x90
L0112ndmadd:
    /* tp[j-1] = tp[j] + np[j]*m + carry: reduce and shift down one word. */
    movl    %edx,%ebp
    mull    %edi
    addl    32(%esp,%ecx,4),%ebp
    leal    1(%ecx),%ecx
    adcl    $0,%edx
    addl    %eax,%ebp
    movl    (%esi,%ecx,4),%eax
    adcl    $0,%edx
    cmpl    %ebx,%ecx
    movl    %ebp,24(%esp,%ecx,4)
    jl      L0112ndmadd
    movl    %edx,%ebp
    mull    %edi
    addl    32(%esp,%ebx,4),%ebp
    adcl    $0,%edx
    addl    %eax,%ebp
    adcl    $0,%edx
    movl    %ebp,28(%esp,%ebx,4)        /* tp[num-2] */
    xorl    %eax,%eax
    movl    12(%esp),%ecx               /* current bp cursor */
    addl    36(%esp,%ebx,4),%edx
    adcl    40(%esp,%ebx,4),%eax
    leal    4(%ecx),%ecx                /* advance to the next bp word */
    movl    %edx,32(%esp,%ebx,4)        /* tp[num-1] */
    cmpl    28(%esp),%ecx               /* reached &bp[num]? */
    movl    %eax,36(%esp,%ebx,4)        /* tp[num] */
    je      L008common_tail
    movl    (%ecx),%edi                 /* next bp word */
    movl    8(%esp),%esi                /* esi = ap */
    movl    %ecx,12(%esp)
    xorl    %ecx,%ecx
    xorl    %edx,%edx
    movl    (%esi),%eax
    jmp     L0121stmadd
.align 4,0x90
L009bn_sqr_mont:
    /*
     * ---- Integer squaring path (ap == bp, num even) -----------------
     * 0(%esp) = num-1, 12(%esp) = i (row index, starts at 0).
     * Cross products ap[j]*ap[i] are doubled on the fly; ebx carries the
     * bit shifted out of the previous doubled word.
     */
    movl    %ebx,(%esp)
    movl    %ecx,12(%esp)
    movl    %edi,%eax
    mull    %edi                        /* ap[0]^2 */
    movl    %eax,32(%esp)               /* tp[0] */
    movl    %edx,%ebx
    shrl    $1,%edx
    andl    $1,%ebx
    incl    %ecx
.align 4,0x90
L013sqr:
    movl    (%esi,%ecx,4),%eax
    movl    %edx,%ebp
    mull    %edi
    addl    %ebp,%eax
    leal    1(%ecx),%ecx
    adcl    $0,%edx
    leal    (%ebx,%eax,2),%ebp          /* 2*low + carried-in bit */
    shrl    $31,%eax                    /* bit shifted out by the doubling */
    cmpl    (%esp),%ecx
    movl    %eax,%ebx
    movl    %ebp,28(%esp,%ecx,4)
    jl      L013sqr
    movl    (%esi,%ecx,4),%eax
    movl    %edx,%ebp
    mull    %edi
    addl    %ebp,%eax
    movl    20(%esp),%edi               /* n0 */
    adcl    $0,%edx
    movl    16(%esp),%esi               /* esi = np */
    leal    (%ebx,%eax,2),%ebp
    imull   32(%esp),%edi               /* m = tp[0]*n0 */
    shrl    $31,%eax
    movl    %ebp,32(%esp,%ecx,4)
    leal    (%eax,%edx,2),%ebp
    movl    (%esi),%eax
    shrl    $31,%edx
    movl    %ebp,36(%esp,%ecx,4)
    movl    %edx,40(%esp,%ecx,4)
    mull    %edi
    addl    32(%esp),%eax               /* sum is overwritten below; only CF is used */
    movl    %ecx,%ebx                   /* ebx = num-1 */
    adcl    $0,%edx
    movl    4(%esi),%eax
    movl    $1,%ecx
.align 4,0x90
L0143rdmadd:
    /* Reduction row, two words per iteration: tp[j-1] = tp[j] + np[j]*m + carry. */
    movl    %edx,%ebp
    mull    %edi
    addl    32(%esp,%ecx,4),%ebp
    adcl    $0,%edx
    addl    %eax,%ebp
    movl    4(%esi,%ecx,4),%eax
    adcl    $0,%edx
    movl    %ebp,28(%esp,%ecx,4)
    movl    %edx,%ebp
    mull    %edi
    addl    36(%esp,%ecx,4),%ebp
    leal    2(%ecx),%ecx
    adcl    $0,%edx
    addl    %eax,%ebp
    movl    (%esi,%ecx,4),%eax
    adcl    $0,%edx
    cmpl    %ebx,%ecx
    movl    %ebp,24(%esp,%ecx,4)
    jl      L0143rdmadd
    movl    %edx,%ebp
    mull    %edi
    addl    32(%esp,%ebx,4),%ebp
    adcl    $0,%edx
    addl    %eax,%ebp
    adcl    $0,%edx
    movl    %ebp,28(%esp,%ebx,4)
    movl    12(%esp),%ecx               /* i */
    xorl    %eax,%eax
    movl    8(%esp),%esi                /* esi = ap */
    addl    36(%esp,%ebx,4),%edx
    adcl    40(%esp,%ebx,4),%eax
    movl    %edx,32(%esp,%ebx,4)
    cmpl    %ebx,%ecx                   /* i == num-1: all rows done */
    movl    %eax,36(%esp,%ebx,4)
    je      L008common_tail
    /* Next row: add ap[i+1]^2 at tp[i+1], then the doubled cross products. */
    movl    4(%esi,%ecx,4),%edi
    leal    1(%ecx),%ecx
    movl    %edi,%eax
    movl    %ecx,12(%esp)               /* i = i+1 */
    mull    %edi
    addl    32(%esp,%ecx,4),%eax
    adcl    $0,%edx
    movl    %eax,32(%esp,%ecx,4)
    xorl    %ebp,%ebp
    cmpl    %ebx,%ecx
    leal    1(%ecx),%ecx                /* lea keeps the flags from cmpl */
    je      L015sqrlast                 /* last row has no cross products */
    movl    %edx,%ebx
    shrl    $1,%edx
    andl    $1,%ebx
.align 4,0x90
L016sqradd:
    movl    (%esi,%ecx,4),%eax
    movl    %edx,%ebp
    mull    %edi
    addl    %ebp,%eax
    leal    (%eax,%eax,1),%ebp          /* doubled low word; lea keeps CF for adcl */
    adcl    $0,%edx
    shrl    $31,%eax
    addl    32(%esp,%ecx,4),%ebp
    leal    1(%ecx),%ecx
    adcl    $0,%eax
    addl    %ebx,%ebp
    adcl    $0,%eax
    cmpl    (%esp),%ecx
    movl    %ebp,28(%esp,%ecx,4)
    movl    %eax,%ebx
    jle     L016sqradd
    movl    %edx,%ebp
    addl    %edx,%edx
    shrl    $31,%ebp
    addl    %ebx,%edx
    adcl    $0,%ebp
L015sqrlast:
    movl    20(%esp),%edi               /* n0 */
    movl    16(%esp),%esi               /* esi = np */
    imull   32(%esp),%edi               /* m = tp[0]*n0 */
    addl    32(%esp,%ecx,4),%edx
    movl    (%esi),%eax
    adcl    $0,%ebp
    movl    %edx,32(%esp,%ecx,4)
    movl    %ebp,36(%esp,%ecx,4)
    mull    %edi
    addl    32(%esp),%eax               /* sum is overwritten below; only CF is used */
    leal    -1(%ecx),%ebx               /* ebx = num-1 again */
    adcl    $0,%edx
    movl    $1,%ecx
    movl    4(%esi),%eax
    jmp     L0143rdmadd
.align 4,0x90
L008common_tail:
    /*
     * ---- Final subtraction and copy-out -----------------------------
     * All three paths arrive with ebx = num-1 and the num+1-word value in
     * tp.  ebp = np, edi = rp, esi = tp, ecx = down-counter, edx = index.
     * xorl clears CF before the borrow chain starts.
     */
    movl    16(%esp),%ebp
    movl    4(%esp),%edi
    leal    32(%esp),%esi
    movl    (%esi),%eax
    movl    %ebx,%ecx
    xorl    %edx,%edx
.align 4,0x90
L017sub:
    /* rp[k] = tp[k] - np[k] - borrow.  decl, movl and leal leave CF intact. */
    sbbl    (%ebp,%edx,4),%eax
    movl    %eax,(%edi,%edx,4)
    decl    %ecx
    movl    4(%esi,%edx,4),%eax
    leal    1(%edx),%edx
    jge     L017sub                     /* flags from decl: loop while ecx >= 0 */
    /*
     * eax = tp[num] - borrow, used as a mask (expected to be 0 or -1).
     * Select the copy source without branching:
     *   esi = (tp & mask) | (rp & ~mask)
     * so the difference in rp is kept when the mask is 0 and tp is copied
     * over it when the mask is all-ones.
     */
    sbbl    $0,%eax
    andl    %eax,%esi
    notl    %eax
    movl    %edi,%ebp
    andl    %eax,%ebp
    orl     %ebp,%esi
.align 4,0x90
L018copy:
    /* Copy words num-1 .. 0 into rp and overwrite tp with ecx (-1 after L017sub). */
    movl    (%esi,%ebx,4),%eax
    movl    %eax,(%edi,%ebx,4)
    movl    %ecx,32(%esp,%ebx,4)
    decl    %ebx
    jge     L018copy
    movl    24(%esp),%esp               /* back to the caller's stack */
    movl    $1,%eax
L000just_leave:
    popl    %edi
    popl    %esi
    popl    %ebx
    popl    %ebp
    ret
/* NUL-terminated ident string: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
/*
 * Non-lazy symbol pointer slot for _OPENSSL_ia32cap_P.  The function loads
 * the capability vector's address from here with a PC-relative access.
 */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
#endif