// Source blob: 91980d89bf761f90e2f9efb5ce73f1dae1db52d4 (header line left over from the web blame view; not part of the assembly source)
// Everything below is assembled only for x86-64 builds that have not
// defined OPENSSL_NO_ASM. The matching #endif is outside this excerpt.
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text



//-----------------------------------------------------------------------
// _bn_mul_mont_gather5
//
// Register use, as read by the code below:
//   rdi     destination word vector (written by L$sub / L$copy)
//   rsi     first multiplicand word vector
//   rdx     base of a table of 256-byte rows; one 64-bit word is selected
//           from each row with compare masks
//   rcx     second word vector, multiplied by the per-row factor in rbp
//           and subtracted at the end (presumably the modulus - confirm
//           against the C prototype)
//   r8      pointer to a 64-bit constant (presumably the Montgomery n0)
//   r9d     word count; zero-extended on entry
//   8(%rsp) 32-bit selector index, read with movd (stack-passed argument)
// Returns 1 in rax. rbx, rbp and r12-r15 are saved and restored.
//
// When the word count is a multiple of 8 control transfers to
// L$mul4x_enter (in bn_mul4x_mont_gather5); otherwise the generic
// one-word-at-a-time path below runs.
//-----------------------------------------------------------------------
.globl _bn_mul_mont_gather5
.private_extern _bn_mul_mont_gather5

.p2align 6
_bn_mul_mont_gather5:

	movl %r9d,%r9d		// 32-bit move zero-extends the count into r9
	movq %rsp,%rax		// rax = entry stack pointer

	testl $7,%r9d
	jnz L$mul_enter		// count not a multiple of 8: generic path
	jmp L$mul4x_enter	// count is a multiple of 8: 4x path

.p2align 4
L$mul_enter:
	movd 8(%rsp),%xmm5	// selector index, read before rsp moves
	pushq %rbx

	pushq %rbp

	pushq %r12

	pushq %r13

	pushq %r14

	pushq %r15


	// r10 = (rsp - 280 - 8*count) rounded down to a 1024-byte boundary;
	// this becomes the new stack pointer (scratch vector plus mask area).
	negq %r9
	movq %rsp,%r11
	leaq -280(%rsp,%r9,8),%r10
	negq %r9
	andq $-1024,%r10

	// Move rsp down to r10 in 4096-byte steps, reading one word at each
	// step so every page between the old and new rsp is touched in order.
	subq %r10,%r11
	andq $-4096,%r11
	leaq (%r10,%r11,1),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja L$mul_page_walk
	jmp L$mul_page_walk_done

L$mul_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja L$mul_page_walk
L$mul_page_walk_done:

	leaq L$inc(%rip),%r10	// L$inc is defined outside this block
	movq %rax,8(%rsp,%r9,8)	// save entry rsp just above the scratch vector

L$mul_body:

	leaq 128(%rdx),%r12	// r12 = table + 128, so row offsets span -128..112
	movdqa 0(%r10),%xmm0	// presumably initial counters (from L$inc)
	movdqa 16(%r10),%xmm1	// presumably the per-step increment (from L$inc)
	leaq 24-112(%rsp,%r9,8),%r10
	andq $-16,%r10		// 16-byte aligned base for the mask area

	// Build sixteen 16-byte compare masks at 112(%r10)..352(%r10):
	// each is pcmpeqd of a running counter vector against the broadcast
	// selector index in xmm5.
	pshufd $0,%xmm5,%xmm5	// broadcast the index to all four dwords
	movdqa %xmm1,%xmm4
	movdqa %xmm1,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
.byte 0x67
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,112(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,128(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,144(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,160(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,176(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,192(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,208(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,224(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,240(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,256(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,272(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,288(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,304(%r10)

	paddd %xmm2,%xmm3
.byte 0x67
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,320(%r10)

	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,336(%r10)

	// Gather from the first table row: AND every 16-byte lane of the
	// 256-byte row with its mask and OR the results together. All 256
	// bytes are read whatever the index is. The last four masks are
	// still live in xmm0-xmm3; the other twelve are re-read from memory.
	pand 64(%r12),%xmm0

	pand 80(%r12),%xmm1
	pand 96(%r12),%xmm2
	movdqa %xmm3,352(%r10)
	pand 112(%r12),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -128(%r12),%xmm4
	movdqa -112(%r12),%xmm5
	movdqa -96(%r12),%xmm2
	pand 112(%r10),%xmm4
	movdqa -80(%r12),%xmm3
	pand 128(%r10),%xmm5
	por %xmm4,%xmm0
	pand 144(%r10),%xmm2
	por %xmm5,%xmm1
	pand 160(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -64(%r12),%xmm4
	movdqa -48(%r12),%xmm5
	movdqa -32(%r12),%xmm2
	pand 176(%r10),%xmm4
	movdqa -16(%r12),%xmm3
	pand 192(%r10),%xmm5
	por %xmm4,%xmm0
	pand 208(%r10),%xmm2
	por %xmm5,%xmm1
	pand 224(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa 0(%r12),%xmm4
	movdqa 16(%r12),%xmm5
	movdqa 32(%r12),%xmm2
	pand 240(%r10),%xmm4
	movdqa 48(%r12),%xmm3
	pand 256(%r10),%xmm5
	por %xmm4,%xmm0
	pand 272(%r10),%xmm2
	por %xmm5,%xmm1
	pand 288(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	por %xmm1,%xmm0
	pshufd $0x4e,%xmm0,%xmm1	// swap the 64-bit halves
	por %xmm1,%xmm0			// fold them: low qword = selected word
	leaq 256(%r12),%r12		// advance to the next table row
.byte 102,72,15,126,195			// encodes: movq %xmm0,%rbx

	movq (%r8),%r8		// r8 = constant loaded through the pointer
	movq (%rsi),%rax

	xorq %r14,%r14		// r14 = outer (row) counter
	xorq %r15,%r15		// r15 = inner (word) counter

	// First pass: the scratch vector at (%rsp) has no previous contents
	// to add in. rbp = low product word * r8.
	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	imulq %r10,%rbp
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp L$1st_enter

.p2align 4
L$1st:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r13
	movq %r10,%r11
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

L$1st_enter:
	mulq %rbx
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	leaq 1(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	cmpq %r9,%r15
	jne L$1st


	addq %rax,%r13
	adcq $0,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r9,8)
	movq %rdx,%r13
	movq %r10,%r11

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)	// carry word kept just above the vector

	leaq 1(%r14),%r14
	jmp L$outer
.p2align 4
L$outer:
	// Gather the next word from the current table row using the masks
	// stored above (same area, addressed here as -128..112 from rdx).
	leaq 24+128(%rsp,%r9,8),%rdx
	andq $-16,%rdx
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	movdqa -128(%r12),%xmm0
	movdqa -112(%r12),%xmm1
	movdqa -96(%r12),%xmm2
	movdqa -80(%r12),%xmm3
	pand -128(%rdx),%xmm0
	pand -112(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -80(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa -64(%r12),%xmm0
	movdqa -48(%r12),%xmm1
	movdqa -32(%r12),%xmm2
	movdqa -16(%r12),%xmm3
	pand -64(%rdx),%xmm0
	pand -48(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -16(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 0(%r12),%xmm0
	movdqa 16(%r12),%xmm1
	movdqa 32(%r12),%xmm2
	movdqa 48(%r12),%xmm3
	pand 0(%rdx),%xmm0
	pand 16(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 48(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 64(%r12),%xmm0
	movdqa 80(%r12),%xmm1
	movdqa 96(%r12),%xmm2
	movdqa 112(%r12),%xmm3
	pand 64(%rdx),%xmm0
	pand 80(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 112(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	por %xmm5,%xmm4
	pshufd $0x4e,%xmm4,%xmm0
	por %xmm4,%xmm0
	leaq 256(%r12),%r12

	movq (%rsi),%rax
.byte 102,72,15,126,195			// encodes: movq %xmm0,%rbx

	// Later passes: same multiply-accumulate as the first pass, but the
	// previous contents of the scratch vector are added in as well.
	xorq %r15,%r15
	movq %r8,%rbp
	movq (%rsp),%r10

	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	imulq %r10,%rbp
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq 8(%rsp),%r10
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp L$inner_enter

.p2align 4
L$inner:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

L$inner_enter:
	mulq %rbx
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r10
	movq %rdx,%r11
	adcq $0,%r11
	leaq 1(%r15),%r15

	mulq %rbp
	cmpq %r9,%r15
	jne L$inner

	addq %rax,%r13
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r9,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r9,8)
	movq %rdx,%r13

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)

	leaq 1(%r14),%r14
	cmpq %r9,%r14
	jb L$outer		// one pass per table row, count passes in total

	// Final step: write (scratch - vector at rcx) to the destination.
	// The xorq clears CF; leaq, movq and decq leave CF untouched, so the
	// borrow chain survives across loop iterations.
	xorq %r14,%r14
	movq (%rsp),%rax
	leaq (%rsp),%rsi
	movq %r9,%r15
	jmp L$sub
.p2align 4
L$sub:	sbbq (%rcx,%r14,8),%rax
	movq %rax,(%rdi,%r14,8)
	movq 8(%rsi,%r14,8),%rax
	leaq 1(%r14),%r14
	decq %r15
	jnz L$sub

	// rax = carry word minus the final borrow (presumably 0 or all-ones).
	// Use it as a mask to pick the copy source without branching:
	// rsi = (scratch & rax) | (destination & ~rax).
	sbbq $0,%rax
	xorq %r14,%r14
	andq %rax,%rsi
	notq %rax
	movq %rdi,%rcx
	andq %rax,%rcx
	movq %r9,%r15
	orq %rcx,%rsi
.p2align 4
L$copy:
	movq (%rsi,%r14,8),%rax
	movq %r14,(%rsp,%r14,8)	// overwrite the scratch word with the index
	movq %rax,(%rdi,%r14,8)
	leaq 1(%r14),%r14
	subq $1,%r15
	jnz L$copy

	movq 8(%rsp,%r9,8),%rsi	// rsi = entry rsp saved in the prologue

	movq $1,%rax		// return value

	// The six pushes sit directly below the entry rsp.
	movq -48(%rsi),%r15

	movq -40(%rsi),%r14

	movq -32(%rsi),%r13

	movq -24(%rsi),%r12

	movq -16(%rsi),%rbp

	movq -8(%rsi),%rbx

	leaq (%rsi),%rsp

L$mul_epilogue:
	.byte 0xf3,0xc3		// encodes: rep ret



//-----------------------------------------------------------------------
// bn_mul4x_mont_gather5
//
// Stack-frame wrapper around mul4x_internal. It is also entered at
// L$mul4x_enter from _bn_mul_mont_gather5 when the word count is a
// multiple of 8; on that path rax already holds the entry stack pointer.
//
// Registers on entry are the same as for _bn_mul_mont_gather5 (rdi, rsi,
// rdx, rcx, r8, r9d, plus a stack-passed selector index that
// mul4x_internal reads through rax). Returns 1 in rax; rbx, rbp and
// r12-r15 are saved and restored.
//-----------------------------------------------------------------------
.p2align 5
bn_mul4x_mont_gather5:

.byte 0x67			// address-size prefix on a register move; presumably padding
	movq %rsp,%rax		// rax = entry stack pointer

L$mul4x_enter:
	pushq %rbx

	pushq %rbp

	pushq %r12

	pushq %r13

	pushq %r14

	pushq %r15

L$mul4x_prologue:

.byte 0x67
	shll $3,%r9d			// r9 = count * 8 (bytes), zero-extended
	leaq (%r9,%r9,2),%r10		// r10 = 3 * bytes
	negq %r9

	// Choose the frame address in rbp. r11 = ((rsp - 320 - 2*bytes) - rdi)
	// mod 4096, i.e. the page-offset distance between the candidate frame
	// and the destination buffer. Depending on how it compares with
	// 3*bytes the frame is shifted by that distance (presumably to avoid
	// cache-set aliasing with the destination - TODO confirm).
	leaq -320(%rsp,%r9,2),%r11
	movq %rsp,%rbp
	subq %rdi,%r11
	andq $4095,%r11
	cmpq %r11,%r10
	jb L$mul4xsp_alt
	subq %r11,%rbp
	leaq -320(%rbp,%r9,2),%rbp
	jmp L$mul4xsp_done

.p2align 5
L$mul4xsp_alt:
	leaq 4096-320(,%r9,2),%r10
	leaq -320(%rbp,%r9,2),%rbp
	subq %r10,%r11
	movq $0,%r10		// mov (not xor) so the flags from subq survive
	cmovcq %r10,%r11	// clamp the adjustment to 0 if subq borrowed
	subq %r11,%rbp
L$mul4xsp_done:
	andq $-64,%rbp		// 64-byte align the frame

	// Move rsp down to rbp in 4096-byte steps, reading one word at each
	// step so every page between the old and new rsp is touched in order.
	movq %rsp,%r11
	subq %rbp,%r11
	andq $-4096,%r11
	leaq (%r11,%rbp,1),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja L$mul4x_page_walk
	jmp L$mul4x_page_walk_done

L$mul4x_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja L$mul4x_page_walk
L$mul4x_page_walk_done:

	negq %r9		// r9 = +bytes again

	movq %rax,40(%rsp)	// save entry rsp for the epilogue

L$mul4x_body:

	call mul4x_internal

	movq 40(%rsp),%rsi	// rsi = entry rsp

	movq $1,%rax		// return value

	// The six pushes sit directly below the entry rsp.
	movq -48(%rsi),%r15

	movq -40(%rsi),%r14

	movq -32(%rsi),%r13

	movq -24(%rsi),%r12

	movq -16(%rsi),%rbp

	movq -8(%rsi),%rbx

	leaq (%rsi),%rsp

L$mul4x_epilogue:
	.byte 0xf3,0xc3		// encodes: rep ret




//-----------------------------------------------------------------------
// mul4x_internal
//
// Local helper reached by `call` from bn_mul4x_mont_gather5 and
// _bn_power5. It does not return by itself: it finishes with a jump to
// L$sqr4x_sub_entry, which is defined outside this block.
//
// Registers on entry, as set up by the callers visible in this file:
//   rax  caller's entry stack pointer; the 32-bit selector index is read
//        from 8(%rax)
//   rdx  base of the table of 256-byte rows
//   rsi  first multiplicand word vector
//   rcx  second word vector (presumably the modulus - confirm)
//   r8   pointer to a 64-bit constant (presumably the Montgomery n0)
//   r9   vector length in bytes (word count * 8)
//   rdi  destination pointer, stashed at 56+8(%rsp)
// The "+8" in the rsp-relative offsets presumably accounts for the return
// address pushed by the call. Uses rbx, rbp and r10-r15 as scratch.
//-----------------------------------------------------------------------
.p2align 5
mul4x_internal:
	shlq $5,%r9
	movd 8(%rax),%xmm5		// selector index
	leaq L$inc(%rip),%rax		// L$inc is defined outside this block
	leaq 128(%rdx,%r9,1),%r13	// r13 = table + 128 + 32*bytes: end marker for r12
	shrq $5,%r9
	movdqa 0(%rax),%xmm0		// presumably initial counters (from L$inc)
	movdqa 16(%rax),%xmm1		// presumably the per-step increment (from L$inc)
	leaq 88-112(%rsp,%r9,1),%r10	// base for the mask area
	leaq 128(%rdx),%r12		// r12 = table + 128, so row offsets span -128..112

	// Build sixteen 16-byte compare masks at 112(%r10)..352(%r10):
	// each is pcmpeqd of a running counter vector against the broadcast
	// selector index in xmm5.
	pshufd $0,%xmm5,%xmm5
	movdqa %xmm1,%xmm4
.byte 0x67,0x67
	movdqa %xmm1,%xmm2
	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
.byte 0x67
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,112(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,128(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,144(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,160(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,176(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,192(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,208(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,224(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,240(%r10)
	movdqa %xmm4,%xmm0

	paddd %xmm2,%xmm3
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,256(%r10)
	movdqa %xmm4,%xmm1

	paddd %xmm3,%xmm0
	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,272(%r10)
	movdqa %xmm4,%xmm2

	paddd %xmm0,%xmm1
	pcmpeqd %xmm5,%xmm0
	movdqa %xmm3,288(%r10)
	movdqa %xmm4,%xmm3
	paddd %xmm1,%xmm2
	pcmpeqd %xmm5,%xmm1
	movdqa %xmm0,304(%r10)

	paddd %xmm2,%xmm3
.byte 0x67
	pcmpeqd %xmm5,%xmm2
	movdqa %xmm1,320(%r10)

	pcmpeqd %xmm5,%xmm3
	movdqa %xmm2,336(%r10)

	// Gather from the first table row: AND every 16-byte lane of the
	// 256-byte row with its mask and OR the results together. All 256
	// bytes are read whatever the index is.
	pand 64(%r12),%xmm0

	pand 80(%r12),%xmm1
	pand 96(%r12),%xmm2
	movdqa %xmm3,352(%r10)
	pand 112(%r12),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -128(%r12),%xmm4
	movdqa -112(%r12),%xmm5
	movdqa -96(%r12),%xmm2
	pand 112(%r10),%xmm4
	movdqa -80(%r12),%xmm3
	pand 128(%r10),%xmm5
	por %xmm4,%xmm0
	pand 144(%r10),%xmm2
	por %xmm5,%xmm1
	pand 160(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa -64(%r12),%xmm4
	movdqa -48(%r12),%xmm5
	movdqa -32(%r12),%xmm2
	pand 176(%r10),%xmm4
	movdqa -16(%r12),%xmm3
	pand 192(%r10),%xmm5
	por %xmm4,%xmm0
	pand 208(%r10),%xmm2
	por %xmm5,%xmm1
	pand 224(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	movdqa 0(%r12),%xmm4
	movdqa 16(%r12),%xmm5
	movdqa 32(%r12),%xmm2
	pand 240(%r10),%xmm4
	movdqa 48(%r12),%xmm3
	pand 256(%r10),%xmm5
	por %xmm4,%xmm0
	pand 272(%r10),%xmm2
	por %xmm5,%xmm1
	pand 288(%r10),%xmm3
	por %xmm2,%xmm0
	por %xmm3,%xmm1
	por %xmm1,%xmm0
	pshufd $0x4e,%xmm0,%xmm1	// swap the 64-bit halves
	por %xmm1,%xmm0			// fold them: low qword = selected word
	leaq 256(%r12),%r12		// advance to the next table row
.byte 102,72,15,126,195			// encodes: movq %xmm0,%rbx

	movq %r13,16+8(%rsp)		// remember the end-of-table marker
	movq %rdi,56+8(%rsp)		// remember the destination pointer

	movq (%r8),%r8			// r8 = constant loaded through the pointer
	movq (%rsi),%rax
	leaq (%rsi,%r9,1),%rsi		// rsi = end of the first vector
	negq %r9			// r9 = -bytes; (%rsi,%r9,1) is word 0 again

	// First pass, four words per loop iteration. rbp = low product
	// word * r8; r14 walks the scratch vector starting at 64+8(%rsp).
	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	imulq %r10,%rbp
	leaq 64+8(%rsp),%r14
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi,%r9,1),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r9,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%r9),%r15		// r15 = negative byte offset, counts up to 0
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %rdi,(%r14)
	movq %rdx,%r13
	jmp L$1st4x

.p2align 5
L$1st4x:
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%r14)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq 0(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %rdi,(%r14)
	movq %rdx,%r13

	addq $32,%r15
	jnz L$1st4x

	// Tail of the first pass (last two words plus the carry).
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r9,1),%rax		// preload word 0 for the next pass
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%r14)
	movq %rdx,%r13

	leaq (%rcx,%r9,1),%rcx		// rewind rcx by the vector length

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi			// rdi = carry out of this pass
	movq %r13,-8(%r14)

	jmp L$outer4x

.p2align 5
L$outer4x:
	// Gather the next word from the current table row using the masks
	// stored above (same area, addressed here as -128..112 from rdx).
	leaq 16+128(%r14),%rdx
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	movdqa -128(%r12),%xmm0
	movdqa -112(%r12),%xmm1
	movdqa -96(%r12),%xmm2
	movdqa -80(%r12),%xmm3
	pand -128(%rdx),%xmm0
	pand -112(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -80(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa -64(%r12),%xmm0
	movdqa -48(%r12),%xmm1
	movdqa -32(%r12),%xmm2
	movdqa -16(%r12),%xmm3
	pand -64(%rdx),%xmm0
	pand -48(%rdx),%xmm1
	por %xmm0,%xmm4
	pand -32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand -16(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 0(%r12),%xmm0
	movdqa 16(%r12),%xmm1
	movdqa 32(%r12),%xmm2
	movdqa 48(%r12),%xmm3
	pand 0(%rdx),%xmm0
	pand 16(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 32(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 48(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqa 64(%r12),%xmm0
	movdqa 80(%r12),%xmm1
	movdqa 96(%r12),%xmm2
	movdqa 112(%r12),%xmm3
	pand 64(%rdx),%xmm0
	pand 80(%rdx),%xmm1
	por %xmm0,%xmm4
	pand 96(%rdx),%xmm2
	por %xmm1,%xmm5
	pand 112(%rdx),%xmm3
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	por %xmm5,%xmm4
	pshufd $0x4e,%xmm4,%xmm0
	por %xmm4,%xmm0
	leaq 256(%r12),%r12
.byte 102,72,15,126,195			// encodes: movq %xmm0,%rbx

	// Later passes: as the first pass, but the previous contents of the
	// scratch vector are added in. rax still holds word 0 of the first
	// vector, preloaded at the end of the previous pass.
	movq (%r14,%r9,1),%r10
	movq %r8,%rbp
	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	imulq %r10,%rbp
	movq %rdx,%r11
	movq %rdi,(%r14)		// store the carry of the previous pass

	leaq (%r14,%r9,1),%r14		// rewind r14 to the start of the scratch vector

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi,%r9,1),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	addq 8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r9,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%r9),%r15
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %rdx,%r13
	jmp L$inner4x

.p2align 5
L$inner4x:
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	adcq $0,%rdx
	addq 16(%r14),%r10
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-32(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx),%rax
	adcq $0,%rdx
	addq -8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq 0(%rcx),%rax
	adcq $0,%rdx
	addq (%r14),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-16(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	addq 8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi,%r15,1),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 32(%rcx),%rcx
	adcq $0,%rdx
	movq %r13,-8(%r14)
	movq %rdx,%r13

	addq $32,%r15
	jnz L$inner4x

	// Tail of a later pass.
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx),%rax
	adcq $0,%rdx
	addq 16(%r14),%r10
	leaq 32(%r14),%r14
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-32(%r14)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq %rbp,%rax			// multiply by the word at -8(%rcx) with the
	movq -8(%rcx),%rbp		// operands swapped, leaving that word in rbp
	adcq $0,%rdx
	addq -8(%r14),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r9,1),%rax		// preload word 0 for the next pass
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-24(%r14)
	movq %rdx,%r13

	movq %rdi,-16(%r14)
	leaq (%rcx,%r9,1),%rcx		// rewind rcx by the vector length

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi
	addq (%r14),%r13
	adcq $0,%rdi			// rdi = carry out of this pass
	movq %r13,-8(%r14)

	cmpq 16+8(%rsp),%r12		// more table rows to process?
	jb L$outer4x

	// Set up registers for the code at L$sqr4x_sub_entry (outside this
	// block). r15 is zero here because the inner loop just exited on
	// jnz, so adcq %r15,%r15 captures the borrow of the subq.
	xorq %rax,%rax
	subq %r13,%rbp
	adcq %r15,%r15
	orq %r15,%rdi
	subq %rdi,%rax			// rax = 0 - (carry | borrow), presumably a select mask
	leaq (%r14,%r9,1),%rbx		// rbx = start of the scratch vector
	movq (%rcx),%r12
	leaq (%rcx),%rbp
	movq %r9,%rcx
	sarq $3+2,%rcx			// r9 is -bytes, so rcx = -(count / 4)
	movq 56+8(%rsp),%rdi		// restore the destination pointer
	decq %r12
	xorq %r10,%r10
	movq 8(%rbp),%r13
	movq 16(%rbp),%r14
	movq 24(%rbp),%r15
	jmp L$sqr4x_sub_entry

//-----------------------------------------------------------------------
// _bn_power5
//
// Calls the pair __bn_sqr8x_internal / __bn_post4x_internal five times
// and then calls mul4x_internal once. The squaring helpers are defined
// outside this block; presumably each pair is one modular squaring, which
// would make the whole routine a fifth-power-of-two exponentiation
// followed by a multiply with a gathered table entry - confirm against
// the helpers.
//
// Registers on entry, as read by the code below:
//   rdi     pointer used for frame placement and stashed in xmm1
//           (presumably the destination - confirm)
//   rsi     pointer copied into rdi before the final multiply
//   rdx     table base (stashed in xmm4 across the squarings)
//   rcx     second word vector (stashed in xmm2 across the squarings)
//   r8      pointer to a 64-bit constant; the value is kept at 32(%rsp)
//   r9d     word count
//   8(entry rsp)  selector index, read later by mul4x_internal via rax
// Returns 1 in rax. rbx, rbp and r12-r15 are saved and restored.
//-----------------------------------------------------------------------
.globl _bn_power5
.private_extern _bn_power5

.p2align 5
_bn_power5:

	movq %rsp,%rax		// rax = entry stack pointer

	pushq %rbx

	pushq %rbp

	pushq %r12

	pushq %r13

	pushq %r14

	pushq %r15

L$power5_prologue:

	shll $3,%r9d			// r9 = count * 8 (bytes), zero-extended
	leal (%r9,%r9,2),%r10d		// r10 = 3 * bytes
	negq %r9
	movq (%r8),%r8			// r8 = constant loaded through the pointer

	// Choose the frame address in rbp. r11 = ((rsp - 320 - 2*bytes) - rdi)
	// mod 4096, i.e. the page-offset distance between the candidate frame
	// and the buffer at rdi. Depending on how it compares with 3*bytes
	// the frame is shifted by that distance (presumably to avoid
	// cache-set aliasing with that buffer - TODO confirm).
	leaq -320(%rsp,%r9,2),%r11
	movq %rsp,%rbp
	subq %rdi,%r11
	andq $4095,%r11
	cmpq %r11,%r10
	jb L$pwr_sp_alt
	subq %r11,%rbp
	leaq -320(%rbp,%r9,2),%rbp
	jmp L$pwr_sp_done

.p2align 5
L$pwr_sp_alt:
	leaq 4096-320(,%r9,2),%r10
	leaq -320(%rbp,%r9,2),%rbp
	subq %r10,%r11
	movq $0,%r10		// mov (not xor) so the flags from subq survive
	cmovcq %r10,%r11	// clamp the adjustment to 0 if subq borrowed
	subq %r11,%rbp
L$pwr_sp_done:
	andq $-64,%rbp		// 64-byte align the frame

	// Move rsp down to rbp in 4096-byte steps, reading one word at each
	// step so every page between the old and new rsp is touched in order.
	movq %rsp,%r11
	subq %rbp,%r11
	andq $-4096,%r11
	leaq (%r11,%rbp,1),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja L$pwr_page_walk
	jmp L$pwr_page_walk_done

L$pwr_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja L$pwr_page_walk
L$pwr_page_walk_done:

	movq %r9,%r10		// r10 = -bytes
	negq %r9		// r9  = +bytes

	movq %r8,32(%rsp)	// keep the constant in the frame
	movq %rax,40(%rsp)	// save entry rsp for the epilogue

L$power5_body:
	// Park four values in xmm registers across the helper calls.
.byte 102,72,15,110,207		// encodes: movq %rdi,%xmm1
.byte 102,72,15,110,209		// encodes: movq %rcx,%xmm2
.byte 102,73,15,110,218		// encodes: movq %r10,%xmm3
.byte 102,72,15,110,226		// encodes: movq %rdx,%xmm4

	call __bn_sqr8x_internal
	call __bn_post4x_internal
	call __bn_sqr8x_internal
	call __bn_post4x_internal
	call __bn_sqr8x_internal
	call __bn_post4x_internal
	call __bn_sqr8x_internal
	call __bn_post4x_internal
	call __bn_sqr8x_internal
	call __bn_post4x_internal

	// Set up the register contract of mul4x_internal.
.byte 102,72,15,126,209		// encodes: movq %xmm2,%rcx
.byte 102,72,15,126,226		// encodes: movq %xmm4,%rdx
	movq %rsi,%rdi
	movq 40(%rsp),%rax	// entry rsp, used to read the selector index
	leaq 32(%rsp),%r8	// pointer to the constant saved above

	call mul4x_internal

	movq 40(%rsp),%rsi	// rsi = entry rsp

	movq $1,%rax		// return value
	// The six pushes sit directly below the entry rsp.
	movq -48(%rsi),%r15

	movq -40(%rsi),%r14

	movq -32(%rsi),%r13

	movq -24(%rsi),%r12

	movq -16(%rsi),%rbp

	movq -8(%rsi),%rbx

	leaq (%rsi),%rsp

L$power5_epilogue:
	.byte 0xf3,0xc3		// encodes: rep ret



// _bn_sqr8x_internal / __bn_sqr8x_internal
// Squaring pass: builds a result twice as long as the input in the scratch
// area that starts at 48+8(%rsp), then falls straight through into
// __bn_sqr8x_reduction (this part contains no ret of its own).
// Reads on entry: %rsi (input words), %r9 and %r10, and %xmm2 (moved into
// %rbp by the last instruction). Callers in this file run
// `movq %r9,%r10; negq %r9` before the call, so %r10 = -%r9; presumably %r9
// is the operand length in bytes - TODO confirm against the callers.
// Overwrites %rax, %rbx, %rcx, %rdx, %rbp, %rsi, %rdi, %r8, %r10-%r15, flags.
// The lone `.byte 0x67` lines are single prefix bytes attached to the
// instruction that follows; they look like padding - TODO confirm intent.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001204.globl _bn_sqr8x_internal
1205.private_extern _bn_sqr8x_internal
1206.private_extern _bn_sqr8x_internal
1207
1208.p2align 5
1209_bn_sqr8x_internal:
1210__bn_sqr8x_internal:
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
// %rbp = %r10+32 is the running index and %rsi is advanced by %r9, so input
// words are addressed as disp(%rsi,index) with the index counting up to zero.
1284 leaq 32(%r10),%rbp
1285 leaq (%rsi,%r9,1),%rsi
1286
1287 movq %r9,%rcx
1288
1289
// First pass: %r14 and %r15 hold the first two words addressed through %rbp.
// Their products with the following words are stored (not accumulated) into
// the scratch area through %rdi.
1290 movq -32(%rsi,%rbp,1),%r14
1291 leaq 48+8(%rsp,%r9,2),%rdi
1292 movq -24(%rsi,%rbp,1),%rax
1293 leaq -32(%rdi,%rbp,1),%rdi
1294 movq -16(%rsi,%rbp,1),%rbx
1295 movq %rax,%r15
1296
1297 mulq %r14
1298 movq %rax,%r10
1299 movq %rbx,%rax
1300 movq %rdx,%r11
1301 movq %r10,-24(%rdi,%rbp,1)
1302
1303 mulq %r14
1304 addq %rax,%r11
1305 movq %rbx,%rax
1306 adcq $0,%rdx
1307 movq %r11,-16(%rdi,%rbp,1)
1308 movq %rdx,%r10
1309
1310
1311 movq -8(%rsi,%rbp,1),%rbx
1312 mulq %r15
1313 movq %rax,%r12
1314 movq %rbx,%rax
1315 movq %rdx,%r13
1316
1317 leaq (%rbp),%rcx
1318 mulq %r14
1319 addq %rax,%r10
1320 movq %rbx,%rax
1321 movq %rdx,%r11
1322 adcq $0,%r11
1323 addq %r12,%r10
1324 adcq $0,%r11
1325 movq %r10,-8(%rdi,%rcx,1)
1326 jmp L$sqr4x_1st
1327
1328.p2align 5
// First-pass loop: four further input words per iteration (%rcx grows by 32),
// each multiplied by both %r14 and %r15; exits when %rcx reaches zero.
1329L$sqr4x_1st:
1330 movq (%rsi,%rcx,1),%rbx
1331 mulq %r15
1332 addq %rax,%r13
1333 movq %rbx,%rax
1334 movq %rdx,%r12
1335 adcq $0,%r12
1336
1337 mulq %r14
1338 addq %rax,%r11
1339 movq %rbx,%rax
1340 movq 8(%rsi,%rcx,1),%rbx
1341 movq %rdx,%r10
1342 adcq $0,%r10
1343 addq %r13,%r11
1344 adcq $0,%r10
1345
1346
1347 mulq %r15
1348 addq %rax,%r12
1349 movq %rbx,%rax
1350 movq %r11,(%rdi,%rcx,1)
1351 movq %rdx,%r13
1352 adcq $0,%r13
1353
1354 mulq %r14
1355 addq %rax,%r10
1356 movq %rbx,%rax
1357 movq 16(%rsi,%rcx,1),%rbx
1358 movq %rdx,%r11
1359 adcq $0,%r11
1360 addq %r12,%r10
1361 adcq $0,%r11
1362
1363 mulq %r15
1364 addq %rax,%r13
1365 movq %rbx,%rax
1366 movq %r10,8(%rdi,%rcx,1)
1367 movq %rdx,%r12
1368 adcq $0,%r12
1369
1370 mulq %r14
1371 addq %rax,%r11
1372 movq %rbx,%rax
1373 movq 24(%rsi,%rcx,1),%rbx
1374 movq %rdx,%r10
1375 adcq $0,%r10
1376 addq %r13,%r11
1377 adcq $0,%r10
1378
1379
1380 mulq %r15
1381 addq %rax,%r12
1382 movq %rbx,%rax
1383 movq %r11,16(%rdi,%rcx,1)
1384 movq %rdx,%r13
1385 adcq $0,%r13
1386 leaq 32(%rcx),%rcx
1387
1388 mulq %r14
1389 addq %rax,%r10
1390 movq %rbx,%rax
1391 movq %rdx,%r11
1392 adcq $0,%r11
1393 addq %r12,%r10
1394 adcq $0,%r11
1395 movq %r10,-8(%rdi,%rcx,1)
1396
1397 cmpq $0,%rcx
1398 jne L$sqr4x_1st
1399
// Close the first pass: last product with %r15, top two words stored at
// (%rdi) and 8(%rdi); %rbp moves on by two words for the outer loop.
1400 mulq %r15
1401 addq %rax,%r13
1402 leaq 16(%rbp),%rbp
1403 adcq $0,%rdx
1404 addq %r11,%r13
1405 adcq $0,%rdx
1406
1407 movq %r13,(%rdi)
1408 movq %rdx,%r12
1409 movq %rdx,8(%rdi)
1410 jmp L$sqr4x_outer
1411
1412.p2align 5
// Outer loop: picks up the next pair of input words in %r14/%r15 (%rbp moves
// by 16 bytes per pass) and this time adds the products to the words already
// present in the scratch area.
1413L$sqr4x_outer:
1414 movq -32(%rsi,%rbp,1),%r14
1415 leaq 48+8(%rsp,%r9,2),%rdi
1416 movq -24(%rsi,%rbp,1),%rax
1417 leaq -32(%rdi,%rbp,1),%rdi
1418 movq -16(%rsi,%rbp,1),%rbx
1419 movq %rax,%r15
1420
1421 mulq %r14
1422 movq -24(%rdi,%rbp,1),%r10
1423 addq %rax,%r10
1424 movq %rbx,%rax
1425 adcq $0,%rdx
1426 movq %r10,-24(%rdi,%rbp,1)
1427 movq %rdx,%r11
1428
1429 mulq %r14
1430 addq %rax,%r11
1431 movq %rbx,%rax
1432 adcq $0,%rdx
1433 addq -16(%rdi,%rbp,1),%r11
1434 movq %rdx,%r10
1435 adcq $0,%r10
1436 movq %r11,-16(%rdi,%rbp,1)
1437
1438 xorq %r12,%r12
1439
1440 movq -8(%rsi,%rbp,1),%rbx
1441 mulq %r15
1442 addq %rax,%r12
1443 movq %rbx,%rax
1444 adcq $0,%rdx
1445 addq -8(%rdi,%rbp,1),%r12
1446 movq %rdx,%r13
1447 adcq $0,%r13
1448
1449 mulq %r14
1450 addq %rax,%r10
1451 movq %rbx,%rax
1452 adcq $0,%rdx
1453 addq %r12,%r10
1454 movq %rdx,%r11
1455 adcq $0,%r11
1456 movq %r10,-8(%rdi,%rbp,1)
1457
1458 leaq (%rbp),%rcx
1459 jmp L$sqr4x_inner
1460
1461.p2align 5
// Inner loop: two further input words per iteration (%rcx grows by 16),
// multiply-accumulate against %r14 and %r15; exits when %rcx reaches zero.
1462L$sqr4x_inner:
1463 movq (%rsi,%rcx,1),%rbx
1464 mulq %r15
1465 addq %rax,%r13
1466 movq %rbx,%rax
1467 movq %rdx,%r12
1468 adcq $0,%r12
1469 addq (%rdi,%rcx,1),%r13
1470 adcq $0,%r12
1471
1472.byte 0x67
1473 mulq %r14
1474 addq %rax,%r11
1475 movq %rbx,%rax
1476 movq 8(%rsi,%rcx,1),%rbx
1477 movq %rdx,%r10
1478 adcq $0,%r10
1479 addq %r13,%r11
1480 adcq $0,%r10
1481
1482 mulq %r15
1483 addq %rax,%r12
1484 movq %r11,(%rdi,%rcx,1)
1485 movq %rbx,%rax
1486 movq %rdx,%r13
1487 adcq $0,%r13
1488 addq 8(%rdi,%rcx,1),%r12
1489 leaq 16(%rcx),%rcx
1490 adcq $0,%r13
1491
1492 mulq %r14
1493 addq %rax,%r10
1494 movq %rbx,%rax
1495 adcq $0,%rdx
1496 addq %r12,%r10
1497 movq %rdx,%r11
1498 adcq $0,%r11
1499 movq %r10,-8(%rdi,%rcx,1)
1500
1501 cmpq $0,%rcx
1502 jne L$sqr4x_inner
1503
1504.byte 0x67
1505 mulq %r15
1506 addq %rax,%r13
1507 adcq $0,%rdx
1508 addq %r11,%r13
1509 adcq $0,%rdx
1510
1511 movq %r13,(%rdi)
1512 movq %rdx,%r12
1513 movq %rdx,8(%rdi)
1514
1515 addq $16,%rbp
1516 jnz L$sqr4x_outer
1517
1518
// Tail (%rbp is zero here): cross products among the last four input words,
// addressed as -32(%rsi) .. -8(%rsi).
1519 movq -32(%rsi),%r14
1520 leaq 48+8(%rsp,%r9,2),%rdi
1521 movq -24(%rsi),%rax
1522 leaq -32(%rdi,%rbp,1),%rdi
1523 movq -16(%rsi),%rbx
1524 movq %rax,%r15
1525
1526 mulq %r14
1527 addq %rax,%r10
1528 movq %rbx,%rax
1529 movq %rdx,%r11
1530 adcq $0,%r11
1531
1532 mulq %r14
1533 addq %rax,%r11
1534 movq %rbx,%rax
1535 movq %r10,-24(%rdi)
1536 movq %rdx,%r10
1537 adcq $0,%r10
1538 addq %r13,%r11
1539 movq -8(%rsi),%rbx
1540 adcq $0,%r10
1541
1542 mulq %r15
1543 addq %rax,%r12
1544 movq %rbx,%rax
1545 movq %r11,-16(%rdi)
1546 movq %rdx,%r13
1547 adcq $0,%r13
1548
1549 mulq %r14
1550 addq %rax,%r10
1551 movq %rbx,%rax
1552 movq %rdx,%r11
1553 adcq $0,%r11
1554 addq %r12,%r10
1555 adcq $0,%r11
1556 movq %r10,-8(%rdi)
1557
1558 mulq %r15
1559 addq %rax,%r13
1560 movq -16(%rsi),%rax
1561 adcq $0,%rdx
1562 addq %r11,%r13
1563 adcq $0,%rdx
1564
1565 movq %r13,(%rdi)
1566 movq %rdx,%r12
1567 movq %rdx,8(%rdi)
1568
// Product of the last two input words (-16(%rsi) times -8(%rsi)); the top
// scratch word at 24(%rdi) is written as zero. %rbp becomes 16-%r9 and
// %r14/%r15 are cleared for the pass below.
1569 mulq %rbx
1570 addq $16,%rbp
1571 xorq %r14,%r14
1572 subq %r9,%rbp
1573 xorq %r15,%r15
1574
1575 addq %r12,%rax
1576 adcq $0,%rdx
1577 movq %rax,8(%rdi)
1578 movq %rdx,16(%rdi)
1579 movq %r15,24(%rdi)
1580
// Shift-and-add pass over the scratch area from its start (48+8(%rsp)).
// Each scratch word is doubled with leaq (x + 2*word); the bit shifted out
// of the previous word is recovered with shrq $63 and merged in via %r14 or
// orq. %rcx is zero here (the loops above exit only when it reaches zero),
// so `leaq (%rcx,%r11,2)` is just 2*%r11. The square of one input word
// (mulq %rax) is then added to each pair of doubled words. The carry is
// parked in %r15 by sbbq and turned back into CF by negq.
1581 movq -16(%rsi,%rbp,1),%rax
1582 leaq 48+8(%rsp),%rdi
1583 xorq %r10,%r10
1584 movq 8(%rdi),%r11
1585
1586 leaq (%r14,%r10,2),%r12
1587 shrq $63,%r10
1588 leaq (%rcx,%r11,2),%r13
1589 shrq $63,%r11
1590 orq %r10,%r13
1591 movq 16(%rdi),%r10
1592 movq %r11,%r14
1593 mulq %rax
1594 negq %r15
1595 movq 24(%rdi),%r11
1596 adcq %rax,%r12
1597 movq -8(%rsi,%rbp,1),%rax
1598 movq %r12,(%rdi)
1599 adcq %rdx,%r13
1600
1601 leaq (%r14,%r10,2),%rbx
1602 movq %r13,8(%rdi)
1603 sbbq %r15,%r15
1604 shrq $63,%r10
1605 leaq (%rcx,%r11,2),%r8
1606 shrq $63,%r11
1607 orq %r10,%r8
1608 movq 32(%rdi),%r10
1609 movq %r11,%r14
1610 mulq %rax
1611 negq %r15
1612 movq 40(%rdi),%r11
1613 adcq %rax,%rbx
1614 movq 0(%rsi,%rbp,1),%rax
1615 movq %rbx,16(%rdi)
1616 adcq %rdx,%r8
1617 leaq 16(%rbp),%rbp
1618 movq %r8,24(%rdi)
1619 sbbq %r15,%r15
1620 leaq 64(%rdi),%rdi
1621 jmp L$sqr4x_shift_n_add
1622
1623.p2align 5
// Main shift-and-add loop: four input words squared and eight scratch words
// rewritten per iteration (%rbp += 32, %rdi += 64); ends when %rbp hits zero.
1624L$sqr4x_shift_n_add:
1625 leaq (%r14,%r10,2),%r12
1626 shrq $63,%r10
1627 leaq (%rcx,%r11,2),%r13
1628 shrq $63,%r11
1629 orq %r10,%r13
1630 movq -16(%rdi),%r10
1631 movq %r11,%r14
1632 mulq %rax
1633 negq %r15
1634 movq -8(%rdi),%r11
1635 adcq %rax,%r12
1636 movq -8(%rsi,%rbp,1),%rax
1637 movq %r12,-32(%rdi)
1638 adcq %rdx,%r13
1639
1640 leaq (%r14,%r10,2),%rbx
1641 movq %r13,-24(%rdi)
1642 sbbq %r15,%r15
1643 shrq $63,%r10
1644 leaq (%rcx,%r11,2),%r8
1645 shrq $63,%r11
1646 orq %r10,%r8
1647 movq 0(%rdi),%r10
1648 movq %r11,%r14
1649 mulq %rax
1650 negq %r15
1651 movq 8(%rdi),%r11
1652 adcq %rax,%rbx
1653 movq 0(%rsi,%rbp,1),%rax
1654 movq %rbx,-16(%rdi)
1655 adcq %rdx,%r8
1656
1657 leaq (%r14,%r10,2),%r12
1658 movq %r8,-8(%rdi)
1659 sbbq %r15,%r15
1660 shrq $63,%r10
1661 leaq (%rcx,%r11,2),%r13
1662 shrq $63,%r11
1663 orq %r10,%r13
1664 movq 16(%rdi),%r10
1665 movq %r11,%r14
1666 mulq %rax
1667 negq %r15
1668 movq 24(%rdi),%r11
1669 adcq %rax,%r12
1670 movq 8(%rsi,%rbp,1),%rax
1671 movq %r12,0(%rdi)
1672 adcq %rdx,%r13
1673
1674 leaq (%r14,%r10,2),%rbx
1675 movq %r13,8(%rdi)
1676 sbbq %r15,%r15
1677 shrq $63,%r10
1678 leaq (%rcx,%r11,2),%r8
1679 shrq $63,%r11
1680 orq %r10,%r8
1681 movq 32(%rdi),%r10
1682 movq %r11,%r14
1683 mulq %rax
1684 negq %r15
1685 movq 40(%rdi),%r11
1686 adcq %rax,%rbx
1687 movq 16(%rsi,%rbp,1),%rax
1688 movq %rbx,16(%rdi)
1689 adcq %rdx,%r8
1690 movq %r8,24(%rdi)
1691 sbbq %r15,%r15
1692 leaq 64(%rdi),%rdi
1693 addq $32,%rbp
1694 jnz L$sqr4x_shift_n_add
1695
// Final two input words of the shift-and-add pass (the last square uses the
// word at -8(%rsi)).
1696 leaq (%r14,%r10,2),%r12
1697.byte 0x67
1698 shrq $63,%r10
1699 leaq (%rcx,%r11,2),%r13
1700 shrq $63,%r11
1701 orq %r10,%r13
1702 movq -16(%rdi),%r10
1703 movq %r11,%r14
1704 mulq %rax
1705 negq %r15
1706 movq -8(%rdi),%r11
1707 adcq %rax,%r12
1708 movq -8(%rsi),%rax
1709 movq %r12,-32(%rdi)
1710 adcq %rdx,%r13
1711
1712 leaq (%r14,%r10,2),%rbx
1713 movq %r13,-24(%rdi)
1714 sbbq %r15,%r15
1715 shrq $63,%r10
1716 leaq (%rcx,%r11,2),%r8
1717 shrq $63,%r11
1718 orq %r10,%r8
1719 mulq %rax
1720 negq %r15
1721 adcq %rax,%rbx
1722 adcq %rdx,%r8
1723 movq %rbx,-16(%rdi)
1724 movq %r8,-8(%rdi)
// Encoded `movq %xmm2,%rbp`: %rbp takes the value the caller parked in %xmm2
// before execution continues into __bn_sqr8x_reduction below.
1725.byte 102,72,15,126,213
// __bn_sqr8x_reduction
// Reduces the scratch area at 48+8(%rsp) in blocks of eight words. It is
// reached by fall-through from the squaring code above and by a direct call
// from bn_from_mont8x.
// Reads on entry:
//   %r9        byte length (added to %rbp and to the scratch base, then negated)
//   %rbp       table walked 64 bytes at a time (presumably the modulus - TODO confirm)
//   32+8(%rsp) per-word multiplier; callers store %r8 at 32(%rsp) before the
//              call, the extra 8 being the pushed return address
//   %xmm2/%xmm3 reloaded into %rbp / %r9 at the end of every block
// Stack slots written: 0+8(%rsp) = end of the %rbp table, 8+8(%rsp) = end of
// the scratch area, 48+8 .. 48+56+8(%rsp) = the eight multipliers of the
// current block (this reuses the first eight scratch words as storage).
// On return %rax holds the carry out of the last block and %rdx the
// end-of-scratch pointer. Returns with `rep ret` (0xf3,0xc3).
// The lone `.byte 0x66` / `.byte 0x67` lines are single prefix bytes attached
// to the instruction that follows; they look like padding - TODO confirm.
David Benjamin4969cc92016-04-22 15:02:23 -04001726__bn_sqr8x_reduction:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001727 xorq %rax,%rax
David Benjamin4969cc92016-04-22 15:02:23 -04001728 leaq (%r9,%rbp,1),%rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001729 leaq 48+8(%rsp,%r9,2),%rdx
1730 movq %rcx,0+8(%rsp)
1731 leaq 48+8(%rsp,%r9,1),%rdi
1732 movq %rdx,8+8(%rsp)
1733 negq %r9
1734 jmp L$8x_reduction_loop
1735
1736.p2align 5
// One block: step %rdi back by the length (%r9 is negative here), load eight
// scratch words into %rbx,%r9-%r15, and store the carry from the previous
// block (%rax) at the end-of-scratch slot (%rdx).
1737L$8x_reduction_loop:
1738 leaq (%rdi,%r9,1),%rdi
1739.byte 0x66
1740 movq 0(%rdi),%rbx
1741 movq 8(%rdi),%r9
1742 movq 16(%rdi),%r10
1743 movq 24(%rdi),%r11
1744 movq 32(%rdi),%r12
1745 movq 40(%rdi),%r13
1746 movq 48(%rdi),%r14
1747 movq 56(%rdi),%r15
1748 movq %rax,(%rdx)
1749 leaq 64(%rdi),%rdi
1750
// %r8 keeps the lowest word; %rbx becomes that word times the value at
// 32+8(%rsp), i.e. the multiplier for the first of eight rounds.
1751.byte 0x67
1752 movq %rbx,%r8
1753 imulq 32+8(%rsp),%rbx
1754 movq 0(%rbp),%rax
1755 movl $8,%ecx
1756 jmp L$8x_reduce
1757
1758.p2align 5
// Eight rounds (%ecx counts 8..1). Each round multiplies the eight words at
// 0..56(%rbp) by %rbx and folds them into %r8-%r15, the window moving down by
// one word. The low product word is never added explicitly: `negq %r8` only
// produces CF = (%r8 != 0), which is used as the carry out of the lowest word
// (presumably the multiplier is chosen so that word cancels - TODO confirm).
// The round's multiplier is saved at 48-8+8(%rsp,%rcx,8) for L$8x_tail, and
// the next round's multiplier is prepared in %rsi.
1759L$8x_reduce:
1760 mulq %rbx
David Benjamin4969cc92016-04-22 15:02:23 -04001761 movq 8(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001762 negq %r8
1763 movq %rdx,%r8
1764 adcq $0,%r8
1765
1766 mulq %rbx
1767 addq %rax,%r9
David Benjamin4969cc92016-04-22 15:02:23 -04001768 movq 16(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001769 adcq $0,%rdx
1770 addq %r9,%r8
1771 movq %rbx,48-8+8(%rsp,%rcx,8)
1772 movq %rdx,%r9
1773 adcq $0,%r9
1774
1775 mulq %rbx
1776 addq %rax,%r10
David Benjamin4969cc92016-04-22 15:02:23 -04001777 movq 24(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001778 adcq $0,%rdx
1779 addq %r10,%r9
1780 movq 32+8(%rsp),%rsi
1781 movq %rdx,%r10
1782 adcq $0,%r10
1783
1784 mulq %rbx
1785 addq %rax,%r11
David Benjamin4969cc92016-04-22 15:02:23 -04001786 movq 32(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001787 adcq $0,%rdx
1788 imulq %r8,%rsi
1789 addq %r11,%r10
1790 movq %rdx,%r11
1791 adcq $0,%r11
1792
1793 mulq %rbx
1794 addq %rax,%r12
David Benjamin4969cc92016-04-22 15:02:23 -04001795 movq 40(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001796 adcq $0,%rdx
1797 addq %r12,%r11
1798 movq %rdx,%r12
1799 adcq $0,%r12
1800
1801 mulq %rbx
1802 addq %rax,%r13
David Benjamin4969cc92016-04-22 15:02:23 -04001803 movq 48(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001804 adcq $0,%rdx
1805 addq %r13,%r12
1806 movq %rdx,%r13
1807 adcq $0,%r13
1808
1809 mulq %rbx
1810 addq %rax,%r14
David Benjamin4969cc92016-04-22 15:02:23 -04001811 movq 56(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001812 adcq $0,%rdx
1813 addq %r14,%r13
1814 movq %rdx,%r14
1815 adcq $0,%r14
1816
1817 mulq %rbx
1818 movq %rsi,%rbx
1819 addq %rax,%r15
1820 movq 0(%rbp),%rax
1821 adcq $0,%rdx
1822 addq %r15,%r14
1823 movq %rdx,%r15
1824 adcq $0,%r15
1825
1826 decl %ecx
1827 jnz L$8x_reduce
1828
// Advance the table pointer by 64 bytes; if it has reached the saved end
// (0+8(%rsp)) there are no further table words and the tail is skipped.
David Benjamin4969cc92016-04-22 15:02:23 -04001829 leaq 64(%rbp),%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001830 xorq %rax,%rax
1831 movq 8+8(%rsp),%rdx
1832 cmpq 0+8(%rsp),%rbp
1833 jae L$8x_no_tail
1834
// Fold in the next eight scratch words; the carry is parked in %rsi (sbbq).
// %rbx is reloaded with the multiplier saved by the first round.
1835.byte 0x66
1836 addq 0(%rdi),%r8
1837 adcq 8(%rdi),%r9
1838 adcq 16(%rdi),%r10
1839 adcq 24(%rdi),%r11
1840 adcq 32(%rdi),%r12
1841 adcq 40(%rdi),%r13
1842 adcq 48(%rdi),%r14
1843 adcq 56(%rdi),%r15
1844 sbbq %rsi,%rsi
1845
1846 movq 48+56+8(%rsp),%rbx
1847 movl $8,%ecx
1848 movq 0(%rbp),%rax
1849 jmp L$8x_tail
1850
1851.p2align 5
// Tail rounds: multiply the current eight table words by each of the eight
// saved multipliers in turn (next one fetched from 48-16+8(%rsp,%rcx,8)),
// storing one finished word to (%rdi) and advancing %rdi by 8 per round.
1852L$8x_tail:
1853 mulq %rbx
1854 addq %rax,%r8
David Benjamin4969cc92016-04-22 15:02:23 -04001855 movq 8(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001856 movq %r8,(%rdi)
1857 movq %rdx,%r8
1858 adcq $0,%r8
1859
1860 mulq %rbx
1861 addq %rax,%r9
David Benjamin4969cc92016-04-22 15:02:23 -04001862 movq 16(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001863 adcq $0,%rdx
1864 addq %r9,%r8
1865 leaq 8(%rdi),%rdi
1866 movq %rdx,%r9
1867 adcq $0,%r9
1868
1869 mulq %rbx
1870 addq %rax,%r10
David Benjamin4969cc92016-04-22 15:02:23 -04001871 movq 24(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001872 adcq $0,%rdx
1873 addq %r10,%r9
1874 movq %rdx,%r10
1875 adcq $0,%r10
1876
1877 mulq %rbx
1878 addq %rax,%r11
David Benjamin4969cc92016-04-22 15:02:23 -04001879 movq 32(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001880 adcq $0,%rdx
1881 addq %r11,%r10
1882 movq %rdx,%r11
1883 adcq $0,%r11
1884
1885 mulq %rbx
1886 addq %rax,%r12
David Benjamin4969cc92016-04-22 15:02:23 -04001887 movq 40(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001888 adcq $0,%rdx
1889 addq %r12,%r11
1890 movq %rdx,%r12
1891 adcq $0,%r12
1892
1893 mulq %rbx
1894 addq %rax,%r13
David Benjamin4969cc92016-04-22 15:02:23 -04001895 movq 48(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001896 adcq $0,%rdx
1897 addq %r13,%r12
1898 movq %rdx,%r13
1899 adcq $0,%r13
1900
1901 mulq %rbx
1902 addq %rax,%r14
David Benjamin4969cc92016-04-22 15:02:23 -04001903 movq 56(%rbp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001904 adcq $0,%rdx
1905 addq %r14,%r13
1906 movq %rdx,%r14
1907 adcq $0,%r14
1908
1909 mulq %rbx
1910 movq 48-16+8(%rsp,%rcx,8),%rbx
1911 addq %rax,%r15
1912 adcq $0,%rdx
1913 addq %r15,%r14
1914 movq 0(%rbp),%rax
1915 movq %rdx,%r15
1916 adcq $0,%r15
1917
1918 decl %ecx
1919 jnz L$8x_tail
1920
// Next 64 bytes of the table; leave the tail when the saved end is reached.
David Benjamin4969cc92016-04-22 15:02:23 -04001921 leaq 64(%rbp),%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001922 movq 8+8(%rsp),%rdx
1923 cmpq 0+8(%rsp),%rbp
1924 jae L$8x_tail_done
1925
// More table words remain: revive the parked carry (negq %rsi), add the next
// eight scratch words, park the new carry and run another eight tail rounds.
1926 movq 48+56+8(%rsp),%rbx
1927 negq %rsi
1928 movq 0(%rbp),%rax
1929 adcq 0(%rdi),%r8
1930 adcq 8(%rdi),%r9
1931 adcq 16(%rdi),%r10
1932 adcq 24(%rdi),%r11
1933 adcq 32(%rdi),%r12
1934 adcq 40(%rdi),%r13
1935 adcq 48(%rdi),%r14
1936 adcq 56(%rdi),%r15
1937 sbbq %rsi,%rsi
1938
1939 movl $8,%ecx
1940 jmp L$8x_tail
1941
1942.p2align 5
// Tail finished: add the carry word stored at (%rdx) by the block prologue,
// ripple it through %r9-%r15 into %rax, then revive the parked carry.
1943L$8x_tail_done:
Robert Sloan4d1ac502017-02-06 08:36:14 -08001944 xorq %rax,%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001945 addq (%rdx),%r8
Adam Langley4139edb2016-01-13 15:00:54 -08001946 adcq $0,%r9
1947 adcq $0,%r10
1948 adcq $0,%r11
1949 adcq $0,%r12
1950 adcq $0,%r13
1951 adcq $0,%r14
1952 adcq $0,%r15
Robert Sloan4d1ac502017-02-06 08:36:14 -08001953 adcq $0,%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001954
1955 negq %rsi
// Add the eight scratch words at (%rdi) and write the block back in place.
// %rax collects the carry out, %rcx is loaded from -8(%rbp), and the two
// .byte sequences encode `movq %xmm2,%rbp` and `movq %xmm3,%r9`, restoring
// the table pointer and the value the caller parked in %xmm3.
1956L$8x_no_tail:
1957 adcq 0(%rdi),%r8
1958 adcq 8(%rdi),%r9
1959 adcq 16(%rdi),%r10
1960 adcq 24(%rdi),%r11
1961 adcq 32(%rdi),%r12
1962 adcq 40(%rdi),%r13
1963 adcq 48(%rdi),%r14
1964 adcq 56(%rdi),%r15
1965 adcq $0,%rax
David Benjamin4969cc92016-04-22 15:02:23 -04001966 movq -8(%rbp),%rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001967 xorq %rsi,%rsi
1968
1969.byte 102,72,15,126,213
1970
1971 movq %r8,0(%rdi)
1972 movq %r9,8(%rdi)
1973.byte 102,73,15,126,217
1974 movq %r10,16(%rdi)
1975 movq %r11,24(%rdi)
1976 movq %r12,32(%rdi)
1977 movq %r13,40(%rdi)
1978 movq %r14,48(%rdi)
1979 movq %r15,56(%rdi)
1980 leaq 64(%rdi),%rdi
1981
// Repeat until %rdi reaches the end of the scratch area held in %rdx.
1982 cmpq %rdx,%rdi
1983 jb L$8x_reduction_loop
David Benjamin4969cc92016-04-22 15:02:23 -04001984 .byte 0xf3,0xc3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001985
Adam Langleyd9e397b2015-01-22 14:27:53 -08001986
// __bn_post4x_internal
// Masked subtraction, four words per iteration: computes value + (~table & mask)
// with the +1 of the two's complement folded into the first word, and writes
// the result through the pointer parked in %xmm1.
// Reads on entry:
//   %rbp  table of words to subtract (presumably the modulus - TODO confirm)
//   %rdi  end of the value; the words used start at %rdi+%r9
//   %r9   negative byte length (the loop counter %r9>>5 counts up to zero)
//   %rax  turned into the AND mask by negq: all-ones when %rax was 1, zero
//         when it was 0 (in which case the value is copied unchanged)
//   %xmm1 destination pointer (the .byte lines encode `movq %xmm1,%rdi` and
//         `movq %xmm1,%rsi`)
// On return %r10 holds the incoming %r9 and %r9 is negated; %rsi still points
// at the start of the destination. Returns with `rep ret` (0xf3,0xc3).
1987.p2align 5
David Benjamin4969cc92016-04-22 15:02:23 -04001988__bn_post4x_internal:
1989 movq 0(%rbp),%r12
1990 leaq (%rdi,%r9,1),%rbx
1991 movq %r9,%rcx
1992.byte 102,72,15,126,207
1993 negq %rax
1994.byte 102,72,15,126,206
1995 sarq $3+2,%rcx
// First word only: word-1 followed by notq below yields -word, which supplies
// the +1 needed to turn the complement into a subtraction.
1996 decq %r12
1997 xorq %r10,%r10
1998 movq 8(%rbp),%r13
1999 movq 16(%rbp),%r14
2000 movq 24(%rbp),%r15
2001 jmp L$sqr4x_sub_entry
2002
2003.p2align 4
Adam Langleyd9e397b2015-01-22 14:27:53 -08002004L$sqr4x_sub:
David Benjamin4969cc92016-04-22 15:02:23 -04002005 movq 0(%rbp),%r12
2006 movq 8(%rbp),%r13
2007 movq 16(%rbp),%r14
2008 movq 24(%rbp),%r15
2009L$sqr4x_sub_entry:
2010 leaq 32(%rbp),%rbp
2011 notq %r12
2012 notq %r13
2013 notq %r14
2014 notq %r15
2015 andq %rax,%r12
2016 andq %rax,%r13
2017 andq %rax,%r14
2018 andq %rax,%r15
2019
// %r10 carries the inter-iteration carry: negq turns it back into CF, sbbq
// parks the new carry after the four adcq instructions.
2020 negq %r10
2021 adcq 0(%rbx),%r12
2022 adcq 8(%rbx),%r13
2023 adcq 16(%rbx),%r14
2024 adcq 24(%rbx),%r15
Adam Langleyd9e397b2015-01-22 14:27:53 -08002025 movq %r12,0(%rdi)
David Benjamin4969cc92016-04-22 15:02:23 -04002026 leaq 32(%rbx),%rbx
Adam Langleyd9e397b2015-01-22 14:27:53 -08002027 movq %r13,8(%rdi)
David Benjamin4969cc92016-04-22 15:02:23 -04002028 sbbq %r10,%r10
Adam Langleyd9e397b2015-01-22 14:27:53 -08002029 movq %r14,16(%rdi)
2030 movq %r15,24(%rdi)
2031 leaq 32(%rdi),%rdi
2032
2033 incq %rcx
2034 jnz L$sqr4x_sub
David Benjamin4969cc92016-04-22 15:02:23 -04002035
// Leave %r10 = incoming %r9 and %r9 = its negation for the next internal call.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002036 movq %r9,%r10
2037 negq %r9
2038 .byte 0xf3,0xc3
2039
// _bn_from_montgomery: dispatcher in front of bn_from_mont8x.
// Tests the low three bits of %r9d (presumably the word count - TODO confirm).
// When they are all zero, i.e. the value is a multiple of 8, control jumps to
// bn_from_mont8x with every register untouched. Otherwise the function
// returns immediately with %eax = 0. 0xf3,0xc3 encodes `rep ret`.
2040.globl _bn_from_montgomery
2041.private_extern _bn_from_montgomery
2042
2043.p2align 5
2044_bn_from_montgomery:
2045 testl $7,%r9d
2046 jz bn_from_mont8x
2047 xorl %eax,%eax
2048 .byte 0xf3,0xc3
2049
2050
2051
// bn_from_mont8x: reached only through the jz in _bn_from_montgomery.
// Registers read: %rdi (destination, parked in %xmm1), %rsi (source words),
// %rcx (table pointer handed to the reduction in %rbp/%xmm2), %r8 (pointer
// whose first qword is loaded and stored at 32(%rsp)), %r9d (count,
// converted to bytes by the shift). %rdx is not read.
// Steps: save callee-saved registers, carve out an aligned scratch frame,
// copy the source into the low half of the scratch area with the high half
// zeroed, run __bn_sqr8x_reduction and __bn_post4x_internal, wipe the scratch
// area, restore registers and return %rax = 1.
2052.p2align 5
2053bn_from_mont8x:
Robert Sloana94fe052017-02-21 08:49:28 -08002054
// 0x67 is a prefix byte attached to the following register move; it looks
// like padding - TODO confirm. %rax keeps the entry %rsp for the epilogue.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002055.byte 0x67
2056 movq %rsp,%rax
Robert Sloana94fe052017-02-21 08:49:28 -08002057
Adam Langleyd9e397b2015-01-22 14:27:53 -08002058 pushq %rbx
Robert Sloana94fe052017-02-21 08:49:28 -08002059
Adam Langleyd9e397b2015-01-22 14:27:53 -08002060 pushq %rbp
Robert Sloana94fe052017-02-21 08:49:28 -08002061
Adam Langleyd9e397b2015-01-22 14:27:53 -08002062 pushq %r12
Robert Sloana94fe052017-02-21 08:49:28 -08002063
Adam Langleyd9e397b2015-01-22 14:27:53 -08002064 pushq %r13
Robert Sloana94fe052017-02-21 08:49:28 -08002065
Adam Langleyd9e397b2015-01-22 14:27:53 -08002066 pushq %r14
Robert Sloana94fe052017-02-21 08:49:28 -08002067
Adam Langleyd9e397b2015-01-22 14:27:53 -08002068 pushq %r15
David Benjamin4969cc92016-04-22 15:02:23 -04002069
Robert Sloana94fe052017-02-21 08:49:28 -08002070L$from_prologue:
2071
// %r9 = count*8 (bytes), %r10 = 3 * that, then %r9 is negated; %r8 is
// replaced by the qword it points to.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002072 shll $3,%r9d
David Benjamin4969cc92016-04-22 15:02:23 -04002073 leaq (%r9,%r9,2),%r10
Adam Langleyd9e397b2015-01-22 14:27:53 -08002074 negq %r9
2075 movq (%r8),%r8
2076
2077
2078
2079
2080
2081
2082
David Benjamin4969cc92016-04-22 15:02:23 -04002083
// Frame placement: the candidate frame base (%rsp - 320 - 2*len) is compared
// with %rdi modulo 4096 and the frame is pushed further down by the computed
// distance. NOTE(review): this appears intended to keep the frame from
// sharing page offsets with the buffers at %rdi - confirm against the
// generator's comments.
2084 leaq -320(%rsp,%r9,2),%r11
Robert Sloana94fe052017-02-21 08:49:28 -08002085 movq %rsp,%rbp
David Benjamin4969cc92016-04-22 15:02:23 -04002086 subq %rdi,%r11
Adam Langleyd9e397b2015-01-22 14:27:53 -08002087 andq $4095,%r11
2088 cmpq %r11,%r10
2089 jb L$from_sp_alt
Robert Sloana94fe052017-02-21 08:49:28 -08002090 subq %r11,%rbp
2091 leaq -320(%rbp,%r9,2),%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002092 jmp L$from_sp_done
2093
2094.p2align 5
2095L$from_sp_alt:
David Benjamin4969cc92016-04-22 15:02:23 -04002096 leaq 4096-320(,%r9,2),%r10
Robert Sloana94fe052017-02-21 08:49:28 -08002097 leaq -320(%rbp,%r9,2),%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002098 subq %r10,%r11
2099 movq $0,%r10
2100 cmovcq %r10,%r11
Robert Sloana94fe052017-02-21 08:49:28 -08002101 subq %r11,%rbp
// %rbp = new frame base, aligned down to 64 bytes. %rsp is then lowered to it
// one 4096-byte step at a time, reading a word at each step so every page
// between the old and new stack pointer is touched in order.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002102L$from_sp_done:
Robert Sloana94fe052017-02-21 08:49:28 -08002103 andq $-64,%rbp
2104 movq %rsp,%r11
2105 subq %rbp,%r11
2106 andq $-4096,%r11
2107 leaq (%r11,%rbp,1),%rsp
2108 movq (%rsp),%r10
2109 cmpq %rbp,%rsp
2110 ja L$from_page_walk
2111 jmp L$from_page_walk_done
2112
2113L$from_page_walk:
2114 leaq -4096(%rsp),%rsp
2115 movq (%rsp),%r10
2116 cmpq %rbp,%rsp
2117 ja L$from_page_walk
2118L$from_page_walk_done:
2119
// %r10 = -len, %r9 = +len (bytes).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002120 movq %r9,%r10
2121 negq %r9
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
// Frame layout: 32(%rsp) = qword loaded from (%r8), 40(%rsp) = entry %rsp,
// 48(%rsp).. = scratch area.
2132 movq %r8,32(%rsp)
2133 movq %rax,40(%rsp)
Robert Sloana94fe052017-02-21 08:49:28 -08002134
Adam Langleyd9e397b2015-01-22 14:27:53 -08002135L$from_body:
2136 movq %r9,%r11
2137 leaq 48(%rsp),%rax
2138 pxor %xmm0,%xmm0
2139 jmp L$mul_by_1
2140
2141.p2align 5
// Per iteration: copy 64 source bytes (unaligned loads) to the scratch area
// at (%rax) and zero the 64 bytes at (%rax,%r9), i.e. the matching position
// in the upper half. The .byte sequence encodes `leaq 0x40(%rsi),%rsi` with a
// 32-bit displacement.
2142L$mul_by_1:
2143 movdqu (%rsi),%xmm1
2144 movdqu 16(%rsi),%xmm2
2145 movdqu 32(%rsi),%xmm3
2146 movdqa %xmm0,(%rax,%r9,1)
2147 movdqu 48(%rsi),%xmm4
2148 movdqa %xmm0,16(%rax,%r9,1)
2149.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2150 movdqa %xmm1,(%rax)
2151 movdqa %xmm0,32(%rax,%r9,1)
2152 movdqa %xmm2,16(%rax)
2153 movdqa %xmm0,48(%rax,%r9,1)
2154 movdqa %xmm3,32(%rax)
2155 movdqa %xmm4,48(%rax)
2156 leaq 64(%rax),%rax
2157 subq $64,%r11
2158 jnz L$mul_by_1
2159
// Park values for the internal routines: the .byte lines encode
// `movq %rdi,%xmm1`, `movq %rcx,%xmm2` and `movq %r10,%xmm3`; %rbp = %rcx.
2160.byte 102,72,15,110,207
2161.byte 102,72,15,110,209
2162.byte 0x67
2163 movq %rcx,%rbp
2164.byte 102,73,15,110,218
David Benjamin4969cc92016-04-22 15:02:23 -04002165 call __bn_sqr8x_reduction
2166 call __bn_post4x_internal
Adam Langleyd9e397b2015-01-22 14:27:53 -08002167
2168 pxor %xmm0,%xmm0
2169 leaq 48(%rsp),%rax
Adam Langleyd9e397b2015-01-22 14:27:53 -08002170 jmp L$from_mont_zero
2171
2172.p2align 5
// Wipe the scratch area: 64 bytes are cleared per iteration while %r9 drops
// by 32, so the loop covers twice %r9 bytes. The entry %rsp is reloaded into
// %rsi on every pass (same value each time).
2173L$from_mont_zero:
Robert Sloana94fe052017-02-21 08:49:28 -08002174 movq 40(%rsp),%rsi
2175
Adam Langleyd9e397b2015-01-22 14:27:53 -08002176 movdqa %xmm0,0(%rax)
2177 movdqa %xmm0,16(%rax)
2178 movdqa %xmm0,32(%rax)
2179 movdqa %xmm0,48(%rax)
2180 leaq 64(%rax),%rax
2181 subq $32,%r9
2182 jnz L$from_mont_zero
2183
// Return value 1; restore the six pushed registers relative to the entry
// %rsp, then restore %rsp itself and return with `rep ret`.
2184 movq $1,%rax
2185 movq -48(%rsi),%r15
Robert Sloana94fe052017-02-21 08:49:28 -08002186
Adam Langleyd9e397b2015-01-22 14:27:53 -08002187 movq -40(%rsi),%r14
Robert Sloana94fe052017-02-21 08:49:28 -08002188
Adam Langleyd9e397b2015-01-22 14:27:53 -08002189 movq -32(%rsi),%r13
Robert Sloana94fe052017-02-21 08:49:28 -08002190
Adam Langleyd9e397b2015-01-22 14:27:53 -08002191 movq -24(%rsi),%r12
Robert Sloana94fe052017-02-21 08:49:28 -08002192
Adam Langleyd9e397b2015-01-22 14:27:53 -08002193 movq -16(%rsi),%rbp
Robert Sloana94fe052017-02-21 08:49:28 -08002194
Adam Langleyd9e397b2015-01-22 14:27:53 -08002195 movq -8(%rsi),%rbx
Robert Sloana94fe052017-02-21 08:49:28 -08002196
Adam Langleyd9e397b2015-01-22 14:27:53 -08002197 leaq (%rsi),%rsp
Robert Sloana94fe052017-02-21 08:49:28 -08002198
Adam Langleyd9e397b2015-01-22 14:27:53 -08002199L$from_epilogue:
2200 .byte 0xf3,0xc3
2201
Robert Sloana94fe052017-02-21 08:49:28 -08002202
// _bn_scatter5: strided store of %esi qwords.
// In:  %rdi = source qwords, %esi = count, %rdx = table base, %rcx = slot index.
// Word i of the source is written to %rdx + %rcx*8 + 256*i, i.e. one 8-byte
// slot in each consecutive 256-byte row (the row size _bn_gather5 reads).
// Returns immediately when %esi is zero. Clobbers %rax, %rdx, %rdi, %esi, flags.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002203.globl _bn_scatter5
2204.private_extern _bn_scatter5
2205
2206.p2align 4
2207_bn_scatter5:
2208 cmpl $0,%esi
2209 jz L$scatter_epilogue
// %rdx -> selected slot in the first row.
2210 leaq (%rdx,%rcx,8),%rdx
2211L$scatter:
2212 movq (%rdi),%rax
2213 leaq 8(%rdi),%rdi
2214 movq %rax,(%rdx)
2215 leaq 256(%rdx),%rdx
2216 subl $1,%esi
2217 jnz L$scatter
2218L$scatter_epilogue:
// `rep ret`
2219 .byte 0xf3,0xc3
2220
2221
// _bn_gather5: masked gather of %esi qwords, the inverse of _bn_scatter5.
// In:  %rdi = destination, %esi = count, %rdx = table base, %ecx = slot index.
// Sixteen 16-byte masks are first built on the stack, each covering two slot
// indices (one per 64-bit lane) and all-ones only in the lane whose index
// equals %ecx. Every output word is then formed by ANDing the full 256-byte
// row with the masks and ORing everything together, so the addresses read do
// not depend on %ecx. The table is read with movdqa, which requires %rdx to
// be 16-byte aligned. Clobbers %rax, %r10, %r11, %rdi, %esi, %xmm0-%xmm5, flags.
2222.globl _bn_gather5
2223.private_extern _bn_gather5
2224
David Benjamin4969cc92016-04-22 15:02:23 -04002225.p2align 5
Adam Langleyd9e397b2015-01-22 14:27:53 -08002226_bn_gather5:
David Benjamin4969cc92016-04-22 15:02:23 -04002227L$SEH_begin_bn_gather5:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002228
// The .byte lines encode `leaq (%rsp),%r10` (saves the entry %rsp) and
// `subq $264,%rsp`; the stack pointer is then aligned down to 16 bytes for
// the movdqa stores below.
David Benjamin4969cc92016-04-22 15:02:23 -04002229.byte 0x4c,0x8d,0x14,0x24
2230.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
2231 leaq L$inc(%rip),%rax
2232 andq $-16,%rsp
2233
// %xmm5 = index broadcast to all four dwords (after pshufd), %xmm0 = {0,0,1,1},
// %xmm1 = {2,2,2,2} (kept in %xmm4 as the per-step increment).
// %r11 and %rax are biased by 128 so the displacements below span -128..112.
2234 movd %ecx,%xmm5
2235 movdqa 0(%rax),%xmm0
2236 movdqa 16(%rax),%xmm1
2237 leaq 128(%rdx),%r11
2238 leaq 128(%rsp),%rax
2239
// Mask generation: the counter vector advances by 2 per step, is compared
// for equality with the broadcast index, and each result is stored at
// -128(%rax) .. 112(%rax).
2240 pshufd $0,%xmm5,%xmm5
2241 movdqa %xmm1,%xmm4
2242 movdqa %xmm1,%xmm2
2243 paddd %xmm0,%xmm1
2244 pcmpeqd %xmm5,%xmm0
2245 movdqa %xmm4,%xmm3
2246
2247 paddd %xmm1,%xmm2
2248 pcmpeqd %xmm5,%xmm1
2249 movdqa %xmm0,-128(%rax)
2250 movdqa %xmm4,%xmm0
2251
2252 paddd %xmm2,%xmm3
2253 pcmpeqd %xmm5,%xmm2
2254 movdqa %xmm1,-112(%rax)
2255 movdqa %xmm4,%xmm1
2256
2257 paddd %xmm3,%xmm0
2258 pcmpeqd %xmm5,%xmm3
2259 movdqa %xmm2,-96(%rax)
2260 movdqa %xmm4,%xmm2
2261 paddd %xmm0,%xmm1
2262 pcmpeqd %xmm5,%xmm0
2263 movdqa %xmm3,-80(%rax)
2264 movdqa %xmm4,%xmm3
2265
2266 paddd %xmm1,%xmm2
2267 pcmpeqd %xmm5,%xmm1
2268 movdqa %xmm0,-64(%rax)
2269 movdqa %xmm4,%xmm0
2270
2271 paddd %xmm2,%xmm3
2272 pcmpeqd %xmm5,%xmm2
2273 movdqa %xmm1,-48(%rax)
2274 movdqa %xmm4,%xmm1
2275
2276 paddd %xmm3,%xmm0
2277 pcmpeqd %xmm5,%xmm3
2278 movdqa %xmm2,-32(%rax)
2279 movdqa %xmm4,%xmm2
2280 paddd %xmm0,%xmm1
2281 pcmpeqd %xmm5,%xmm0
2282 movdqa %xmm3,-16(%rax)
2283 movdqa %xmm4,%xmm3
2284
2285 paddd %xmm1,%xmm2
2286 pcmpeqd %xmm5,%xmm1
2287 movdqa %xmm0,0(%rax)
2288 movdqa %xmm4,%xmm0
2289
2290 paddd %xmm2,%xmm3
2291 pcmpeqd %xmm5,%xmm2
2292 movdqa %xmm1,16(%rax)
2293 movdqa %xmm4,%xmm1
2294
2295 paddd %xmm3,%xmm0
2296 pcmpeqd %xmm5,%xmm3
2297 movdqa %xmm2,32(%rax)
2298 movdqa %xmm4,%xmm2
2299 paddd %xmm0,%xmm1
2300 pcmpeqd %xmm5,%xmm0
2301 movdqa %xmm3,48(%rax)
2302 movdqa %xmm4,%xmm3
2303
2304 paddd %xmm1,%xmm2
2305 pcmpeqd %xmm5,%xmm1
2306 movdqa %xmm0,64(%rax)
2307 movdqa %xmm4,%xmm0
2308
2309 paddd %xmm2,%xmm3
2310 pcmpeqd %xmm5,%xmm2
2311 movdqa %xmm1,80(%rax)
2312 movdqa %xmm4,%xmm1
2313
2314 paddd %xmm3,%xmm0
2315 pcmpeqd %xmm5,%xmm3
2316 movdqa %xmm2,96(%rax)
2317 movdqa %xmm4,%xmm2
2318 movdqa %xmm3,112(%rax)
2319 jmp L$gather
2320
2321.p2align 5
// One output qword per iteration: AND all sixteen 16-byte row vectors with
// their masks, OR them into two accumulators (%xmm4/%xmm5), merge, fold the
// high qword onto the low one (pshufd $0x4e swaps the halves) and store the
// low 8 bytes. %r11 then moves to the next 256-byte row.
2322L$gather:
2323 pxor %xmm4,%xmm4
2324 pxor %xmm5,%xmm5
2325 movdqa -128(%r11),%xmm0
2326 movdqa -112(%r11),%xmm1
2327 movdqa -96(%r11),%xmm2
2328 pand -128(%rax),%xmm0
2329 movdqa -80(%r11),%xmm3
2330 pand -112(%rax),%xmm1
2331 por %xmm0,%xmm4
2332 pand -96(%rax),%xmm2
2333 por %xmm1,%xmm5
2334 pand -80(%rax),%xmm3
2335 por %xmm2,%xmm4
2336 por %xmm3,%xmm5
2337 movdqa -64(%r11),%xmm0
2338 movdqa -48(%r11),%xmm1
2339 movdqa -32(%r11),%xmm2
2340 pand -64(%rax),%xmm0
2341 movdqa -16(%r11),%xmm3
2342 pand -48(%rax),%xmm1
2343 por %xmm0,%xmm4
2344 pand -32(%rax),%xmm2
2345 por %xmm1,%xmm5
2346 pand -16(%rax),%xmm3
2347 por %xmm2,%xmm4
2348 por %xmm3,%xmm5
2349 movdqa 0(%r11),%xmm0
2350 movdqa 16(%r11),%xmm1
2351 movdqa 32(%r11),%xmm2
2352 pand 0(%rax),%xmm0
2353 movdqa 48(%r11),%xmm3
2354 pand 16(%rax),%xmm1
2355 por %xmm0,%xmm4
2356 pand 32(%rax),%xmm2
2357 por %xmm1,%xmm5
2358 pand 48(%rax),%xmm3
2359 por %xmm2,%xmm4
2360 por %xmm3,%xmm5
2361 movdqa 64(%r11),%xmm0
2362 movdqa 80(%r11),%xmm1
2363 movdqa 96(%r11),%xmm2
2364 pand 64(%rax),%xmm0
2365 movdqa 112(%r11),%xmm3
2366 pand 80(%rax),%xmm1
2367 por %xmm0,%xmm4
2368 pand 96(%rax),%xmm2
2369 por %xmm1,%xmm5
2370 pand 112(%rax),%xmm3
2371 por %xmm2,%xmm4
2372 por %xmm3,%xmm5
2373 por %xmm5,%xmm4
2374 leaq 256(%r11),%r11
2375 pshufd $0x4e,%xmm4,%xmm0
2376 por %xmm4,%xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002377 movq %xmm0,(%rdi)
2378 leaq 8(%rdi),%rdi
2379 subl $1,%esi
2380 jnz L$gather
David Benjamin4969cc92016-04-22 15:02:23 -04002381
// Restore the entry %rsp saved in %r10 and return with `rep ret`.
2382 leaq (%r10),%rsp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002383 .byte 0xf3,0xc3
2384L$SEH_end_bn_gather5:
2385
// L$inc: two 16-byte constants, 64-byte aligned. The first, {0,0,1,1}, is the
// initial index vector and the second, {2,2,2,2}, is the per-step increment
// used by the mask generation in _bn_gather5 (loaded from 0 and 16(L$inc)).
// The .byte line is the NUL-terminated ASCII string "Montgomery
// Multiplication with scatter/gather for x86_64, CRYPTOGAMS by
// <appro@openssl.org>".
2386.p2align 6
David Benjamin4969cc92016-04-22 15:02:23 -04002387L$inc:
2388.long 0,0, 1,1
2389.long 2,2, 2,2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002390.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002390.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2391#endif