// Source blob: abc65f11926e3f367548fe873462267ff1e54eb1
// Build guard: the code below is only assembled for x86-64 targets when
// OPENSSL_NO_ASM is not defined. The matching #endif is not visible in this
// excerpt and is presumably at the end of the file.
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
3
4
5
//-----------------------------------------------------------------------
// _bn_mul_mont_gather5
//
// Syntax: AT&T, Mach-O local labels (L$...). Register use as observed
// in the code below (argument meaning presumably follows the System V
// AMD64 order -- confirm against the C prototype):
//   %rdi      destination words (written in L$sub / L$copy)
//   %rsi      first operand words, read as (%rsi,%r15,8)
//   %rdx      base of a table of 16-byte entries; one 64-bit word is
//             gathered from every 256-byte row using masks
//   %rcx      words read as (%rcx,%r15,8) and subtracted in L$sub
//             (presumably the modulus)
//   %r8       pointer, dereferenced once; the loaded value scales the
//             low result word to form the per-pass multiplier in %rbp
//   %r9d      word count (zero-extended on entry)
//   8(%rsp)   32-bit index, broadcast and compared against counters to
//             build the selection masks
// Returns %rax = 1. Restores %rbx, %rbp, %r12-%r15 and %rsp from the
// frame saved on entry.
//
// If any of the low three bits of the word count is set, the generic
// path (L$mul_enter) runs; otherwise control transfers to
// L$mul4x_enter, which is defined in another routine of this file.
//-----------------------------------------------------------------------
.globl _bn_mul_mont_gather5
.private_extern _bn_mul_mont_gather5

.p2align 6
_bn_mul_mont_gather5:
    movl %r9d,%r9d                  // zero-extend the word count
    movq %rsp,%rax                  // rax = stack pointer at entry
    testl $7,%r9d
    jnz L$mul_enter
    jmp L$mul4x_enter

.p2align 4
L$mul_enter:
    movd 8(%rsp),%xmm5              // index argument, read before any push
    pushq %rbx
    pushq %rbp
    pushq %r12
    pushq %r13
    pushq %r14
    pushq %r15

    // Carve out the frame: r10 = (rsp - 280 - 8*num) rounded down to 1024.
    negq %r9
    movq %rsp,%r11
    leaq -280(%rsp,%r9,8),%r10
    negq %r9
    andq $-1024,%r10

    // Move rsp down to r10 in steps of at most 4096 bytes, reading one
    // word at each step so every page in between is touched in order.
    subq %r10,%r11
    andq $-4096,%r11
    leaq (%r10,%r11,1),%rsp
    movq (%rsp),%r11
    cmpq %r10,%rsp
    ja L$mul_page_walk
    jmp L$mul_page_walk_done

L$mul_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r11
    cmpq %r10,%rsp
    ja L$mul_page_walk
L$mul_page_walk_done:

    leaq L$inc(%rip),%r10           // constants defined elsewhere in the file
    movq %rax,8(%rsp,%r9,8)         // remember the entry stack pointer

L$mul_body:

    leaq 128(%rdx),%r12             // r12 = table base biased by 128
    movdqa 0(%r10),%xmm0
    movdqa 16(%r10),%xmm1
    leaq 24-112(%rsp,%r9,8),%r10
    andq $-16,%r10                  // 16-byte aligned mask area

    // Build 16 masks at 112(%r10)..352(%r10). Each is the dword-wise
    // equality of a running counter vector with the broadcast index.
    // The counter start and step come from L$inc -- NOTE(review):
    // contents of L$inc are not visible here.
    pshufd $0,%xmm5,%xmm5
    movdqa %xmm1,%xmm4
    movdqa %xmm1,%xmm2
    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
.byte 0x67
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,112(%r10)
    movdqa %xmm4,%xmm0

    paddd %xmm2,%xmm3
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,128(%r10)
    movdqa %xmm4,%xmm1

    paddd %xmm3,%xmm0
    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,144(%r10)
    movdqa %xmm4,%xmm2

    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
    movdqa %xmm3,160(%r10)
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,176(%r10)
    movdqa %xmm4,%xmm0

    paddd %xmm2,%xmm3
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,192(%r10)
    movdqa %xmm4,%xmm1

    paddd %xmm3,%xmm0
    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,208(%r10)
    movdqa %xmm4,%xmm2

    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
    movdqa %xmm3,224(%r10)
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,240(%r10)
    movdqa %xmm4,%xmm0

    paddd %xmm2,%xmm3
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,256(%r10)
    movdqa %xmm4,%xmm1

    paddd %xmm3,%xmm0
    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,272(%r10)
    movdqa %xmm4,%xmm2

    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
    movdqa %xmm3,288(%r10)
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,304(%r10)

    paddd %xmm2,%xmm3
.byte 0x67
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,320(%r10)

    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,336(%r10)

    // Gather the first multiplier word: AND all 16 entries of the current
    // 256-byte row with their masks and OR the results together, so every
    // entry is read regardless of the index.
    pand 64(%r12),%xmm0

    pand 80(%r12),%xmm1
    pand 96(%r12),%xmm2
    movdqa %xmm3,352(%r10)
    pand 112(%r12),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    movdqa -128(%r12),%xmm4
    movdqa -112(%r12),%xmm5
    movdqa -96(%r12),%xmm2
    pand 112(%r10),%xmm4
    movdqa -80(%r12),%xmm3
    pand 128(%r10),%xmm5
    por %xmm4,%xmm0
    pand 144(%r10),%xmm2
    por %xmm5,%xmm1
    pand 160(%r10),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    movdqa -64(%r12),%xmm4
    movdqa -48(%r12),%xmm5
    movdqa -32(%r12),%xmm2
    pand 176(%r10),%xmm4
    movdqa -16(%r12),%xmm3
    pand 192(%r10),%xmm5
    por %xmm4,%xmm0
    pand 208(%r10),%xmm2
    por %xmm5,%xmm1
    pand 224(%r10),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    movdqa 0(%r12),%xmm4
    movdqa 16(%r12),%xmm5
    movdqa 32(%r12),%xmm2
    pand 240(%r10),%xmm4
    movdqa 48(%r12),%xmm3
    pand 256(%r10),%xmm5
    por %xmm4,%xmm0
    pand 272(%r10),%xmm2
    por %xmm5,%xmm1
    pand 288(%r10),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    por %xmm1,%xmm0
    pshufd $0x4e,%xmm0,%xmm1        // swap 64-bit halves
    por %xmm1,%xmm0                 // fold both halves into the low qword
    leaq 256(%r12),%r12             // advance to the next table row
.byte 102,72,15,126,195             // movq %xmm0,%rbx (hand-encoded)

    movq (%r8),%r8                  // r8 = value pointed to by the r8 argument
    movq (%rsi),%rax

    xorq %r14,%r14                  // r14 = outer pass counter
    xorq %r15,%r15                  // r15 = word index within a pass

    // First pass: the temporary on the stack is written, not accumulated.
    movq %r8,%rbp
    mulq %rbx
    movq %rax,%r10
    movq (%rcx),%rax

    imulq %r10,%rbp                 // rbp = r8 * low product word
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi),%rax
    adcq $0,%rdx
    movq %rdx,%r13

    leaq 1(%r15),%r15
    jmp L$1st_enter

.p2align 4
L$1st:
    addq %rax,%r13
    movq (%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%r13
    movq %r10,%r11
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r15,8)
    movq %rdx,%r13

L$1st_enter:
    mulq %rbx
    addq %rax,%r11
    movq (%rcx,%r15,8),%rax
    adcq $0,%rdx
    leaq 1(%r15),%r15
    movq %rdx,%r10

    mulq %rbp
    cmpq %r9,%r15
    jne L$1st

    addq %rax,%r13
    adcq $0,%rdx
    addq %r11,%r13
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r9,8)
    movq %rdx,%r13
    movq %r10,%r11

    xorq %rdx,%rdx
    addq %r11,%r13
    adcq $0,%rdx
    movq %r13,-8(%rsp,%r9,8)
    movq %rdx,(%rsp,%r9,8)          // top carry word of the temporary

    leaq 1(%r14),%r14
    jmp L$outer
.p2align 4
L$outer:
    // Gather the next multiplier word with the masks saved earlier.
    leaq 24+128(%rsp,%r9,8),%rdx
    andq $-16,%rdx
    pxor %xmm4,%xmm4
    pxor %xmm5,%xmm5
    movdqa -128(%r12),%xmm0
    movdqa -112(%r12),%xmm1
    movdqa -96(%r12),%xmm2
    movdqa -80(%r12),%xmm3
    pand -128(%rdx),%xmm0
    pand -112(%rdx),%xmm1
    por %xmm0,%xmm4
    pand -96(%rdx),%xmm2
    por %xmm1,%xmm5
    pand -80(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqa -64(%r12),%xmm0
    movdqa -48(%r12),%xmm1
    movdqa -32(%r12),%xmm2
    movdqa -16(%r12),%xmm3
    pand -64(%rdx),%xmm0
    pand -48(%rdx),%xmm1
    por %xmm0,%xmm4
    pand -32(%rdx),%xmm2
    por %xmm1,%xmm5
    pand -16(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqa 0(%r12),%xmm0
    movdqa 16(%r12),%xmm1
    movdqa 32(%r12),%xmm2
    movdqa 48(%r12),%xmm3
    pand 0(%rdx),%xmm0
    pand 16(%rdx),%xmm1
    por %xmm0,%xmm4
    pand 32(%rdx),%xmm2
    por %xmm1,%xmm5
    pand 48(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqa 64(%r12),%xmm0
    movdqa 80(%r12),%xmm1
    movdqa 96(%r12),%xmm2
    movdqa 112(%r12),%xmm3
    pand 64(%rdx),%xmm0
    pand 80(%rdx),%xmm1
    por %xmm0,%xmm4
    pand 96(%rdx),%xmm2
    por %xmm1,%xmm5
    pand 112(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    por %xmm5,%xmm4
    pshufd $0x4e,%xmm4,%xmm0
    por %xmm4,%xmm0
    leaq 256(%r12),%r12

    movq (%rsi),%rax
.byte 102,72,15,126,195             // movq %xmm0,%rbx (hand-encoded)

    // Subsequent passes: multiply-accumulate into the stack temporary.
    xorq %r15,%r15
    movq %r8,%rbp
    movq (%rsp),%r10

    mulq %rbx
    addq %rax,%r10
    movq (%rcx),%rax
    adcq $0,%rdx

    imulq %r10,%rbp
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi),%rax
    adcq $0,%rdx
    movq 8(%rsp),%r10
    movq %rdx,%r13

    leaq 1(%r15),%r15
    jmp L$inner_enter

.p2align 4
L$inner:
    addq %rax,%r13
    movq (%rsi,%r15,8),%rax
    adcq $0,%rdx
    addq %r10,%r13
    movq (%rsp,%r15,8),%r10
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r15,8)
    movq %rdx,%r13

L$inner_enter:
    mulq %rbx
    addq %rax,%r11
    movq (%rcx,%r15,8),%rax
    adcq $0,%rdx
    addq %r11,%r10
    movq %rdx,%r11
    adcq $0,%r11
    leaq 1(%r15),%r15

    mulq %rbp
    cmpq %r9,%r15
    jne L$inner

    addq %rax,%r13
    adcq $0,%rdx
    addq %r10,%r13
    movq (%rsp,%r9,8),%r10
    adcq $0,%rdx
    movq %r13,-16(%rsp,%r9,8)
    movq %rdx,%r13

    xorq %rdx,%rdx
    addq %r11,%r13
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-8(%rsp,%r9,8)
    movq %rdx,(%rsp,%r9,8)

    leaq 1(%r14),%r14
    cmpq %r9,%r14
    jb L$outer

    // Final reduction: rdi[] = temporary - rcx[] with borrow chained
    // through CF. The xor clears CF; lea/mov/dec inside the loop leave
    // CF untouched.
    xorq %r14,%r14
    movq (%rsp),%rax
    leaq (%rsp),%rsi
    movq %r9,%r15
    jmp L$sub
.p2align 4
L$sub:
    sbbq (%rcx,%r14,8),%rax
    movq %rax,(%rdi,%r14,8)
    movq 8(%rsi,%r14,8),%rax
    leaq 1(%r14),%r14
    decq %r15
    jnz L$sub

    // rax = top word of the temporary minus the final borrow; it is used
    // as an all-or-nothing mask (assumes the top word is 0 or 1 -- TODO
    // confirm) to pick the copy source without branching:
    //   rsi = (rsi & rax) | (rdi & ~rax)
    sbbq $0,%rax
    xorq %r14,%r14
    andq %rax,%rsi
    notq %rax
    movq %rdi,%rcx
    andq %rax,%rcx
    movq %r9,%r15
    orq %rcx,%rsi
.p2align 4
L$copy:
    movq (%rsi,%r14,8),%rax
    movq %r14,(%rsp,%r14,8)         // overwrite the stack temporary with the index
    movq %rax,(%rdi,%r14,8)
    leaq 1(%r14),%r14
    subq $1,%r15
    jnz L$copy

    // Epilogue: reload the entry stack pointer and restore the
    // callee-saved registers pushed in L$mul_enter.
    movq 8(%rsp,%r9,8),%rsi
    movq $1,%rax
    movq -48(%rsi),%r15
    movq -40(%rsi),%r14
    movq -32(%rsi),%r13
    movq -24(%rsi),%r12
    movq -16(%rsi),%rbp
    movq -8(%rsi),%rbx
    leaq (%rsi),%rsp
L$mul_epilogue:
    .byte 0xf3,0xc3                 // rep ret
443
444
Robert Sloana94fe052017-02-21 08:49:28 -0800445
//-----------------------------------------------------------------------
// bn_mul4x_mont_gather5 (file-local; no .globl in view)
//
// Frame setup and teardown around mul4x_internal. L$mul4x_enter is also
// a jump target from outside this routine; at that label %rax must
// already hold the stack pointer value from function entry.
//
// Inputs as used here: %rdi is only used for address arithmetic while
// placing the frame and is otherwise passed through; %r9d is the word
// count. All other registers are passed through to mul4x_internal.
// Returns %rax = 1 and restores %rbx, %rbp, %r12-%r15, %rsp.
//-----------------------------------------------------------------------
.p2align 5
bn_mul4x_mont_gather5:
.byte 0x67                          // address-size prefix on the next instruction
    movq %rsp,%rax                  // rax = stack pointer at entry

L$mul4x_enter:
    pushq %rbx
    pushq %rbp
    pushq %r12
    pushq %r13
    pushq %r14
    pushq %r15

L$mul4x_prologue:

.byte 0x67
    shll $3,%r9d                    // r9 = word count * 8 (bytes)
    leaq (%r9,%r9,2),%r10           // r10 = 3 * bytes
    negq %r9

    // Choose the frame base in rbp. r11 is the distance, modulo 4096,
    // between the tentative frame (rsp - 320 - 2*bytes) and rdi; the
    // frame is shifted down by an amount derived from it.
    // NOTE(review): presumably this keeps the frame from aliasing the
    // output buffer modulo the page size -- confirm with the generator.
    leaq -320(%rsp,%r9,2),%r11
    movq %rsp,%rbp
    subq %rdi,%r11
    andq $4095,%r11
    cmpq %r11,%r10
    jb L$mul4xsp_alt
    subq %r11,%rbp
    leaq -320(%rbp,%r9,2),%rbp
    jmp L$mul4xsp_done

.p2align 5
L$mul4xsp_alt:
    leaq 4096-320(,%r9,2),%r10
    leaq -320(%rbp,%r9,2),%rbp
    subq %r10,%r11
    movq $0,%r10
    cmovcq %r10,%r11                // clamp to 0 if the subtraction borrowed
    subq %r11,%rbp
L$mul4xsp_done:
    andq $-64,%rbp                  // 64-byte align the frame

    // Move rsp down to rbp in steps of at most 4096 bytes, reading one
    // word at each step.
    movq %rsp,%r11
    subq %rbp,%r11
    andq $-4096,%r11
    leaq (%r11,%rbp,1),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja L$mul4x_page_walk
    jmp L$mul4x_page_walk_done

L$mul4x_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja L$mul4x_page_walk
L$mul4x_page_walk_done:

    negq %r9                        // r9 = +bytes again

    movq %rax,40(%rsp)              // remember the entry stack pointer

L$mul4x_body:

    call mul4x_internal

    // Epilogue: restore callee-saved registers relative to the saved
    // entry stack pointer.
    movq 40(%rsp),%rsi
    movq $1,%rax
    movq -48(%rsi),%r15
    movq -40(%rsi),%r14
    movq -32(%rsi),%r13
    movq -24(%rsi),%r12
    movq -16(%rsi),%rbp
    movq -8(%rsi),%rbx
    leaq (%rsi),%rsp
L$mul4x_epilogue:
    .byte 0xf3,0xc3                 // rep ret
545
546
547
Robert Sloana94fe052017-02-21 08:49:28 -0800548
//-----------------------------------------------------------------------
// mul4x_internal (file-local helper, entered via call)
//
// Inputs as used below:
//   %rax      pointer such that 8(%rax) holds the 32-bit table index
//   %rdx      table base; one 64-bit word is gathered per 256-byte row
//   %rsi      first operand words
//   %rcx      second per-word operand (presumably the modulus)
//   %r8       pointer, dereferenced once
//   %r9       operand length in bytes
//   %rdi      saved at 56+8(%rsp) and reloaded before the tail jump
// Frame slots are written as N+8(%rsp); the +8 presumably accounts for
// the return address pushed by the call -- confirm against the callers.
//   16+8(%rsp)  end-of-table sentinel (128 + rdx + 32*bytes)
//   56+8(%rsp)  saved %rdi
//   64+8(%rsp)  start of the temporary result (tracked in %r14)
// Does not return directly: it ends with a jump to L$sqr4x_sub_entry,
// which is defined outside this excerpt.
//-----------------------------------------------------------------------
.p2align 5
mul4x_internal:
    shlq $5,%r9
    movd 8(%rax),%xmm5              // table index
    leaq L$inc(%rip),%rax           // constants defined elsewhere in the file
    leaq 128(%rdx,%r9,1),%r13       // r13 = rdx + 128 + 32*bytes
    shrq $5,%r9
    movdqa 0(%rax),%xmm0
    movdqa 16(%rax),%xmm1
    leaq 88-112(%rsp,%r9,1),%r10    // mask area base (used as 112(%r10)...)
    leaq 128(%rdx),%r12             // r12 = table base biased by 128

    // Build 16 masks at 112(%r10)..352(%r10): dword-wise equality of a
    // running counter vector with the broadcast index.
    pshufd $0,%xmm5,%xmm5
    movdqa %xmm1,%xmm4
.byte 0x67,0x67
    movdqa %xmm1,%xmm2
    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
.byte 0x67
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,112(%r10)
    movdqa %xmm4,%xmm0

    paddd %xmm2,%xmm3
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,128(%r10)
    movdqa %xmm4,%xmm1

    paddd %xmm3,%xmm0
    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,144(%r10)
    movdqa %xmm4,%xmm2

    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
    movdqa %xmm3,160(%r10)
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,176(%r10)
    movdqa %xmm4,%xmm0

    paddd %xmm2,%xmm3
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,192(%r10)
    movdqa %xmm4,%xmm1

    paddd %xmm3,%xmm0
    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,208(%r10)
    movdqa %xmm4,%xmm2

    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
    movdqa %xmm3,224(%r10)
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,240(%r10)
    movdqa %xmm4,%xmm0

    paddd %xmm2,%xmm3
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,256(%r10)
    movdqa %xmm4,%xmm1

    paddd %xmm3,%xmm0
    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,272(%r10)
    movdqa %xmm4,%xmm2

    paddd %xmm0,%xmm1
    pcmpeqd %xmm5,%xmm0
    movdqa %xmm3,288(%r10)
    movdqa %xmm4,%xmm3
    paddd %xmm1,%xmm2
    pcmpeqd %xmm5,%xmm1
    movdqa %xmm0,304(%r10)

    paddd %xmm2,%xmm3
.byte 0x67
    pcmpeqd %xmm5,%xmm2
    movdqa %xmm1,320(%r10)

    pcmpeqd %xmm5,%xmm3
    movdqa %xmm2,336(%r10)

    // Gather the first multiplier word: AND every entry of the row with
    // its mask and OR the results, so all 16 entries are always read.
    pand 64(%r12),%xmm0

    pand 80(%r12),%xmm1
    pand 96(%r12),%xmm2
    movdqa %xmm3,352(%r10)
    pand 112(%r12),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    movdqa -128(%r12),%xmm4
    movdqa -112(%r12),%xmm5
    movdqa -96(%r12),%xmm2
    pand 112(%r10),%xmm4
    movdqa -80(%r12),%xmm3
    pand 128(%r10),%xmm5
    por %xmm4,%xmm0
    pand 144(%r10),%xmm2
    por %xmm5,%xmm1
    pand 160(%r10),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    movdqa -64(%r12),%xmm4
    movdqa -48(%r12),%xmm5
    movdqa -32(%r12),%xmm2
    pand 176(%r10),%xmm4
    movdqa -16(%r12),%xmm3
    pand 192(%r10),%xmm5
    por %xmm4,%xmm0
    pand 208(%r10),%xmm2
    por %xmm5,%xmm1
    pand 224(%r10),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    movdqa 0(%r12),%xmm4
    movdqa 16(%r12),%xmm5
    movdqa 32(%r12),%xmm2
    pand 240(%r10),%xmm4
    movdqa 48(%r12),%xmm3
    pand 256(%r10),%xmm5
    por %xmm4,%xmm0
    pand 272(%r10),%xmm2
    por %xmm5,%xmm1
    pand 288(%r10),%xmm3
    por %xmm2,%xmm0
    por %xmm3,%xmm1
    por %xmm1,%xmm0
    pshufd $0x4e,%xmm0,%xmm1        // swap 64-bit halves
    por %xmm1,%xmm0
    leaq 256(%r12),%r12             // advance to the next table row
.byte 102,72,15,126,195             // movq %xmm0,%rbx (hand-encoded)

    movq %r13,16+8(%rsp)            // end-of-table sentinel for the outer loop
    movq %rdi,56+8(%rsp)            // rdi is reused as an accumulator below

    movq (%r8),%r8
    movq (%rsi),%rax
    leaq (%rsi,%r9,1),%rsi          // rsi = end of first operand
    negq %r9                        // r9 = -bytes; indexes back from the end

    // First pass, four words per loop iteration.
    movq %r8,%rbp
    mulq %rbx
    movq %rax,%r10
    movq (%rcx),%rax

    imulq %r10,%rbp                 // rbp = r8 * low product word
    leaq 64+8(%rsp),%r14            // r14 = temporary result cursor
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi,%r9,1),%rax
    adcq $0,%rdx
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq 16(%rsi,%r9,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    leaq 32(%r9),%r15               // r15 = negative byte index, counts up to 0
    leaq 32(%rcx),%rcx
    adcq $0,%rdx
    movq %rdi,(%r14)
    movq %rdx,%r13
    jmp L$1st4x

.p2align 5
L$1st4x:
    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx),%rax
    leaq 32(%r14),%r14
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-24(%r14)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-16(%r14)
    movq %rdx,%r13

    mulq %rbx
    addq %rax,%r10
    movq 0(%rcx),%rax
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq 8(%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-8(%r14)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq 16(%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    leaq 32(%rcx),%rcx
    adcq $0,%rdx
    movq %rdi,(%r14)
    movq %rdx,%r13

    addq $32,%r15
    jnz L$1st4x

    // Tail of the first pass (last two words and the carry).
    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx),%rax
    leaq 32(%r14),%r14
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %r13,-24(%r14)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx),%rax
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi,%r9,1),%rax          // reload first operand word 0 for the next pass
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %rdi,-16(%r14)
    movq %rdx,%r13

    leaq (%rcx,%r9,1),%rcx          // rewind rcx to its first word

    xorq %rdi,%rdi
    addq %r10,%r13
    adcq $0,%rdi                    // rdi = top carry
    movq %r13,-8(%r14)

    jmp L$outer4x

.p2align 5
L$outer4x:
    // Gather the next multiplier word using the masks saved earlier.
    leaq 16+128(%r14),%rdx
    pxor %xmm4,%xmm4
    pxor %xmm5,%xmm5
    movdqa -128(%r12),%xmm0
    movdqa -112(%r12),%xmm1
    movdqa -96(%r12),%xmm2
    movdqa -80(%r12),%xmm3
    pand -128(%rdx),%xmm0
    pand -112(%rdx),%xmm1
    por %xmm0,%xmm4
    pand -96(%rdx),%xmm2
    por %xmm1,%xmm5
    pand -80(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqa -64(%r12),%xmm0
    movdqa -48(%r12),%xmm1
    movdqa -32(%r12),%xmm2
    movdqa -16(%r12),%xmm3
    pand -64(%rdx),%xmm0
    pand -48(%rdx),%xmm1
    por %xmm0,%xmm4
    pand -32(%rdx),%xmm2
    por %xmm1,%xmm5
    pand -16(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqa 0(%r12),%xmm0
    movdqa 16(%r12),%xmm1
    movdqa 32(%r12),%xmm2
    movdqa 48(%r12),%xmm3
    pand 0(%rdx),%xmm0
    pand 16(%rdx),%xmm1
    por %xmm0,%xmm4
    pand 32(%rdx),%xmm2
    por %xmm1,%xmm5
    pand 48(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    movdqa 64(%r12),%xmm0
    movdqa 80(%r12),%xmm1
    movdqa 96(%r12),%xmm2
    movdqa 112(%r12),%xmm3
    pand 64(%rdx),%xmm0
    pand 80(%rdx),%xmm1
    por %xmm0,%xmm4
    pand 96(%rdx),%xmm2
    por %xmm1,%xmm5
    pand 112(%rdx),%xmm3
    por %xmm2,%xmm4
    por %xmm3,%xmm5
    por %xmm5,%xmm4
    pshufd $0x4e,%xmm4,%xmm0
    por %xmm4,%xmm0
    leaq 256(%r12),%r12
.byte 102,72,15,126,195             // movq %xmm0,%rbx (hand-encoded)

    // Subsequent passes: multiply-accumulate into the temporary.
    movq (%r14,%r9,1),%r10
    movq %r8,%rbp
    mulq %rbx
    addq %rax,%r10
    movq (%rcx),%rax
    adcq $0,%rdx

    imulq %r10,%rbp
    movq %rdx,%r11
    movq %rdi,(%r14)                // store the previous top carry

    leaq (%r14,%r9,1),%r14          // rewind the temporary cursor

    mulq %rbp
    addq %rax,%r10
    movq 8(%rsi,%r9,1),%rax
    adcq $0,%rdx
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx),%rax
    adcq $0,%rdx
    addq 8(%r14),%r11
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq 16(%rsi,%r9,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    leaq 32(%r9),%r15
    leaq 32(%rcx),%rcx
    adcq $0,%rdx
    movq %rdx,%r13
    jmp L$inner4x

.p2align 5
L$inner4x:
    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx),%rax
    adcq $0,%rdx
    addq 16(%r14),%r10
    leaq 32(%r14),%r14
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %rdi,-32(%r14)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq -8(%rcx),%rax
    adcq $0,%rdx
    addq -8(%r14),%r11
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %r13,-24(%r14)
    movq %rdx,%r13

    mulq %rbx
    addq %rax,%r10
    movq 0(%rcx),%rax
    adcq $0,%rdx
    addq (%r14),%r10
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq 8(%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %rdi,-16(%r14)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq 8(%rcx),%rax
    adcq $0,%rdx
    addq 8(%r14),%r11
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq 16(%rsi,%r15,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    leaq 32(%rcx),%rcx
    adcq $0,%rdx
    movq %r13,-8(%r14)
    movq %rdx,%r13

    addq $32,%r15
    jnz L$inner4x

    // Tail of an accumulate pass.
    mulq %rbx
    addq %rax,%r10
    movq -16(%rcx),%rax
    adcq $0,%rdx
    addq 16(%r14),%r10
    leaq 32(%r14),%r14
    adcq $0,%rdx
    movq %rdx,%r11

    mulq %rbp
    addq %rax,%r13
    movq -8(%rsi),%rax
    adcq $0,%rdx
    addq %r10,%r13
    adcq $0,%rdx
    movq %rdi,-32(%r14)
    movq %rdx,%rdi

    mulq %rbx
    addq %rax,%r11
    movq %rbp,%rax                  // swap roles: rax = multiplier,
    movq -8(%rcx),%rbp              // rbp = last word read through rcx
    adcq $0,%rdx
    addq -8(%r14),%r11
    adcq $0,%rdx
    movq %rdx,%r10

    mulq %rbp
    addq %rax,%rdi
    movq (%rsi,%r9,1),%rax
    adcq $0,%rdx
    addq %r11,%rdi
    adcq $0,%rdx
    movq %r13,-24(%r14)
    movq %rdx,%r13

    movq %rdi,-16(%r14)
    leaq (%rcx,%r9,1),%rcx

    xorq %rdi,%rdi
    addq %r10,%r13
    adcq $0,%rdi
    addq (%r14),%r13
    adcq $0,%rdi
    movq %r13,-8(%r14)

    cmpq 16+8(%rsp),%r12            // loop until the table cursor hits the sentinel
    jb L$outer4x

    // Set up registers for the shared tail at L$sqr4x_sub_entry.
    // r15 is zero here (the inner loop exits when it reaches zero), so
    // adcq turns the borrow of (rbp - r13) into 0 or 1; rax becomes
    // 0 - (top carry | that borrow).
    xorq %rax,%rax
    subq %r13,%rbp
    adcq %r15,%r15
    orq %r15,%rdi
    subq %rdi,%rax
    leaq (%r14,%r9,1),%rbx          // rbx = start of the temporary
    movq (%rcx),%r12
    leaq (%rcx),%rbp
    movq %r9,%rcx
    sarq $3+2,%rcx                  // rcx = -(bytes / 32)
    movq 56+8(%rsp),%rdi            // reload the saved rdi
    decq %r12
    xorq %r10,%r10
    movq 8(%rbp),%r13
    movq 16(%rbp),%r14
    movq 24(%rbp),%r15
    jmp L$sqr4x_sub_entry
Adam Langleyd9e397b2015-01-22 14:27:53 -08001072
//-----------------------------------------------------------------------
// _bn_power5
//
// Sets up a frame, runs the pair (__bn_sqr8x_internal,
// __bn_post4x_internal) five times, then calls mul4x_internal once.
// NOTE(review): per the symbol name this presumably raises the input
// to the 32nd power and multiplies by a table entry -- confirm against
// the helpers, which are not fully visible here.
//
// Register use as observed:
//   %rdi, %rcx, %rdx  parked in %xmm1, %xmm2, %xmm4 across the helper
//                     calls; %rcx and %rdx are reloaded afterwards
//   %rsi              passed to the helpers; copied into %rdi before
//                     mul4x_internal
//   %r8               pointer; the value it points to is stored at
//                     32(%rsp), and the address of that slot is passed
//                     on in %r8
//   %r9d              word count
// Returns %rax = 1 and restores %rbx, %rbp, %r12-%r15, %rsp.
//-----------------------------------------------------------------------
.globl _bn_power5
.private_extern _bn_power5

.p2align 5
_bn_power5:
    movq %rsp,%rax                  // rax = stack pointer at entry
    pushq %rbx
    pushq %rbp
    pushq %r12
    pushq %r13
    pushq %r14
    pushq %r15

L$power5_prologue:

    shll $3,%r9d                    // r9 = word count * 8 (bytes)
    leal (%r9,%r9,2),%r10d          // r10 = 3 * bytes
    negq %r9
    movq (%r8),%r8                  // load the value behind the r8 pointer

    // Choose the frame base in rbp from the page offset of the
    // tentative frame relative to rdi.
    // NOTE(review): presumably avoids aliasing with the output buffer
    // modulo the page size -- confirm with the generator.
    leaq -320(%rsp,%r9,2),%r11
    movq %rsp,%rbp
    subq %rdi,%r11
    andq $4095,%r11
    cmpq %r11,%r10
    jb L$pwr_sp_alt
    subq %r11,%rbp
    leaq -320(%rbp,%r9,2),%rbp
    jmp L$pwr_sp_done

.p2align 5
L$pwr_sp_alt:
    leaq 4096-320(,%r9,2),%r10
    leaq -320(%rbp,%r9,2),%rbp
    subq %r10,%r11
    movq $0,%r10
    cmovcq %r10,%r11                // clamp to 0 if the subtraction borrowed
    subq %r11,%rbp
L$pwr_sp_done:
    andq $-64,%rbp                  // 64-byte align the frame

    // Move rsp down to rbp in steps of at most 4096 bytes, reading one
    // word at each step.
    movq %rsp,%r11
    subq %rbp,%r11
    andq $-4096,%r11
    leaq (%r11,%rbp,1),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja L$pwr_page_walk
    jmp L$pwr_page_walk_done

L$pwr_page_walk:
    leaq -4096(%rsp),%rsp
    movq (%rsp),%r10
    cmpq %rbp,%rsp
    ja L$pwr_page_walk
L$pwr_page_walk_done:

    movq %r9,%r10                   // r10 = -bytes
    negq %r9                        // r9  = +bytes

    movq %r8,32(%rsp)               // value loaded from the r8 pointer
    movq %rax,40(%rsp)              // entry stack pointer

L$power5_body:
.byte 102,72,15,110,207             // movq %rdi,%xmm1 (hand-encoded)
.byte 102,72,15,110,209             // movq %rcx,%xmm2
.byte 102,73,15,110,218             // movq %r10,%xmm3
.byte 102,72,15,110,226             // movq %rdx,%xmm4

    call __bn_sqr8x_internal
    call __bn_post4x_internal
    call __bn_sqr8x_internal
    call __bn_post4x_internal
    call __bn_sqr8x_internal
    call __bn_post4x_internal
    call __bn_sqr8x_internal
    call __bn_post4x_internal
    call __bn_sqr8x_internal
    call __bn_post4x_internal

.byte 102,72,15,126,209             // movq %xmm2,%rcx
.byte 102,72,15,126,226             // movq %xmm4,%rdx
    movq %rsi,%rdi
    movq 40(%rsp),%rax              // mul4x_internal reads the index at 8(%rax)
    leaq 32(%rsp),%r8               // pointer to the value saved above

    call mul4x_internal

    // Epilogue: restore callee-saved registers relative to the saved
    // entry stack pointer.
    movq 40(%rsp),%rsi
    movq $1,%rax
    movq -48(%rsi),%r15
    movq -40(%rsi),%r14
    movq -32(%rsi),%r13
    movq -24(%rsi),%r12
    movq -16(%rsi),%rbp
    movq -8(%rsi),%rbx
    leaq (%rsi),%rsp
L$power5_epilogue:
    .byte 0xf3,0xc3                 // rep ret
1202
1203
Robert Sloana94fe052017-02-21 08:49:28 -08001204
// bn_sqr8x_internal / __bn_sqr8x_internal
//
// Internal helper with a register-based (non-C) calling convention. It
// writes the double-width square of a multi-word integer into the scratch
// vector t[] on the stack and then FALLS THROUGH into __bn_sqr8x_reduction,
// which follows immediately; there is deliberately no return here.
//
// In (as read by the code below):
//   %rsi         first input word a[0]
//   %r9          input length in bytes
//   %r10         seeds the negative byte index (32(%r10) -> %rbp); the
//                caller visible in this file passes %r10 = -%r9
//   %xmm2        value moved into %rbp just before the fall-through
//   48+8(%rsp)   start of t[], 2*%r9 bytes (the +8 skips this routine's own
//                return address; callers address the same area as 48(%rsp))
// Writes: %rax %rbx %rcx %rdx %rbp %rsi %rdi %r8 %r10-%r15, flags, t[].
//   %rsi is left pointing one past the last input word.
//
// Structure:
//   1. L$sqr4x_1st / L$sqr4x_outer / L$sqr4x_inner accumulate the
//      off-diagonal products a[i]*a[j] (i < j) into t[], two multiplier
//      words (%r14, %r15) per pass.
//   2. L$sqr4x_shift_n_add doubles t[] one bit at a time across words and
//      adds the diagonal squares a[i]*a[i].
//
// Lone ".byte 0x67" lines are address-size prefixes on the following
// instruction; they look like code-alignment padding.
// NOTE(review): confirm against the perlasm source.
.globl	_bn_sqr8x_internal
.private_extern	_bn_sqr8x_internal

.p2align	5
_bn_sqr8x_internal:
__bn_sqr8x_internal:
	// %rbp = 32 - len: negative byte index, counted up towards zero.
	leaq	32(%r10),%rbp
	// %rsi = one past the last input word; a[] is addressed as (%rsi,idx).
	leaq	(%rsi,%r9,1),%rsi

	movq	%r9,%rcx

	// First pass: %r14 = a[0], %r15 = a[1]; %rdi = end of t[] biased by
	// the running index. Products are stored without reading t[] first.
	movq	-32(%rsi,%rbp,1),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi,%rbp,1),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi,%rbp,1),%rbx
	movq	%rax,%r15

	mulq	%r14
	movq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	movq	%r10,-24(%rdi,%rbp,1)

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	adcq	$0,%rdx
	movq	%r11,-16(%rdi,%rbp,1)
	movq	%rdx,%r10

	movq	-8(%rsi,%rbp,1),%rbx
	mulq	%r15
	movq	%rax,%r12
	movq	%rbx,%rax
	movq	%rdx,%r13

	leaq	(%rbp),%rcx
	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)
	jmp	L$sqr4x_1st

.p2align	5
L$sqr4x_1st:
	// Four input words per iteration, each multiplied by %r15 and %r14;
	// %rcx is the negative byte index and the loop ends when it hits 0.
	movq	(%rsi,%rcx,1),%rbx
	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	8(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,(%rdi,%rcx,1)
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	16(%rsi,%rcx,1),%rbx
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11

	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%r10,8(%rdi,%rcx,1)
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	24(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,16(%rdi,%rcx,1)
	movq	%rdx,%r13
	adcq	$0,%r13
	leaq	32(%rcx),%rcx

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)

	cmpq	$0,%rcx
	jne	L$sqr4x_1st

	// Last product of the first pass; advance the outer index by two words.
	mulq	%r15
	addq	%rax,%r13
	leaq	16(%rbp),%rbp
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx

	movq	%r13,(%rdi)
	movq	%rdx,%r12
	movq	%rdx,8(%rdi)
	jmp	L$sqr4x_outer

.p2align	5
L$sqr4x_outer:
	// Next multiplier pair in %r14/%r15. From here on products are added
	// to the words already stored in t[] instead of overwriting them.
	movq	-32(%rsi,%rbp,1),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi,%rbp,1),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi,%rbp,1),%rbx
	movq	%rax,%r15

	mulq	%r14
	movq	-24(%rdi,%rbp,1),%r10
	addq	%rax,%r10
	movq	%rbx,%rax
	adcq	$0,%rdx
	movq	%r10,-24(%rdi,%rbp,1)
	movq	%rdx,%r11

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	-16(%rdi,%rbp,1),%r11
	movq	%rdx,%r10
	adcq	$0,%r10
	movq	%r11,-16(%rdi,%rbp,1)

	xorq	%r12,%r12

	movq	-8(%rsi,%rbp,1),%rbx
	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	-8(%rdi,%rbp,1),%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	%r12,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rbp,1)

	leaq	(%rbp),%rcx
	jmp	L$sqr4x_inner

.p2align	5
L$sqr4x_inner:
	// Two input words per iteration, multiply-accumulate into t[];
	// ends when %rcx reaches 0.
	movq	(%rsi,%rcx,1),%rbx
	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%rdx,%r12
	adcq	$0,%r12
	addq	(%rdi,%rcx,1),%r13
	adcq	$0,%r12

.byte	0x67
	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	8(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%r11,(%rdi,%rcx,1)
	movq	%rbx,%rax
	movq	%rdx,%r13
	adcq	$0,%r13
	addq	8(%rdi,%rcx,1),%r12
	leaq	16(%rcx),%rcx
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	%r12,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)

	cmpq	$0,%rcx
	jne	L$sqr4x_inner

.byte	0x67
	mulq	%r15
	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx

	movq	%r13,(%rdi)
	movq	%rdx,%r12
	movq	%rdx,8(%rdi)

	addq	$16,%rbp
	jnz	L$sqr4x_outer

	// %rbp == 0 here: handle the last four input words (-32..-8(%rsi)).
	movq	-32(%rsi),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi),%rbx
	movq	%rax,%r15

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	%r10,-24(%rdi)
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	movq	-8(%rsi),%rbx
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,-16(%rdi)
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi)

	mulq	%r15
	addq	%rax,%r13
	movq	-16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx

	movq	%r13,(%rdi)
	movq	%rdx,%r12
	movq	%rdx,8(%rdi)

	// Final cross product of the two top words; then reset the index to
	// 16 - len and clear %r14/%r15 for the shift-and-add phase.
	mulq	%rbx
	addq	$16,%rbp
	xorq	%r14,%r14
	subq	%r9,%rbp
	xorq	%r15,%r15

	addq	%r12,%rax
	adcq	$0,%rdx
	movq	%rax,8(%rdi)
	movq	%rdx,16(%rdi)
	movq	%r15,24(%rdi)

	// Shift-and-add phase. For each pair of t[] words:
	//   lea (hi_bit,word,2) doubles the word and shifts in the bit
	//   carried from the word below (shrq $63 extracts the next one);
	//   mulq %rax squares one input word and the 128-bit square is added.
	// %r15 holds the add carry between steps (sbbq saves it, negq
	// restores it into CF). %rcx is zero throughout this phase because
	// the loops above exit only when it reaches 0.
	movq	-16(%rsi,%rbp,1),%rax
	leaq	48+8(%rsp),%rdi
	xorq	%r10,%r10
	movq	8(%rdi),%r11

	leaq	(%r14,%r10,2),%r12
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	24(%rdi),%r11
	adcq	%rax,%r12
	movq	-8(%rsi,%rbp,1),%rax
	movq	%r12,(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	movq	32(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	40(%rdi),%r11
	adcq	%rax,%rbx
	movq	0(%rsi,%rbp,1),%rax
	movq	%rbx,16(%rdi)
	adcq	%rdx,%r8
	leaq	16(%rbp),%rbp
	movq	%r8,24(%rdi)
	sbbq	%r15,%r15
	leaq	64(%rdi),%rdi
	jmp	L$sqr4x_shift_n_add

.p2align	5
L$sqr4x_shift_n_add:
	// Four input words (eight t[] words) per iteration.
	leaq	(%r14,%r10,2),%r12
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	-16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	-8(%rdi),%r11
	adcq	%rax,%r12
	movq	-8(%rsi,%rbp,1),%rax
	movq	%r12,-32(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,-24(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	movq	0(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	8(%rdi),%r11
	adcq	%rax,%rbx
	movq	0(%rsi,%rbp,1),%rax
	movq	%rbx,-16(%rdi)
	adcq	%rdx,%r8

	leaq	(%r14,%r10,2),%r12
	movq	%r8,-8(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	24(%rdi),%r11
	adcq	%rax,%r12
	movq	8(%rsi,%rbp,1),%rax
	movq	%r12,0(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	movq	32(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	40(%rdi),%r11
	adcq	%rax,%rbx
	movq	16(%rsi,%rbp,1),%rax
	movq	%rbx,16(%rdi)
	adcq	%rdx,%r8
	movq	%r8,24(%rdi)
	sbbq	%r15,%r15
	leaq	64(%rdi),%rdi
	addq	$32,%rbp
	jnz	L$sqr4x_shift_n_add

	// Tail: the two highest input words (four t[] words).
	leaq	(%r14,%r10,2),%r12
.byte	0x67
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	-16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	-8(%rdi),%r11
	adcq	%rax,%r12
	movq	-8(%rsi),%rax
	movq	%r12,-32(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,-24(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	mulq	%rax
	negq	%r15
	adcq	%rax,%rbx
	adcq	%rdx,%r8
	movq	%rbx,-16(%rdi)
	movq	%r8,-8(%rdi)
	// 66 48 0F 7E D5 = movq %xmm2,%rbp, then fall through into
	// __bn_sqr8x_reduction.
.byte	102,72,15,126,213
// __bn_sqr8x_reduction
//
// Montgomery-style reduction of the double-width value t[] held on the
// stack, done in place and eight words of t[] per outer iteration. Reached
// either by fall-through from __bn_sqr8x_internal or by a direct call.
// Register-based (non-C) calling convention.
//
// In (as read by the code below):
//   %rbp         modulus words n[] (read 64 bytes at a time)
//   %r9          length in bytes (positive on entry)
//   32+8(%rsp)   per-word multiplier n0 (each step uses m = t_word * n0)
//   48+8(%rsp)   start of t[], 2*%r9 bytes
//   %xmm2        reloaded into %rbp after every outer iteration
//   %xmm3        reloaded into %r9 after every outer iteration; the caller
//                visible in this file passes the negated byte length
// Out:
//   %rax         carry out of the topmost word (0 or 1)
//   %rdi         end of t[]
//   %r9, %rbp    as reloaded from %xmm3 / %xmm2
// Scratch slots: 0+8(%rsp) = end of n[], 8+8(%rsp) = end of t[]; the eight
// multipliers m of the current chunk are parked at 48+8(%rsp)..48+56+8(%rsp).
// Writes: %rax %rbx %rcx %rdx %rsi %rdi %rbp %r8-%r15, flags, t[].
//
// Lone ".byte 0x66"/".byte 0x67" lines are prefixes on the following
// instruction; they look like code-alignment padding.
// NOTE(review): confirm against the perlasm source.
__bn_sqr8x_reduction:
	xorq	%rax,%rax
	leaq	(%r9,%rbp,1),%rcx
	leaq	48+8(%rsp,%r9,2),%rdx
	movq	%rcx,0+8(%rsp)
	leaq	48+8(%rsp,%r9,1),%rdi
	movq	%rdx,8+8(%rsp)
	negq	%r9
	jmp	L$8x_reduction_loop

.p2align	5
L$8x_reduction_loop:
	// Step back by the length from the current upper window: first time
	// round this is the start of t[]. Load eight words into
	// %rbx,%r9..%r15 (%r9 is reused as data; it is reloaded from %xmm3
	// at the bottom of the loop).
	leaq	(%rdi,%r9,1),%rdi
.byte	0x66
	movq	0(%rdi),%rbx
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	movq	32(%rdi),%r12
	movq	40(%rdi),%r13
	movq	48(%rdi),%r14
	movq	56(%rdi),%r15
	// Park the carry from the previous outer iteration at the top.
	movq	%rax,(%rdx)
	leaq	64(%rdi),%rdi

.byte	0x67
	movq	%rbx,%r8
	// m = t[0] * n0 (low 64 bits).
	imulq	32+8(%rsp),%rbx
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	L$8x_reduce

.p2align	5
L$8x_reduce:
	// One step: add m * n[0..7] to the eight-word window and drop the
	// lowest word. The low half of m*n[0] is discarded; negq %r8 sets
	// CF = (%r8 != 0), used as the carry out of that word (presumably the
	// sum's low word is zero by the choice of n0 - NOTE(review): confirm).
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	// Remember this step's m for the tail pass.
	movq	%rbx,48-8+8(%rsp,%rcx,8)
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	32+8(%rsp),%rsi
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	// Next step's m = new low word * n0, computed early.
	imulq	%r8,%rsi
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jnz	L$8x_reduce

	// Move to the next 64 bytes of n[]; if that is the end of n[] there
	// is no tail to process.
	leaq	64(%rbp),%rbp
	xorq	%rax,%rax
	movq	8+8(%rsp),%rdx
	cmpq	0+8(%rsp),%rbp
	jae	L$8x_no_tail

.byte	0x66
	// Fold in the next eight words of t[]; %rsi keeps the carry as 0/-1.
	addq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	sbbq	%rsi,%rsi

	// First parked multiplier (the one stored with %ecx == 8).
	movq	48+56+8(%rsp),%rbx
	movl	$8,%ecx
	movq	0(%rbp),%rax
	jmp	L$8x_tail

.p2align	5
L$8x_tail:
	// Multiply the current eight words of n[] by each parked m in turn,
	// storing one finished word of t[] per step.
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rbp),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	leaq	8(%rdi),%rdi
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	// Next parked multiplier.
	movq	48-16+8(%rsp,%rcx,8),%rbx
	addq	%rax,%r15
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	0(%rbp),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jnz	L$8x_tail

	leaq	64(%rbp),%rbp
	movq	8+8(%rsp),%rdx
	cmpq	0+8(%rsp),%rbp
	jae	L$8x_tail_done

	// More of n[] left: restore the saved carry (negq %rsi), fold in the
	// next eight words of t[] and run the tail again.
	movq	48+56+8(%rsp),%rbx
	negq	%rsi
	movq	0(%rbp),%rax
	adcq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	sbbq	%rsi,%rsi

	movl	$8,%ecx
	jmp	L$8x_tail

.p2align	5
L$8x_tail_done:
	// Add the carry word parked at the top by the outer loop.
	xorq	%rax,%rax
	addq	(%rdx),%r8
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	adcq	$0,%rax

	negq	%rsi
L$8x_no_tail:
	// Entered with CF clear when jumped to from above (jae is taken only
	// when CF == 0), or with the saved carry restored by negq %rsi.
	adcq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	adcq	$0,%rax
	movq	-8(%rbp),%rcx
	xorq	%rsi,%rsi

	// 66 48 0F 7E D5 = movq %xmm2,%rbp (rewind to the start of n[]).
.byte	102,72,15,126,213

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	// 66 49 0F 7E D9 = movq %xmm3,%r9 (reload the length register).
.byte	102,73,15,126,217
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)
	leaq	64(%rdi),%rdi

	cmpq	%rdx,%rdi
	jb	L$8x_reduction_loop
	// F3 C3 = rep ret.
	.byte	0xf3,0xc3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001986
Adam Langleyd9e397b2015-01-22 14:27:53 -08001987
// __bn_post4x_internal
//
// Final step after __bn_sqr8x_reduction: copies the reduced value to the
// destination while conditionally subtracting the modulus, four words per
// iteration. The subtraction is done without branching: each modulus word
// is complemented, ANDed with an all-ones/all-zeros mask and added with
// carry. Register-based (non-C) calling convention.
//
// In (as read by the code below):
//   %rax    0 or 1; negated into the mask (1 -> subtract, 0 -> plain copy)
//   %rbp    modulus words n[]
//   %rdi    end of the source words; the source starts at %rdi + %r9
//   %r9     negated length in bytes (so %r9 >> 5 is minus the number of
//           four-word groups)
//   %xmm1   destination pointer
// Out:
//   %r10 = entry %r9, %r9 = -(entry %r9); %rdi = end of destination,
//   %rsi = destination pointer (copied from %xmm1).
// Writes: %rax %rbx %rcx %rbp %rsi %rdi %r9 %r10 %r12-%r15, flags.
.p2align	5
__bn_post4x_internal:
	movq	0(%rbp),%r12
	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	// 66 48 0F 7E CF = movq %xmm1,%rdi
.byte	102,72,15,126,207
	negq	%rax
	// 66 48 0F 7E CE = movq %xmm1,%rsi
.byte	102,72,15,126,206
	sarq	$3+2,%rcx
	// ~(n[0] - 1) == -n[0]: the "+1" of the two's-complement negation is
	// folded into the lowest word here, so the carry chain starts at 0.
	decq	%r12
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	L$sqr4x_sub_entry

.p2align	4
L$sqr4x_sub:
	movq	0(%rbp),%r12
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
L$sqr4x_sub_entry:
	leaq	32(%rbp),%rbp
	notq	%r12
	notq	%r13
	notq	%r14
	notq	%r15
	andq	%rax,%r12
	andq	%rax,%r13
	andq	%rax,%r14
	andq	%rax,%r15

	// %r10 is 0 or -1; negq turns it back into CF for the adc chain and
	// sbbq below saves the new carry for the next group.
	negq	%r10
	adcq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	adcq	16(%rbx),%r14
	adcq	24(%rbx),%r15
	movq	%r12,0(%rdi)
	leaq	32(%rbx),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r10,%r10
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi

	incq	%rcx
	jnz	L$sqr4x_sub

	movq	%r9,%r10
	negq	%r9
	// F3 C3 = rep ret.
	.byte	0xf3,0xc3
2040
// bn_from_montgomery
//
// Public entry point. Only the sixth integer argument (%r9d) is examined
// here: if its low three bits are all zero the call is handed to
// bn_from_mont8x with every argument register untouched; otherwise the
// function returns 0 in %eax without doing any work.
.globl	_bn_from_montgomery
.private_extern	_bn_from_montgomery

.p2align	5
_bn_from_montgomery:
	testl	$7,%r9d
	jz	bn_from_mont8x
	xorl	%eax,%eax
	// F3 C3 = rep ret.
	.byte	0xf3,0xc3
2050
2051
2052
// bn_from_mont8x
//
// Worker behind bn_from_montgomery for word counts that are a multiple of
// eight. It copies the input into a zero-extended double-width scratch
// vector on the stack, runs __bn_sqr8x_reduction and __bn_post4x_internal
// on it, wipes the scratch vector and returns 1.
//
// In (as read by the code below):
//   %rdi   destination pointer (parked in %xmm1 for __bn_post4x_internal)
//   %rsi   input words (read with unaligned loads)
//   %rcx   pointer handed to the reduction in %rbp / %xmm2
//          (presumably the modulus - the reduction reads it as n[])
//   %r8    pointer to the n0 word (dereferenced once)
//   %r9d   length in words
//   %rdx   not referenced
// Out: %rax = 1. %rbx %rbp %r12-%r15 and %rsp are restored.
//
// Frame layout relative to the final %rsp:
//   32(%rsp)  n0 value
//   40(%rsp)  %rsp as it was on entry (used by the epilogue)
//   48(%rsp)  scratch vector, 2 * length bytes
.p2align	5
bn_from_mont8x:
	// 0x67 is a prefix on the following movq, not an instruction.
.byte	0x67
	movq	%rsp,%rax

	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

L$from_prologue:

	// %r9 = -(length in bytes), %r10 = 3 * length in bytes, %r8 = n0.
	shll	$3,%r9d
	leaq	(%r9,%r9,2),%r10
	negq	%r9
	movq	(%r8),%r8

	// Pick the frame address from the distance between the candidate
	// frame (%rsp - 320 - 2*len) and %rdi, taken modulo 4096.
	// NOTE(review): presumably this keeps the frame from sharing page
	// offsets with the caller's buffers - confirm against the perlasm
	// source comments.
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	L$from_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	L$from_sp_done

.p2align	5
L$from_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
L$from_sp_done:
	// 64-byte align the frame, then move %rsp down to it in steps of at
	// most 4096 bytes, reading one word at every step so that each page
	// on the way is touched in order.
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$from_page_walk
	jmp	L$from_page_walk_done

L$from_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$from_page_walk
L$from_page_walk_done:

	// %r10 = -len, %r9 = +len (bytes).
	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)

L$from_body:
	movq	%r9,%r11
	leaq	48(%rsp),%rax
	pxor	%xmm0,%xmm0
	jmp	L$mul_by_1

.p2align	5
L$mul_by_1:
	// 64 bytes per iteration: copy the input into the lower half of the
	// scratch vector and zero the matching part of the upper half.
	movdqu	(%rsi),%xmm1
	movdqu	16(%rsi),%xmm2
	movdqu	32(%rsi),%xmm3
	movdqa	%xmm0,(%rax,%r9,1)
	movdqu	48(%rsi),%xmm4
	movdqa	%xmm0,16(%rax,%r9,1)
	// 48 8D B6 40 00 00 00 = leaq 64(%rsi),%rsi (32-bit displacement form)
.byte	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
	movdqa	%xmm1,(%rax)
	movdqa	%xmm0,32(%rax,%r9,1)
	movdqa	%xmm2,16(%rax)
	movdqa	%xmm0,48(%rax,%r9,1)
	movdqa	%xmm3,32(%rax)
	movdqa	%xmm4,48(%rax)
	leaq	64(%rax),%rax
	subq	$64,%r11
	jnz	L$mul_by_1

	// Park the values the two helpers reload:
	//   66 48 0F 6E CF = movq %rdi,%xmm1
	//   66 48 0F 6E D1 = movq %rcx,%xmm2
	//   66 49 0F 6E DA = movq %r10,%xmm3
.byte	102,72,15,110,207
.byte	102,72,15,110,209
.byte	0x67
	movq	%rcx,%rbp
.byte	102,73,15,110,218
	call	__bn_sqr8x_reduction
	call	__bn_post4x_internal

	pxor	%xmm0,%xmm0
	leaq	48(%rsp),%rax
	jmp	L$from_mont_zero

.p2align	5
L$from_mont_zero:
	// Wipe the scratch vector: 64 bytes are cleared for every 32 taken
	// off %r9 (the length in bytes), i.e. 2 * length bytes in total.
	// %rsi is (re)loaded with the entry %rsp for the epilogue.
	movq	40(%rsp),%rsi

	movdqa	%xmm0,0(%rax)
	movdqa	%xmm0,16(%rax)
	movdqa	%xmm0,32(%rax)
	movdqa	%xmm0,48(%rax)
	leaq	64(%rax),%rax
	subq	$32,%r9
	jnz	L$from_mont_zero

	// Return 1 and restore the registers pushed in the prologue.
	movq	$1,%rax
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$from_epilogue:
	// F3 C3 = rep ret.
	.byte	0xf3,0xc3
2202
Robert Sloana94fe052017-02-21 08:49:28 -08002203
// bn_scatter5
//
// Spreads a vector of 64-bit words into a table with a fixed stride: word i
// of the input is stored at tbl + idx*8 + i*256.
//
// In:
//   %rdi   input words
//   %esi   number of words; 0 returns immediately
//   %rdx   table base
//   %rcx   slot index (scaled by 8)
// Out: nothing. Writes: %rax %rdx %rdi %esi, flags.
.globl	_bn_scatter5
.private_extern	_bn_scatter5

.p2align	4
_bn_scatter5:
	cmpl	$0,%esi
	jz	L$scatter_epilogue
	// %rdx = address of slot idx in the first 256-byte row.
	leaq	(%rdx,%rcx,8),%rdx
L$scatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	// Same slot, next row.
	leaq	256(%rdx),%rdx
	subl	$1,%esi
	jnz	L$scatter
L$scatter_epilogue:
	// F3 C3 = rep ret.
	.byte	0xf3,0xc3
2221
2222
// bn_gather5
//
// Reads back one column of the table written by bn_scatter5. For each
// output word the whole 256-byte row (32 slots) is loaded and ANDed with a
// precomputed set of masks of which only the one matching idx is non-zero,
// so the addresses touched do not depend on idx.
//
// In:
//   %rdi   output words
//   %esi   number of words; MUST be non-zero - the loop body runs before
//          the count is tested
//   %rdx   table base; read with movdqa, so it must be 16-byte aligned
//   %ecx   slot index, compared against 0..31
// Out: nothing. Writes: %rax %rdi %esi %r10 %r11 %xmm0-%xmm5, flags.
// Stack: 264 bytes are reserved and %rsp is rounded down to 16; the entry
// %rsp is kept in %r10 and restored before returning.
.globl	_bn_gather5
.private_extern	_bn_gather5

.p2align	5
_bn_gather5:
L$SEH_begin_bn_gather5:

	// 4C 8D 14 24          = leaq (%rsp),%r10
	// 48 81 EC 08 01 00 00 = subq $0x108,%rsp
.byte	0x4c,0x8d,0x14,0x24
.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00
	leaq	L$inc(%rip),%rax
	andq	$-16,%rsp

	// Build 16 masks of 16 bytes each at 0..255(%rsp), addressed below
	// as -128..112(%rax). %xmm5 = idx in every dword lane. Starting from
	// L$inc's first vector (0,0,1,1) and stepping by its second vector
	// (2,2,2,2), mask k is all-ones in its low qword when idx == 2k and
	// in its high qword when idx == 2k+1.
	movd	%ecx,%xmm5
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	128(%rdx),%r11
	leaq	128(%rsp),%rax

	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-128(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-112(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-96(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-80(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-48(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-16(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,16(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,48(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,80(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,96(%rax)
	movdqa	%xmm4,%xmm2
	movdqa	%xmm3,112(%rax)
	jmp	L$gather

.p2align	5
L$gather:
	// One output word per iteration: AND each of the 16 vectors of the
	// current row (%r11 - 128 .. %r11 + 112) with its mask and OR the
	// results into two accumulators (%xmm4, %xmm5).
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r11),%xmm0
	movdqa	-112(%r11),%xmm1
	movdqa	-96(%r11),%xmm2
	pand	-128(%rax),%xmm0
	movdqa	-80(%r11),%xmm3
	pand	-112(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r11),%xmm0
	movdqa	-48(%r11),%xmm1
	movdqa	-32(%r11),%xmm2
	pand	-64(%rax),%xmm0
	movdqa	-16(%r11),%xmm3
	pand	-48(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r11),%xmm0
	movdqa	16(%r11),%xmm1
	movdqa	32(%r11),%xmm2
	pand	0(%rax),%xmm0
	movdqa	48(%r11),%xmm3
	pand	16(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r11),%xmm0
	movdqa	80(%r11),%xmm1
	movdqa	96(%r11),%xmm2
	pand	64(%rax),%xmm0
	movdqa	112(%r11),%xmm3
	pand	80(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	leaq	256(%r11),%r11
	// Swap the two qwords and OR, so the selected word ends up in the
	// low qword whichever half it came from.
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi
	subl	$1,%esi
	jnz	L$gather

	// Restore the entry %rsp saved in %r10.
	leaq	(%r10),%rsp
	// F3 C3 = rep ret.
	.byte	0xf3,0xc3
L$SEH_end_bn_gather5:
2386
// L$inc: two 16-byte vectors loaded by bn_gather5 with movdqa (hence the
// alignment). The first, (0,0,1,1), is the initial pair of slot indices;
// the second, (2,2,2,2), is the step added to get each following pair.
.p2align	6
L$inc:
.long	0,0, 1,1
.long	2,2, 2,2
// NUL-terminated ASCII: "Montgomery Multiplication with scatter/gather for
// x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2392#endif