blob: bc324888b6400a879facb28146a30f885dd2a540 [file] [log] [blame]
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
3
# Everything below is assembled only for 32-bit x86 targets; the matching
# #endif is the last line of the file.
#if defined(__i386__)
# When symbol prefixing is enabled, pull in the header that renames the
# exported symbols.
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
# -----------------------------------------------------------------------
# _ChaCha20_ctr32 -- XOR a buffer with a ChaCha20 keystream (i386, Mach-O,
# AT&T syntax, position independent).
#
# Stack arguments as used by this code (offsets are relative to %esp after
# the four register pushes below):
#   arg1  20(%esp)  destination buffer (written)
#   arg2  24(%esp)  source buffer (read)
#   arg3  28(%esp)  byte count; a count of zero returns immediately
#   arg4  32(%esp)  pointer to eight 32-bit key words (read)
#   arg5  36(%esp)  pointer to four 32-bit counter/nonce words (read)
# NOTE(review): presumably this matches the C prototype
#   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
#                       const uint32_t key[8], const uint32_t counter[4]);
# -- confirm against the C header.
#
# Preserves %ebp, %ebx, %esi, %edi (pushed/popped here); uses %eax, %ecx,
# %edx and the flags as scratch.  The argument slots for arg1..arg3 are
# overwritten in place while looping.
#
# Dispatch: when bit 24 of word 0 and bit 9 of word 1 of OPENSSL_ia32cap_P
# are both set, control jumps to Lssse3_shortcut with %eax holding the
# address of Lpic_point; otherwise the integer-only code below runs.
# NOTE(review): those bits look like the FXSR and SSSE3 CPUID flags --
# confirm against the capability-vector layout.
#
# Integer path frame after subl $132 (offsets from %esp):
#   Range 0..60     working state, sixteen 32-bit words
#   Range 80..108   copy of the eight key words
#   Range 112..124  copy of the four counter/nonce words; word 0 is stored
#                   minus one and incremented before every block
#   Slot 128        remaining round-loop iterations
#   Slots 152/156/160  the arg1/arg2/arg3 slots (20/24/28 plus 132)
# -----------------------------------------------------------------------
.globl	_ChaCha20_ctr32
.private_extern	_ChaCha20_ctr32
.align	4
_ChaCha20_ctr32:
L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax		# byte count == 0 -> nothing to do
	je	L000no_data
	call	Lpic_point		# call/pop yields the PIC base in %eax
Lpic_point:
	popl	%eax
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)	# bit 24 of capability word 0
	jz	L001x86
	testl	$512,4(%ebp)		# bit 9 of capability word 1
	jz	L001x86
	jmp	Lssse3_shortcut		# %eax must still equal Lpic_point there
L001x86:
	movl	32(%esp),%esi		# key pointer
	movl	36(%esp),%edi		# counter/nonce pointer
	subl	$132,%esp
	# Copy the eight key words to 80..108(%esp).
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	# Copy the four counter/nonce words to 112..124(%esp).
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax			# compensates for the addl $1 at L002entry
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	L002entry
.align	4,0x90
L003outer_loop:
	# Write back the advanced source pointer, destination pointer and
	# remaining byte count before producing the next 64-byte block.
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
L002entry:
	# Initialise the working state.  The four immediates are the ChaCha
	# constants (little-endian ASCII of: expand 32-byte k).  Words 0, 4,
	# 8, 9, 12 and 14 stay in %eax, %ebp, %ecx, %esi, %edx and %edi; the
	# rest go to the working area.
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx			# advance the 32-bit block counter
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx		# ten iterations of the round loop
	jmp	L004loop
.align	4,0x90
L004loop:
	# One iteration = eight interleaved quarter-rounds (rotate counts
	# 16, 12, 8, 7).  %ebx is needed as a temporary, so the iteration
	# count lives at 128(%esp) for the duration of the body.
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx		# reload the iteration count
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	L004loop
	# Rounds finished: add the input state back in.  Words 0, 4, 8, 9
	# (and 12, 14 below) are still in registers.
	movl	160(%esp),%ebx		# remaining byte count
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	L005tail		# fewer than 64 bytes left
	# Full block: XOR 64 source bytes with the keystream and store them.
	movl	156(%esp),%ebx		# source pointer
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)		# park output word 0; %eax is needed next
	movl	152(%esp),%eax		# destination pointer
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx		# advance source pointer
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp		# output word 0 parked earlier
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx		# remaining byte count
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax		# advance destination pointer
	subl	$64,%ecx
	jnz	L003outer_loop
	jmp	L006done
L005tail:
	# Partial final block: materialise all 64 keystream bytes at
	# 0..60(%esp), then XOR the remaining %ebx bytes one at a time.
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp		# source pointer
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx		# destination pointer
	movl	%esi,44(%esp)
	xorl	%esi,%esi		# byte index
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
L007tail_loop:
	movb	(%esi,%ebp,1),%al	# source byte
	movb	(%esp,%esi,1),%dl	# keystream byte
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)	# index was already incremented
	decl	%ebx
	jnz	L007tail_loop
L006done:
	addl	$132,%esp
L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# -----------------------------------------------------------------------
# _ChaCha20_ssse3 -- SSSE3 ChaCha20 keystream XOR (i386, Mach-O, AT&T).
#
# Takes the same five stack arguments as _ChaCha20_ctr32 (destination,
# source, byte count, key pointer, counter/nonce pointer), read at
# 20..36(%esp) after the four pushes.  Preserves %ebp, %ebx, %esi, %edi;
# uses %eax, %ecx, %edx, %xmm0-%xmm7 and the flags.
#
# Two ways in:
#   * Lssse3_shortcut, jumped to from _ChaCha20_ctr32 with the registers
#     already pushed, a non-zero byte count and %eax = address of
#     Lpic_point.
#   * The exported symbol.  The body addresses Lssse3_data relative to
#     Lpic_point through %eax, so this prologue now establishes that base
#     itself and returns early for a zero byte count (the byte-wise tail
#     loop would otherwise decrement a zero counter).
#
# Frame (64-byte aligned %esp):
#   Range 0..255    four-block working state, addressed via %ebx = %esp+128
#   Range 256..511  four-block input state, addressed via %ebp = %esp+384
#   Slot 512        caller %esp, restored at L011done
#   Slots 516/520   key pointer / counter pointer (four-block path only)
# The single-block path reuses 0..63(%esp) for its input state.
# -----------------------------------------------------------------------
.globl	_ChaCha20_ssse3
.private_extern	_ChaCha20_ssse3
.align	4
_ChaCha20_ssse3:
L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax		# byte count == 0 -> nothing to do
	je	Lssse3_no_data
	call	Lssse3_pic_point	# call/pop, then rebase onto Lpic_point
Lssse3_pic_point:
	popl	%eax
	leal	Lpic_point-Lssse3_pic_point(%eax),%eax
Lssse3_shortcut:
	movl	20(%esp),%edi		# destination
	movl	24(%esp),%esi		# source
	movl	28(%esp),%ecx		# byte count
	movl	32(%esp),%edx		# key pointer
	movl	36(%esp),%ebx		# counter/nonce pointer
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)		# remember the caller stack pointer
	leal	Lssse3_data-Lpic_point(%eax),%eax	# %eax = constant table
	movdqu	(%ebx),%xmm3		# counter/nonce block
	cmpl	$256,%ecx
	jb	L0081x			# under four blocks: single-block path
	# Four-block setup: broadcast every state word across the four lanes
	# of its own vector so that four blocks are computed side by side.
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	paddd	48(%eax),%xmm0		# lane counters: +0,+1,+2,+3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0		# minus 4; the outer loop adds 4 first
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7		# the four ChaCha constants
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi		# bias the pointers by 128 so that all
	leal	128(%edi),%edi		# four blocks fit 8-bit displacements
	jmp	L009outer_loop
.align	4,0x90
L009outer_loop:
	# Copy the input state (via %ebp) into the working state (via %ebx),
	# advancing the per-lane block counters by 4.  Rows 0, 4, 8, 9 and 12
	# start out in %xmm0, %xmm3, %xmm4, %xmm5 and %xmm6.
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	paddd	64(%eax),%xmm4		# counters += 4 in every lane
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx		# ten iterations of the round loop
	nop
.align	4,0x90
L010loop:
	# One iteration = eight quarter-rounds on four blocks at once.
	# Rotations by 16 and 8 use pshufb with the masks at (%eax) and
	# 16(%eax); rotations by 12 and 7 use a shift pair plus por.
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	L010loop
	# Rounds finished.  Spill the rows still held in registers, then for
	# each group of four state rows: add the input state, transpose the
	# 4x4 dwords so every register holds 16 consecutive keystream bytes
	# of one block, and XOR with the source.  The four blocks sit 64
	# bytes apart, hence the -128/-64/0/64 displacements on the biased
	# pointers.
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	208(%esi),%esi		# 3*16 + 208 = 256 bytes consumed
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	L009outer_loop		# another full 256 bytes available
	addl	$256,%ecx		# undo the bias; %ecx = bytes left
	jz	L011done
	# Fewer than 256 bytes remain: remove the pointer bias and rebuild a
	# single-block counter/nonce vector in %xmm3 for the path below.
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2		# lane 0 counter of the last batch
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2		# +4 -> counter of the next block
	pand	112(%eax),%xmm3		# keep words 1..3, clear word 0
	por	%xmm2,%xmm3
L0081x:
	# Single-block path.  %xmm0..%xmm3 hold the four state rows; the
	# input state is kept at 0..63(%esp).  %xmm6/%xmm7 hold the pshufb
	# masks for the rotations by 16 and 8.
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)		# overwritten by the %xmm3 store below
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	L012loop1x
.align	4,0x90
L013outer1x:
	# Next block: reload the input state and add 1 to counter word 0.
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	L012loop1x
.align	4,0x90
L012loop1x:
	# One iteration = two rounds; the pshufd shuffles between them
	# rotate rows 1..3 and the shuffles at the end rotate them back.
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222			# pshufb %xmm6,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223			# pshufb %xmm7,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222			# pshufb %xmm6,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223			# pshufb %xmm7,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	L012loop1x
	paddd	(%esp),%xmm0		# add the input state back in
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	L014tail		# fewer than 64 bytes left
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	L013outer1x
	jmp	L011done
L014tail:
	# Partial final block: spill the keystream to 0..63(%esp) and XOR
	# the remaining %ecx bytes one at a time.  %eax stops being the
	# constant-table pointer from here on.
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp		# byte index
L015tail_loop:
	movb	(%esp,%ebp,1),%al	# keystream byte
	movb	(%esi,%ebp,1),%dl	# source byte
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)	# index was already incremented
	decl	%ecx
	jnz	L015tail_loop
L011done:
	movl	512(%esp),%esp		# back to the caller stack pointer
Lssse3_no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
# Constant table for the SSSE3 code, addressed through %eax.  The movdqa
# and memory-operand accesses need 16-byte alignment; .align 6 requests
# 2^6 = 64 bytes here.
#   Offset +0    pshufb mask rotating every dword left by 16 bits
#   Offset +16   pshufb mask rotating every dword left by 8 bits
#   Offset +32   the four ChaCha constants (ASCII of: expand 32-byte k)
#   Offset +48   per-lane counter offsets 0,1,2,3 (four-block setup)
#   Offset +64   counter step of 4 in every lane (four-block loop)
#   Offset +80   counter step of 1 in word 0 (single-block loop)
#   Offset +96   counter step of 4 in word 0 (four-block to single-block)
#   Offset +112  mask clearing word 0 and keeping words 1..3
.align	6,0x90
Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	6,0x90
# NUL-terminated ASCII identification string:
#   ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
# Non-lazy symbol pointer for _OPENSSL_ia32cap_P.  _ChaCha20_ctr32 loads
# this slot PC-relatively to reach the capability words; the zero
# placeholder is presumably bound to the symbol address by the dynamic
# linker -- confirm against the Mach-O documentation.
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif