// blob: 07c7e1fdb1d98f3387c86f43fb1681e9353f0980 [file] [log] [blame]
#if defined(__i386__)
// 32-bit x86 only.  AT&T (GAS) syntax; symbols carry the Mach-O leading
// underscore and are marked .private_extern.
.file "src/crypto/fipsmodule/aes/asm/aesni-x86.S"
.text
// _aesni_encrypt: encrypt a single 16-byte block with AES-NI.
//
// Arguments (read from the stack at entry):
//    4(%esp)  pointer to the 16-byte input block
//    8(%esp)  pointer to the 16-byte output block
//   12(%esp)  pointer to the key schedule: 16-byte round keys starting at
//             offset 0; the 32-bit value at offset 240 is the number of
//             aesenc rounds performed before the final aesenclast
// Clobbers: eax, ecx, edx, flags, xmm0-xmm2.  The three xmm registers are
// zeroed before returning so no key or block material is left in them.
.globl _aesni_encrypt
.private_extern _aesni_encrypt
.align 4
_aesni_encrypt:
L_aesni_encrypt_begin:
    movl 4(%esp),%eax               // eax = input pointer
    movl 12(%esp),%edx              // edx = key schedule
    movups (%eax),%xmm2             // xmm2 = block being processed
    movl 240(%edx),%ecx             // ecx = round-loop counter
    movl 8(%esp),%eax               // eax = output pointer
    movups (%edx),%xmm0             // round key 0
    movups 16(%edx),%xmm1           // round key 1
    leal 32(%edx),%edx              // edx -> round key 2
    xorps %xmm0,%xmm2               // initial whitening with round key 0
L000enc1_loop_1:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
    decl %ecx                       // sets ZF for the jnz below
    movups (%edx),%xmm1             // fetch next round key (flags untouched)
    leal 16(%edx),%edx
    jnz L000enc1_loop_1
.byte 102,15,56,221,209             // aesenclast %xmm1,%xmm2
    pxor %xmm0,%xmm0                // scrub round keys from registers
    pxor %xmm1,%xmm1
    movups %xmm2,(%eax)             // write the result
    pxor %xmm2,%xmm2                // scrub the block as well
    ret
// _aesni_decrypt: decrypt a single 16-byte block with AES-NI.
//
// Arguments (read from the stack at entry):
//    4(%esp)  pointer to the 16-byte input block
//    8(%esp)  pointer to the 16-byte output block
//   12(%esp)  pointer to the key schedule: 16-byte round keys starting at
//             offset 0; the 32-bit value at offset 240 is the number of
//             aesdec rounds performed before the final aesdeclast
// Clobbers: eax, ecx, edx, flags, xmm0-xmm2.  The three xmm registers are
// zeroed before returning so no key or block material is left in them.
.globl _aesni_decrypt
.private_extern _aesni_decrypt
.align 4
_aesni_decrypt:
L_aesni_decrypt_begin:
    movl 4(%esp),%eax               // eax = input pointer
    movl 12(%esp),%edx              // edx = key schedule
    movups (%eax),%xmm2             // xmm2 = block being processed
    movl 240(%edx),%ecx             // ecx = round-loop counter
    movl 8(%esp),%eax               // eax = output pointer
    movups (%edx),%xmm0             // round key 0
    movups 16(%edx),%xmm1           // round key 1
    leal 32(%edx),%edx              // edx -> round key 2
    xorps %xmm0,%xmm2               // initial whitening with round key 0
L001dec1_loop_2:
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
    decl %ecx                       // sets ZF for the jnz below
    movups (%edx),%xmm1             // fetch next round key (flags untouched)
    leal 16(%edx),%edx
    jnz L001dec1_loop_2
.byte 102,15,56,223,209             // aesdeclast %xmm1,%xmm2
    pxor %xmm0,%xmm0                // scrub round keys from registers
    pxor %xmm1,%xmm1
    movups %xmm2,(%eax)             // write the result
    pxor %xmm2,%xmm2                // scrub the block as well
    ret
// __aesni_encrypt2: encrypt two blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2, xmm3 = blocks to encrypt
// Out:  xmm2, xmm3 = encrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// Loop shape: edx is moved to key+32+16*ecx and ecx becomes a negative byte
// offset that climbs by 32 (two round keys) per iteration until it hits 0.
.private_extern __aesni_encrypt2
.align 4
__aesni_encrypt2:
    movups (%edx),%xmm0             // round key 0
    shll $4,%ecx                    // rounds -> bytes of round keys
    movups 16(%edx),%xmm1           // round key 1
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    movups 32(%edx),%xmm0           // round key 2
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
L002enc2_loop:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,220,208             // aesenc %xmm0,%xmm2
.byte 102,15,56,220,216             // aesenc %xmm0,%xmm3
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L002enc2_loop
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,221,208             // aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216             // aesenclast %xmm0,%xmm3
    ret
// __aesni_decrypt2: decrypt two blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2, xmm3 = blocks to decrypt
// Out:  xmm2, xmm3 = decrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// Loop shape: edx is moved to key+32+16*ecx and ecx becomes a negative byte
// offset that climbs by 32 (two round keys) per iteration until it hits 0.
.private_extern __aesni_decrypt2
.align 4
__aesni_decrypt2:
    movups (%edx),%xmm0             // round key 0
    shll $4,%ecx                    // rounds -> bytes of round keys
    movups 16(%edx),%xmm1           // round key 1
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    movups 32(%edx),%xmm0           // round key 2
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
L003dec2_loop:
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,222,208             // aesdec %xmm0,%xmm2
.byte 102,15,56,222,216             // aesdec %xmm0,%xmm3
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L003dec2_loop
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,223,208             // aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216             // aesdeclast %xmm0,%xmm3
    ret
// __aesni_encrypt3: encrypt three blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2-xmm4 = blocks to encrypt
// Out:  xmm2-xmm4 = encrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// Uses the same negative-offset key walk as the two-block helper: ecx counts
// up by 32 bytes per iteration and the loop ends when it reaches zero.
.private_extern __aesni_encrypt3
.align 4
__aesni_encrypt3:
    movups (%edx),%xmm0             // round key 0
    shll $4,%ecx                    // rounds -> bytes of round keys
    movups 16(%edx),%xmm1           // round key 1
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    movups 32(%edx),%xmm0           // round key 2
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
L004enc3_loop:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,220,208             // aesenc %xmm0,%xmm2
.byte 102,15,56,220,216             // aesenc %xmm0,%xmm3
.byte 102,15,56,220,224             // aesenc %xmm0,%xmm4
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L004enc3_loop
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
.byte 102,15,56,221,208             // aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216             // aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224             // aesenclast %xmm0,%xmm4
    ret
// __aesni_decrypt3: decrypt three blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2-xmm4 = blocks to decrypt
// Out:  xmm2-xmm4 = decrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// Uses the same negative-offset key walk as the two-block helper: ecx counts
// up by 32 bytes per iteration and the loop ends when it reaches zero.
.private_extern __aesni_decrypt3
.align 4
__aesni_decrypt3:
    movups (%edx),%xmm0             // round key 0
    shll $4,%ecx                    // rounds -> bytes of round keys
    movups 16(%edx),%xmm1           // round key 1
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    movups 32(%edx),%xmm0           // round key 2
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
L005dec3_loop:
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,222,208             // aesdec %xmm0,%xmm2
.byte 102,15,56,222,216             // aesdec %xmm0,%xmm3
.byte 102,15,56,222,224             // aesdec %xmm0,%xmm4
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L005dec3_loop
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
.byte 102,15,56,223,208             // aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216             // aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224             // aesdeclast %xmm0,%xmm4
    ret
// __aesni_encrypt4: encrypt four blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2-xmm5 = blocks to encrypt
// Out:  xmm2-xmm5 = encrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// Uses the same negative-offset key walk as the smaller helpers: ecx counts
// up by 32 bytes per iteration and the loop ends when it reaches zero.
.private_extern __aesni_encrypt4
.align 4
__aesni_encrypt4:
    movups (%edx),%xmm0             // round key 0
    movups 16(%edx),%xmm1           // round key 1
    shll $4,%ecx                    // rounds -> bytes of round keys
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    pxor %xmm0,%xmm5
    movups 32(%edx),%xmm0           // round key 2
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
.byte 15,31,64,0                    // nopl 0(%eax): 4-byte padding NOP
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
L006enc4_loop:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
.byte 102,15,56,220,233             // aesenc %xmm1,%xmm5
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,220,208             // aesenc %xmm0,%xmm2
.byte 102,15,56,220,216             // aesenc %xmm0,%xmm3
.byte 102,15,56,220,224             // aesenc %xmm0,%xmm4
.byte 102,15,56,220,232             // aesenc %xmm0,%xmm5
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L006enc4_loop
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
.byte 102,15,56,220,233             // aesenc %xmm1,%xmm5
.byte 102,15,56,221,208             // aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216             // aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224             // aesenclast %xmm0,%xmm4
.byte 102,15,56,221,232             // aesenclast %xmm0,%xmm5
    ret
// __aesni_decrypt4: decrypt four blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2-xmm5 = blocks to decrypt
// Out:  xmm2-xmm5 = decrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// Uses the same negative-offset key walk as the smaller helpers: ecx counts
// up by 32 bytes per iteration and the loop ends when it reaches zero.
.private_extern __aesni_decrypt4
.align 4
__aesni_decrypt4:
    movups (%edx),%xmm0             // round key 0
    movups 16(%edx),%xmm1           // round key 1
    shll $4,%ecx                    // rounds -> bytes of round keys
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
    pxor %xmm0,%xmm5
    movups 32(%edx),%xmm0           // round key 2
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
.byte 15,31,64,0                    // nopl 0(%eax): 4-byte padding NOP
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
L007dec4_loop:
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
.byte 102,15,56,222,233             // aesdec %xmm1,%xmm5
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,222,208             // aesdec %xmm0,%xmm2
.byte 102,15,56,222,216             // aesdec %xmm0,%xmm3
.byte 102,15,56,222,224             // aesdec %xmm0,%xmm4
.byte 102,15,56,222,232             // aesdec %xmm0,%xmm5
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L007dec4_loop
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
.byte 102,15,56,222,233             // aesdec %xmm1,%xmm5
.byte 102,15,56,223,208             // aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216             // aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224             // aesdeclast %xmm0,%xmm4
.byte 102,15,56,223,232             // aesdeclast %xmm0,%xmm5
    ret
// __aesni_encrypt6: encrypt six blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2-xmm7 = blocks to encrypt
// Out:  xmm2-xmm7 = encrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// The whitening XORs are interleaved with the first-round aesenc of the
// blocks that are already whitened, so the first round is split between the
// prologue (xmm2-xmm4) and L008_aesni_encrypt6_inner (xmm5-xmm7).
//
// Secondary entry L_aesni_encrypt6_enter: other code in this file calls it
// directly after doing the whitening and the whole first round itself.  It
// expects xmm0 = round key 2, edx = key+32+16*rounds and
// ecx = 16-16*rounds already set up.
.private_extern __aesni_encrypt6
.align 4
__aesni_encrypt6:
    movups (%edx),%xmm0             // round key 0
    shll $4,%ecx                    // rounds -> bytes of round keys
    movups 16(%edx),%xmm1           // round key 1
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
    pxor %xmm0,%xmm5
    pxor %xmm0,%xmm6
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
    pxor %xmm0,%xmm7
    movups (%edx,%ecx,1),%xmm0      // round key 2 (edx+ecx == key+32 here)
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
    jmp L008_aesni_encrypt6_inner   // finish round 1 for xmm5-xmm7
.align 4,0x90
L009enc6_loop:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
L008_aesni_encrypt6_inner:
.byte 102,15,56,220,233             // aesenc %xmm1,%xmm5
.byte 102,15,56,220,241             // aesenc %xmm1,%xmm6
.byte 102,15,56,220,249             // aesenc %xmm1,%xmm7
L_aesni_encrypt6_enter:
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,220,208             // aesenc %xmm0,%xmm2
.byte 102,15,56,220,216             // aesenc %xmm0,%xmm3
.byte 102,15,56,220,224             // aesenc %xmm0,%xmm4
.byte 102,15,56,220,232             // aesenc %xmm0,%xmm5
.byte 102,15,56,220,240             // aesenc %xmm0,%xmm6
.byte 102,15,56,220,248             // aesenc %xmm0,%xmm7
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L009enc6_loop
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
.byte 102,15,56,220,233             // aesenc %xmm1,%xmm5
.byte 102,15,56,220,241             // aesenc %xmm1,%xmm6
.byte 102,15,56,220,249             // aesenc %xmm1,%xmm7
.byte 102,15,56,221,208             // aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216             // aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224             // aesenclast %xmm0,%xmm4
.byte 102,15,56,221,232             // aesenclast %xmm0,%xmm5
.byte 102,15,56,221,240             // aesenclast %xmm0,%xmm6
.byte 102,15,56,221,248             // aesenclast %xmm0,%xmm7
    ret
// __aesni_decrypt6: decrypt six blocks in parallel (internal helper with a
// register-based interface, no stack arguments).
//
// In:   edx = key schedule pointer
//       ecx = round count (callers in this file load it from 240(key))
//       xmm2-xmm7 = blocks to decrypt
// Out:  xmm2-xmm7 = decrypted blocks
// Clobbers: ecx, edx, flags, xmm0, xmm1
//
// The whitening XORs are interleaved with the first-round aesdec of the
// blocks that are already whitened, so the first round is split between the
// prologue (xmm2-xmm4) and L010_aesni_decrypt6_inner (xmm5-xmm7).
//
// L_aesni_decrypt6_enter is a secondary entry label placed after the first
// round; a caller using it must have done the whitening and first round and
// set xmm0 = round key 2, edx = key+32+16*rounds, ecx = 16-16*rounds.
.private_extern __aesni_decrypt6
.align 4
__aesni_decrypt6:
    movups (%edx),%xmm0             // round key 0
    shll $4,%ecx                    // rounds -> bytes of round keys
    movups 16(%edx),%xmm1           // round key 1
    xorps %xmm0,%xmm2               // whitening
    pxor %xmm0,%xmm3
    pxor %xmm0,%xmm4
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
    pxor %xmm0,%xmm5
    pxor %xmm0,%xmm6
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
    leal 32(%edx,%ecx,1),%edx       // edx -> end-relative base for indexing
    negl %ecx
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
    pxor %xmm0,%xmm7
    movups (%edx,%ecx,1),%xmm0      // round key 2 (edx+ecx == key+32 here)
    addl $16,%ecx                   // ecx = 16 - 16*rounds (negative offset)
    jmp L010_aesni_decrypt6_inner   // finish round 1 for xmm5-xmm7
.align 4,0x90
L011dec6_loop:
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
L010_aesni_decrypt6_inner:
.byte 102,15,56,222,233             // aesdec %xmm1,%xmm5
.byte 102,15,56,222,241             // aesdec %xmm1,%xmm6
.byte 102,15,56,222,249             // aesdec %xmm1,%xmm7
L_aesni_decrypt6_enter:
    movups (%edx,%ecx,1),%xmm1      // next odd round key
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,222,208             // aesdec %xmm0,%xmm2
.byte 102,15,56,222,216             // aesdec %xmm0,%xmm3
.byte 102,15,56,222,224             // aesdec %xmm0,%xmm4
.byte 102,15,56,222,232             // aesdec %xmm0,%xmm5
.byte 102,15,56,222,240             // aesdec %xmm0,%xmm6
.byte 102,15,56,222,248             // aesdec %xmm0,%xmm7
    movups -16(%edx,%ecx,1),%xmm0   // next even round key
    jnz L011dec6_loop
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
.byte 102,15,56,222,217             // aesdec %xmm1,%xmm3
.byte 102,15,56,222,225             // aesdec %xmm1,%xmm4
.byte 102,15,56,222,233             // aesdec %xmm1,%xmm5
.byte 102,15,56,222,241             // aesdec %xmm1,%xmm6
.byte 102,15,56,222,249             // aesdec %xmm1,%xmm7
.byte 102,15,56,223,208             // aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216             // aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224             // aesdeclast %xmm0,%xmm4
.byte 102,15,56,223,232             // aesdeclast %xmm0,%xmm5
.byte 102,15,56,223,240             // aesdeclast %xmm0,%xmm6
.byte 102,15,56,223,248             // aesdeclast %xmm0,%xmm7
    ret
// _aesni_ecb_encrypt: ECB-mode encryption or decryption of whole blocks.
//
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> esi  input pointer
//   24(%esp) -> edi  output pointer
//   28(%esp) -> eax  length in bytes; rounded down to a multiple of 16, and
//                    nothing is processed if the result is zero
//   32(%esp) -> edx  key schedule pointer (round count read from offset 240)
//   36(%esp) -> ebx  direction: non-zero = encrypt, zero = decrypt
//
// Register roles in the bulk loops: ebp = saved key pointer and ebx = saved
// round count, because the multi-block helpers clobber edx and ecx.
// Data is processed six blocks at a time while at least 96 bytes remain;
// the 1..5 block tail is dispatched to the matching helper.
// ebp/ebx/esi/edi are saved and restored; xmm0-xmm7 are zeroed on exit.
.globl _aesni_ecb_encrypt
.private_extern _aesni_ecb_encrypt
.align 4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi              // esi = in
    movl 24(%esp),%edi              // edi = out
    movl 28(%esp),%eax              // eax = length
    movl 32(%esp),%edx              // edx = key schedule
    movl 36(%esp),%ebx              // ebx = direction flag
    andl $-16,%eax                  // whole blocks only
    jz L012ecb_ret
    movl 240(%edx),%ecx             // ecx = round count
    testl %ebx,%ebx
    jz L013ecb_decrypt
    // ---- encryption ----
    movl %edx,%ebp                  // keep key pointer across helper calls
    movl %ecx,%ebx                  // keep round count across helper calls
    cmpl $96,%eax
    jb L014ecb_enc_tail             // fewer than six blocks
    movdqu (%esi),%xmm2
    movdqu 16(%esi),%xmm3
    movdqu 32(%esi),%xmm4
    movdqu 48(%esi),%xmm5
    movdqu 64(%esi),%xmm6
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
    subl $96,%eax
    jmp L015ecb_enc_loop6_enter
.align 4,0x90
L016ecb_enc_loop6:
    // Store the previous six results while loading the next six inputs.
    movups %xmm2,(%edi)
    movdqu (%esi),%xmm2
    movups %xmm3,16(%edi)
    movdqu 16(%esi),%xmm3
    movups %xmm4,32(%edi)
    movdqu 32(%esi),%xmm4
    movups %xmm5,48(%edi)
    movdqu 48(%esi),%xmm5
    movups %xmm6,64(%edi)
    movdqu 64(%esi),%xmm6
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
L015ecb_enc_loop6_enter:
    call __aesni_encrypt6
    movl %ebp,%edx                  // restore key pointer
    movl %ebx,%ecx                  // restore round count
    subl $96,%eax
    jnc L016ecb_enc_loop6           // no borrow: another full group remains
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    addl $96,%eax                   // undo the over-subtraction
    jz L012ecb_ret
L014ecb_enc_tail:
    // 16..80 bytes remain.  The movups loads do not alter flags, so each
    // cmpl feeds both the jb and the following je.
    movups (%esi),%xmm2
    cmpl $32,%eax
    jb L017ecb_enc_one
    movups 16(%esi),%xmm3
    je L018ecb_enc_two
    movups 32(%esi),%xmm4
    cmpl $64,%eax
    jb L019ecb_enc_three
    movups 48(%esi),%xmm5
    je L020ecb_enc_four
    movups 64(%esi),%xmm6
    xorps %xmm7,%xmm7               // five blocks: sixth lane is a zero dummy
    call __aesni_encrypt6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    jmp L012ecb_ret
.align 4,0x90
L017ecb_enc_one:
    movups (%edx),%xmm0             // round key 0
    movups 16(%edx),%xmm1           // round key 1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2               // whitening
L021enc1_loop_3:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L021enc1_loop_3
.byte 102,15,56,221,209             // aesenclast %xmm1,%xmm2
    movups %xmm2,(%edi)
    jmp L012ecb_ret
.align 4,0x90
L018ecb_enc_two:
    call __aesni_encrypt2
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    jmp L012ecb_ret
.align 4,0x90
L019ecb_enc_three:
    call __aesni_encrypt3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    jmp L012ecb_ret
.align 4,0x90
L020ecb_enc_four:
    call __aesni_encrypt4
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    jmp L012ecb_ret
.align 4,0x90
L013ecb_decrypt:
    // ---- decryption: mirrors the encryption path ----
    movl %edx,%ebp                  // keep key pointer across helper calls
    movl %ecx,%ebx                  // keep round count across helper calls
    cmpl $96,%eax
    jb L022ecb_dec_tail
    movdqu (%esi),%xmm2
    movdqu 16(%esi),%xmm3
    movdqu 32(%esi),%xmm4
    movdqu 48(%esi),%xmm5
    movdqu 64(%esi),%xmm6
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
    subl $96,%eax
    jmp L023ecb_dec_loop6_enter
.align 4,0x90
L024ecb_dec_loop6:
    movups %xmm2,(%edi)
    movdqu (%esi),%xmm2
    movups %xmm3,16(%edi)
    movdqu 16(%esi),%xmm3
    movups %xmm4,32(%edi)
    movdqu 32(%esi),%xmm4
    movups %xmm5,48(%edi)
    movdqu 48(%esi),%xmm5
    movups %xmm6,64(%edi)
    movdqu 64(%esi),%xmm6
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    movdqu 80(%esi),%xmm7
    leal 96(%esi),%esi
L023ecb_dec_loop6_enter:
    call __aesni_decrypt6
    movl %ebp,%edx                  // restore key pointer
    movl %ebx,%ecx                  // restore round count
    subl $96,%eax
    jnc L024ecb_dec_loop6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    addl $96,%eax                   // undo the over-subtraction
    jz L012ecb_ret
L022ecb_dec_tail:
    movups (%esi),%xmm2
    cmpl $32,%eax
    jb L025ecb_dec_one
    movups 16(%esi),%xmm3
    je L026ecb_dec_two
    movups 32(%esi),%xmm4
    cmpl $64,%eax
    jb L027ecb_dec_three
    movups 48(%esi),%xmm5
    je L028ecb_dec_four
    movups 64(%esi),%xmm6
    xorps %xmm7,%xmm7               // five blocks: sixth lane is a zero dummy
    call __aesni_decrypt6
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    jmp L012ecb_ret
.align 4,0x90
L025ecb_dec_one:
    movups (%edx),%xmm0             // round key 0
    movups 16(%edx),%xmm1           // round key 1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2               // whitening
L029dec1_loop_4:
.byte 102,15,56,222,209             // aesdec %xmm1,%xmm2
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L029dec1_loop_4
.byte 102,15,56,223,209             // aesdeclast %xmm1,%xmm2
    movups %xmm2,(%edi)
    jmp L012ecb_ret
.align 4,0x90
L026ecb_dec_two:
    call __aesni_decrypt2
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    jmp L012ecb_ret
.align 4,0x90
L027ecb_dec_three:
    call __aesni_decrypt3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    jmp L012ecb_ret
.align 4,0x90
L028ecb_dec_four:
    call __aesni_decrypt4
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    // falls through into the common exit
L012ecb_ret:
    pxor %xmm0,%xmm0                // scrub key and data from all xmm regs
    pxor %xmm1,%xmm1
    pxor %xmm2,%xmm2
    pxor %xmm3,%xmm3
    pxor %xmm4,%xmm4
    pxor %xmm5,%xmm5
    pxor %xmm6,%xmm6
    pxor %xmm7,%xmm7
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// _aesni_ccm64_encrypt_blocks: per block, encrypt a counter block and a
// running MAC block in parallel, XOR the encrypted counter into the input
// to produce the output, and fold the input into the MAC.
//
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> esi  input pointer
//   24(%esp) -> edi  output pointer
//   28(%esp) -> eax  number of 16-byte blocks (loop is do-while: the first
//                    block is processed before the count is tested)
//   32(%esp) -> edx  key schedule pointer (round count read from offset 240)
//   36(%esp) -> ebx  pointer to the 16-byte counter block (read only)
//   40(%esp) -> ecx  pointer to the 16-byte MAC block (read, then updated)
//
// 16-byte-aligned stack frame:
//    0(%esp)  pshufb mask 0f 0e .. 01 00 (reverses all 16 bytes)
//   16(%esp)  64-bit increment {1, 0}
//   48(%esp)  caller's esp
// Register roles: xmm7 = counter kept byte-reversed so paddq can bump it,
// xmm2 = counter block in memory byte order, xmm3 = MAC, xmm5 = mask,
// ebp = key pointer, edx = end-relative key base, ebx = initial negative
// key offset reloaded into ecx for every block.
.globl _aesni_ccm64_encrypt_blocks
.private_extern _aesni_ccm64_encrypt_blocks
.align 4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    movl 40(%esp),%ecx
    movl %esp,%ebp
    subl $60,%esp
    andl $-16,%esp                  // align frame for movdqa/paddq operands
    movl %ebp,48(%esp)              // remember caller's esp
    movdqu (%ebx),%xmm7             // counter block
    movdqu (%ecx),%xmm3             // MAC block
    movl 240(%edx),%ecx             // round count
    movl $202182159,(%esp)          // 0x0c0d0e0f  \
    movl $134810123,4(%esp)         // 0x08090a0b   | byte-reversal mask
    movl $67438087,8(%esp)          // 0x04050607   |
    movl $66051,12(%esp)            // 0x00010203  /
    movl $1,%ebx
    xorl %ebp,%ebp
    movl %ebx,16(%esp)              // increment vector = {1,0,0,0}
    movl %ebp,20(%esp)
    movl %ebp,24(%esp)
    movl %ebp,28(%esp)
    shll $4,%ecx                    // rounds -> bytes of round keys
    movl $16,%ebx
    leal (%edx),%ebp                // ebp = key pointer
    movdqa (%esp),%xmm5             // xmm5 = byte-reversal mask
    movdqa %xmm7,%xmm2              // first counter block, memory order
    leal 32(%edx,%ecx,1),%edx       // end-relative key base
    subl %ecx,%ebx                  // ebx = 16 - 16*rounds
.byte 102,15,56,0,253               // pshufb %xmm5,%xmm7 (counter -> reversed)
L030ccm64_enc_outer:
    movups (%ebp),%xmm0             // round key 0
    movl %ebx,%ecx
    movups (%esi),%xmm6             // xmm6 = input block
    xorps %xmm0,%xmm2               // whiten counter
    movups 16(%ebp),%xmm1           // round key 1
    xorps %xmm6,%xmm0               // key0 ^ input
    xorps %xmm0,%xmm3               // MAC ^= input, whitened in the same XOR
    movups 32(%ebp),%xmm0           // round key 2
L031ccm64_enc2_loop:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    movups (%edx,%ecx,1),%xmm1
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,220,208             // aesenc %xmm0,%xmm2
.byte 102,15,56,220,216             // aesenc %xmm0,%xmm3
    movups -16(%edx,%ecx,1),%xmm0
    jnz L031ccm64_enc2_loop
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    paddq 16(%esp),%xmm7            // bump the byte-reversed counter
    decl %eax                       // sets ZF for the jnz at loop bottom
.byte 102,15,56,221,208             // aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216             // aesenclast %xmm0,%xmm3
    leal 16(%esi),%esi
    xorps %xmm2,%xmm6               // output = input ^ E(counter)
    movdqa %xmm7,%xmm2
    movups %xmm6,(%edi)
.byte 102,15,56,0,213               // pshufb %xmm5,%xmm2 (next counter block)
    leal 16(%edi),%edi
    jnz L030ccm64_enc_outer
    movl 48(%esp),%esp              // drop the aligned frame
    movl 40(%esp),%edi              // edi = MAC pointer argument
    movups %xmm3,(%edi)             // write back the updated MAC
    pxor %xmm0,%xmm0                // scrub all xmm registers
    pxor %xmm1,%xmm1
    pxor %xmm2,%xmm2
    pxor %xmm3,%xmm3
    pxor %xmm4,%xmm4
    pxor %xmm5,%xmm5
    pxor %xmm6,%xmm6
    pxor %xmm7,%xmm7
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// _aesni_ccm64_decrypt_blocks: decryption counterpart of the CCM64 encrypt
// routine.  The MAC is computed over the decrypted output, so the MAC update
// for block i runs in parallel with the counter encryption for block i+1.
// Only the AES encryption direction (aesenc) is used.
//
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> esi  input pointer
//   24(%esp) -> edi  output pointer
//   28(%esp) -> eax  number of 16-byte blocks (the first block is processed
//                    before the count is tested)
//   32(%esp) -> edx  key schedule pointer (round count read from offset 240)
//   36(%esp) -> ebx  pointer to the 16-byte counter block (read only)
//   40(%esp) -> ecx  pointer to the 16-byte MAC block (read, then updated)
//
// 16-byte-aligned stack frame:
//    0(%esp)  pshufb mask 0f 0e .. 01 00 (reverses all 16 bytes)
//   16(%esp)  64-bit increment {1, 0}
//   48(%esp)  caller's esp
// Register roles: xmm7 = counter kept byte-reversed, xmm2 = counter block /
// key stream, xmm3 = MAC, xmm5 = mask, xmm6 = current data block,
// ebp = key pointer.
.globl _aesni_ccm64_decrypt_blocks
.private_extern _aesni_ccm64_decrypt_blocks
.align 4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    movl 40(%esp),%ecx
    movl %esp,%ebp
    subl $60,%esp
    andl $-16,%esp                  // align frame for movdqa/paddq operands
    movl %ebp,48(%esp)              // remember caller's esp
    movdqu (%ebx),%xmm7             // counter block
    movdqu (%ecx),%xmm3             // MAC block
    movl 240(%edx),%ecx             // round count
    movl $202182159,(%esp)          // 0x0c0d0e0f  \
    movl $134810123,4(%esp)         // 0x08090a0b   | byte-reversal mask
    movl $67438087,8(%esp)          // 0x04050607   |
    movl $66051,12(%esp)            // 0x00010203  /
    movl $1,%ebx
    xorl %ebp,%ebp
    movl %ebx,16(%esp)              // increment vector = {1,0,0,0}
    movl %ebp,20(%esp)
    movl %ebp,24(%esp)
    movl %ebp,28(%esp)
    movdqa (%esp),%xmm5             // xmm5 = byte-reversal mask
    movdqa %xmm7,%xmm2              // first counter block, memory order
    movl %edx,%ebp                  // ebp = key pointer
    movl %ecx,%ebx                  // ebx = round count
.byte 102,15,56,0,253               // pshufb %xmm5,%xmm7 (counter -> reversed)
    // Encrypt the first counter block on its own.
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L032enc1_loop_5:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L032enc1_loop_5
.byte 102,15,56,221,209             // aesenclast %xmm1,%xmm2
    shll $4,%ebx                    // rounds -> bytes of round keys
    movl $16,%ecx
    movups (%esi),%xmm6             // first input block
    paddq 16(%esp),%xmm7            // bump the byte-reversed counter
    leal 16(%esi),%esi
    subl %ebx,%ecx                  // ecx = 16 - 16*rounds
    leal 32(%ebp,%ebx,1),%edx       // end-relative key base
    movl %ecx,%ebx                  // ebx = initial negative key offset
    jmp L033ccm64_dec_outer
.align 4,0x90
L033ccm64_dec_outer:
    xorps %xmm2,%xmm6               // output = input ^ E(counter)
    movdqa %xmm7,%xmm2
    movups %xmm6,(%edi)
    leal 16(%edi),%edi
.byte 102,15,56,0,213               // pshufb %xmm5,%xmm2 (next counter block)
    subl $1,%eax
    jz L034ccm64_dec_break          // last block: only the MAC is left to do
    movups (%ebp),%xmm0             // round key 0
    movl %ebx,%ecx
    movups 16(%ebp),%xmm1           // round key 1
    xorps %xmm0,%xmm6               // output ^ key0
    xorps %xmm0,%xmm2               // whiten next counter
    xorps %xmm6,%xmm3               // MAC ^= output, whitened in the same XOR
    movups 32(%ebp),%xmm0           // round key 2
L035ccm64_dec2_loop:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    movups (%edx,%ecx,1),%xmm1
    addl $32,%ecx                   // sets ZF consumed by jnz below
.byte 102,15,56,220,208             // aesenc %xmm0,%xmm2
.byte 102,15,56,220,216             // aesenc %xmm0,%xmm3
    movups -16(%edx,%ecx,1),%xmm0
    jnz L035ccm64_dec2_loop
    movups (%esi),%xmm6             // next input block
    paddq 16(%esp),%xmm7            // bump the byte-reversed counter
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
.byte 102,15,56,221,208             // aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216             // aesenclast %xmm0,%xmm3
    leal 16(%esi),%esi
    jmp L033ccm64_dec_outer
.align 4,0x90
L034ccm64_dec_break:
    // Fold the final output block into the MAC and encrypt the MAC alone.
    movl 240(%ebp),%ecx
    movl %ebp,%edx
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    xorps %xmm0,%xmm6
    leal 32(%edx),%edx
    xorps %xmm6,%xmm3
L036enc1_loop_6:
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L036enc1_loop_6
.byte 102,15,56,221,217             // aesenclast %xmm1,%xmm3
    movl 48(%esp),%esp              // drop the aligned frame
    movl 40(%esp),%edi              // edi = MAC pointer argument
    movups %xmm3,(%edi)             // write back the updated MAC
    pxor %xmm0,%xmm0                // scrub all xmm registers
    pxor %xmm1,%xmm1
    pxor %xmm2,%xmm2
    pxor %xmm3,%xmm3
    pxor %xmm4,%xmm4
    pxor %xmm5,%xmm5
    pxor %xmm6,%xmm6
    pxor %xmm7,%xmm7
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
// _aesni_ctr32_encrypt_blocks: counter-mode processing where only the last
// 32-bit word of the 16-byte counter block is incremented (big-endian).
//
// Stack arguments (offsets are after the four register pushes):
//   20(%esp) -> esi  input pointer
//   24(%esp) -> edi  output pointer
//   28(%esp) -> eax  number of 16-byte blocks
//   32(%esp) -> edx  key schedule pointer (round count read from offset 240)
//   36(%esp) -> ebx  pointer to the 16-byte counter block (read only)
//
// 16-byte-aligned stack frame:
//    0(%esp)  pshufb mask 0f 0e .. 01 00 (reverses all 16 bytes)
//   16(%esp)  per-iteration increment {6,6,6,0}
//   32(%esp)  counter block with its counter word zeroed, XOR round key 0
//             (written only on the six-block path)
//   48(%esp)  host-order counters n, n+1, n+2
//   64(%esp)  host-order counters n+3, n+4, n+5
//   80(%esp)  caller's esp
// A single-block request skips all of the above and encrypts the counter
// block directly.  Stack slots 32..79 and xmm0-xmm7 are wiped on exit.
.globl _aesni_ctr32_encrypt_blocks
.private_extern _aesni_ctr32_encrypt_blocks
.align 4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%esi
    movl 24(%esp),%edi
    movl 28(%esp),%eax
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
    movl %esp,%ebp
    subl $88,%esp
    andl $-16,%esp                  // align frame for movdqa operands
    movl %ebp,80(%esp)              // remember caller's esp
    cmpl $1,%eax
    je L037ctr32_one_shortcut
    movdqu (%ebx),%xmm7             // xmm7 = counter block
    movl $202182159,(%esp)          // 0x0c0d0e0f  \
    movl $134810123,4(%esp)         // 0x08090a0b   | byte-reversal mask
    movl $67438087,8(%esp)          // 0x04050607   |
    movl $66051,12(%esp)            // 0x00010203  /
    movl $6,%ecx
    xorl %ebp,%ebp
    movl %ecx,16(%esp)              // increment vector = {6,6,6,0}
    movl %ecx,20(%esp)
    movl %ecx,24(%esp)
    movl %ebp,28(%esp)
.byte 102,15,58,22,251,3            // pextrd $3,%xmm7,%ebx (counter word)
.byte 102,15,58,34,253,3            // pinsrd $3,%ebp,%xmm7 (zero it in xmm7)
    movl 240(%edx),%ecx             // round count
    bswap %ebx                      // counter to host byte order
    pxor %xmm0,%xmm0
    pxor %xmm1,%xmm1
    movdqa (%esp),%xmm2             // xmm2 = byte-reversal mask
.byte 102,15,58,34,195,0            // pinsrd $0,%ebx,%xmm0  (n)
    leal 3(%ebx),%ebp
.byte 102,15,58,34,205,0            // pinsrd $0,%ebp,%xmm1  (n+3)
    incl %ebx
.byte 102,15,58,34,195,1            // pinsrd $1,%ebx,%xmm0  (n+1)
    incl %ebp
.byte 102,15,58,34,205,1            // pinsrd $1,%ebp,%xmm1  (n+4)
    incl %ebx
.byte 102,15,58,34,195,2            // pinsrd $2,%ebx,%xmm0  (n+2)
    incl %ebp
.byte 102,15,58,34,205,2            // pinsrd $2,%ebp,%xmm1  (n+5)
    movdqa %xmm0,48(%esp)           // keep host-order counters for paddd
.byte 102,15,56,0,194               // pshufb %xmm2,%xmm0 (to big-endian)
    movdqu (%edx),%xmm6             // round key 0
    movdqa %xmm1,64(%esp)
.byte 102,15,56,0,202               // pshufb %xmm2,%xmm1 (to big-endian)
    pshufd $192,%xmm0,%xmm2         // xmm2 = {0,0,0,be(n)}
    pshufd $128,%xmm0,%xmm3         // xmm3 = {0,0,0,be(n+1)}
    cmpl $6,%eax
    jb L038ctr32_tail
    pxor %xmm6,%xmm7                // fold round key 0 into the fixed part
    shll $4,%ecx                    // rounds -> bytes of round keys
    movl $16,%ebx
    movdqa %xmm7,32(%esp)
    movl %edx,%ebp                  // ebp = key pointer
    subl %ecx,%ebx                  // ebx = 16 - 16*rounds
    leal 32(%edx,%ecx,1),%edx       // end-relative key base
    subl $6,%eax
    jmp L039ctr32_loop6
.align 4,0x90
L039ctr32_loop6:
    // Build six whitened counter blocks and run the first round inline,
    // then join the shared six-block round loop at its side entry.
    pshufd $64,%xmm0,%xmm4          // xmm4 = {0,0,0,be(n+2)}
    movdqa 32(%esp),%xmm0           // fixed part ^ round key 0
    pshufd $192,%xmm1,%xmm5
    pxor %xmm0,%xmm2
    pshufd $128,%xmm1,%xmm6
    pxor %xmm0,%xmm3
    pshufd $64,%xmm1,%xmm7
    movups 16(%ebp),%xmm1           // round key 1
    pxor %xmm0,%xmm4
    pxor %xmm0,%xmm5
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
    pxor %xmm0,%xmm6
    pxor %xmm0,%xmm7
.byte 102,15,56,220,217             // aesenc %xmm1,%xmm3
    movups 32(%ebp),%xmm0           // round key 2
    movl %ebx,%ecx
.byte 102,15,56,220,225             // aesenc %xmm1,%xmm4
.byte 102,15,56,220,233             // aesenc %xmm1,%xmm5
.byte 102,15,56,220,241             // aesenc %xmm1,%xmm6
.byte 102,15,56,220,249             // aesenc %xmm1,%xmm7
    call L_aesni_encrypt6_enter
    movups (%esi),%xmm1
    movups 16(%esi),%xmm0
    xorps %xmm1,%xmm2               // output = input ^ key stream
    movups 32(%esi),%xmm1
    xorps %xmm0,%xmm3
    movups %xmm2,(%edi)
    movdqa 16(%esp),%xmm0           // increment vector
    xorps %xmm1,%xmm4
    movdqa 64(%esp),%xmm1
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    paddd %xmm0,%xmm1               // counters n+3..n+5 += 6
    paddd 48(%esp),%xmm0            // counters n..n+2 += 6
    movdqa (%esp),%xmm2             // byte-reversal mask
    movups 48(%esi),%xmm3
    movups 64(%esi),%xmm4
    xorps %xmm3,%xmm5
    movups 80(%esi),%xmm3
    leal 96(%esi),%esi
    movdqa %xmm0,48(%esp)
.byte 102,15,56,0,194               // pshufb %xmm2,%xmm0
    xorps %xmm4,%xmm6
    movups %xmm5,48(%edi)
    xorps %xmm3,%xmm7
    movdqa %xmm1,64(%esp)
.byte 102,15,56,0,202               // pshufb %xmm2,%xmm1
    movups %xmm6,64(%edi)
    pshufd $192,%xmm0,%xmm2
    movups %xmm7,80(%edi)
    leal 96(%edi),%edi
    pshufd $128,%xmm0,%xmm3
    subl $6,%eax
    jnc L039ctr32_loop6             // no borrow: another six blocks remain
    addl $6,%eax                    // undo the over-subtraction
    jz L040ctr32_ret
    movdqu (%ebp),%xmm7             // round key 0
    movl %ebp,%edx                  // restore key pointer for the helpers
    pxor 32(%esp),%xmm7             // cancel key 0: xmm7 = fixed part again
    movl 240(%ebp),%ecx             // restore round count
L038ctr32_tail:
    // 1..5 blocks remain (2..5 when entered directly from the prologue).
    // por merges the fixed part with the counter word; the SSE ops between a
    // cmpl and its je leave the flags alone.
    por %xmm7,%xmm2
    cmpl $2,%eax
    jb L041ctr32_one
    pshufd $64,%xmm0,%xmm4
    por %xmm7,%xmm3
    je L042ctr32_two
    pshufd $192,%xmm1,%xmm5
    por %xmm7,%xmm4
    cmpl $4,%eax
    jb L043ctr32_three
    pshufd $128,%xmm1,%xmm6
    por %xmm7,%xmm5
    je L044ctr32_four
    por %xmm7,%xmm6
    call __aesni_encrypt6           // five blocks; sixth lane result unused
    movups (%esi),%xmm1
    movups 16(%esi),%xmm0
    xorps %xmm1,%xmm2
    movups 32(%esi),%xmm1
    xorps %xmm0,%xmm3
    movups 48(%esi),%xmm0
    xorps %xmm1,%xmm4
    movups 64(%esi),%xmm1
    xorps %xmm0,%xmm5
    movups %xmm2,(%edi)
    xorps %xmm1,%xmm6
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    movups %xmm6,64(%edi)
    jmp L040ctr32_ret
.align 4,0x90
L037ctr32_one_shortcut:
    movups (%ebx),%xmm2             // counter block used as-is
    movl 240(%edx),%ecx
L041ctr32_one:
    movups (%edx),%xmm0
    movups 16(%edx),%xmm1
    leal 32(%edx),%edx
    xorps %xmm0,%xmm2
L045enc1_loop_7:
.byte 102,15,56,220,209             // aesenc %xmm1,%xmm2
    decl %ecx
    movups (%edx),%xmm1
    leal 16(%edx),%edx
    jnz L045enc1_loop_7
.byte 102,15,56,221,209             // aesenclast %xmm1,%xmm2
    movups (%esi),%xmm6
    xorps %xmm2,%xmm6
    movups %xmm6,(%edi)
    jmp L040ctr32_ret
.align 4,0x90
L042ctr32_two:
    call __aesni_encrypt2
    movups (%esi),%xmm5
    movups 16(%esi),%xmm6
    xorps %xmm5,%xmm2
    xorps %xmm6,%xmm3
    movups %xmm2,(%edi)
    movups %xmm3,16(%edi)
    jmp L040ctr32_ret
.align 4,0x90
L043ctr32_three:
    call __aesni_encrypt3
    movups (%esi),%xmm5
    movups 16(%esi),%xmm6
    xorps %xmm5,%xmm2
    movups 32(%esi),%xmm7
    xorps %xmm6,%xmm3
    movups %xmm2,(%edi)
    xorps %xmm7,%xmm4
    movups %xmm3,16(%edi)
    movups %xmm4,32(%edi)
    jmp L040ctr32_ret
.align 4,0x90
L044ctr32_four:
    call __aesni_encrypt4
    movups (%esi),%xmm6
    movups 16(%esi),%xmm7
    movups 32(%esi),%xmm1
    xorps %xmm6,%xmm2
    movups 48(%esi),%xmm0
    xorps %xmm7,%xmm3
    movups %xmm2,(%edi)
    xorps %xmm1,%xmm4
    movups %xmm3,16(%edi)
    xorps %xmm0,%xmm5
    movups %xmm4,32(%edi)
    movups %xmm5,48(%edi)
    // falls through into the common exit
L040ctr32_ret:
    pxor %xmm0,%xmm0                // scrub registers and the stack scratch
    pxor %xmm1,%xmm1
    pxor %xmm2,%xmm2
    pxor %xmm3,%xmm3
    pxor %xmm4,%xmm4
    movdqa %xmm0,32(%esp)
    pxor %xmm5,%xmm5
    movdqa %xmm0,48(%esp)
    pxor %xmm6,%xmm6
    movdqa %xmm0,64(%esp)
    pxor %xmm7,%xmm7
    movl 80(%esp),%esp              // drop the aligned frame
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
# _aesni_xts_encrypt (i386, AT&T syntax, all arguments on the stack).
# Once the four pushes below are done the arguments are read from:
#   src     = 20(%esp)  pointer the input blocks are loaded from (%esi)
#   dst     = 24(%esp)  pointer the output blocks are stored to (%edi)
#   len     = 28(%esp)  length in bytes (%eax)
#   key1    = 32(%esp)  key schedule applied to the data blocks
#   key2    = 36(%esp)  key schedule applied to the initial tweak
#   tweak   = 40(%esp)  pointer to the 16 bytes that are encrypted with
#                       key2 to form the first tweak
# Both key schedules hold their loop count at byte offset 240.
# Byte sequences 102,15,56,220,xx encode aesenc and 102,15,56,221,xx encode
# aesenclast; the final byte picks the registers (209 = %xmm1,%xmm2,
# 217/225/233/241/249 = %xmm1 into %xmm3..%xmm7).
# Aligned frame built below: bytes 0..95 hold tweaks, 96..111 hold the
# constant {135,0,1,0}, 112 holds len, 116 holds the caller %esp.
# Helpers called here (__aesni_encrypt2/3/4/6, L_aesni_encrypt6_enter) are
# defined outside this chunk; their register contract is assumed, not shown.
1030.globl _aesni_xts_encrypt
1031.private_extern _aesni_xts_encrypt
1032.align 4
1033_aesni_xts_encrypt:
1034L_aesni_xts_encrypt_begin:
1035 pushl %ebp
1036 pushl %ebx
1037 pushl %esi
1038 pushl %edi
# Encrypt the 16 bytes at the tweak pointer with key2; result stays in %xmm2.
1039 movl 36(%esp),%edx
1040 movl 40(%esp),%esi
1041 movl 240(%edx),%ecx
1042 movups (%esi),%xmm2
1043 movups (%edx),%xmm0
1044 movups 16(%edx),%xmm1
1045 leal 32(%edx),%edx
1046 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001047L046enc1_loop_8:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001048.byte 102,15,56,220,209
1049 decl %ecx
1050 movups (%edx),%xmm1
1051 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001052 jnz L046enc1_loop_8
Adam Langleyd9e397b2015-01-22 14:27:53 -08001053.byte 102,15,56,221,209
# Load src, dst, len and key1, then carve out a 16-byte aligned frame.
# The caller %esp is kept in %ebp until it is saved at 116(%esp).
1054 movl 20(%esp),%esi
1055 movl 24(%esp),%edi
1056 movl 28(%esp),%eax
1057 movl 32(%esp),%edx
1058 movl %esp,%ebp
1059 subl $120,%esp
1060 movl 240(%edx),%ecx
1061 andl $-16,%esp
# Constant used by every tweak update: dword 0 = 135 (0x87), dword 2 = 1.
1062 movl $135,96(%esp)
1063 movl $0,100(%esp)
1064 movl $1,104(%esp)
1065 movl $0,108(%esp)
1066 movl %eax,112(%esp)
1067 movl %ebp,116(%esp)
# Register roles from here: %xmm1 current tweak, %xmm0 per-dword sign mask
# of the tweak (pcmpgtd against zero), %xmm3 the constant from 96(%esp),
# %ebp key1, %ebx loop count, %eax whole-block byte count minus 96.
1068 movdqa %xmm2,%xmm1
1069 pxor %xmm0,%xmm0
1070 movdqa 96(%esp),%xmm3
1071 pcmpgtd %xmm1,%xmm0
1072 andl $-16,%eax
1073 movl %edx,%ebp
1074 movl %ecx,%ebx
1075 subl $96,%eax
# Fewer than 96 bytes of whole blocks: skip the six-block loop.
Adam Langleye9ada862015-05-11 17:20:37 -07001076 jc L047xts_enc_short
# Six-block loop setup: %ebx = 16 - 16*count, %edx = key1 + 32 + 16*count.
# NOTE(review): presumably the form L_aesni_encrypt6_enter expects; confirm
# against that helper.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001077 shll $4,%ecx
1078 movl $16,%ebx
1079 subl %ecx,%ebx
1080 leal 32(%edx,%ecx,1),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001081 jmp L048xts_enc_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001082.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001083L048xts_enc_loop6:
# Tweak update pattern, repeated below: pshufd $19 moves the sign mask of
# dword 3 to dword 0 and that of dword 1 to dword 2; pand keeps 0x87 and 1
# from the constant; paddq doubles both 64-bit halves; pxor folds the two
# carries back in. Each tweak is saved to the frame before it is doubled.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001084 pshufd $19,%xmm0,%xmm2
1085 pxor %xmm0,%xmm0
1086 movdqa %xmm1,(%esp)
1087 paddq %xmm1,%xmm1
1088 pand %xmm3,%xmm2
1089 pcmpgtd %xmm1,%xmm0
1090 pxor %xmm2,%xmm1
1091 pshufd $19,%xmm0,%xmm2
1092 pxor %xmm0,%xmm0
1093 movdqa %xmm1,16(%esp)
1094 paddq %xmm1,%xmm1
1095 pand %xmm3,%xmm2
1096 pcmpgtd %xmm1,%xmm0
1097 pxor %xmm2,%xmm1
1098 pshufd $19,%xmm0,%xmm2
1099 pxor %xmm0,%xmm0
1100 movdqa %xmm1,32(%esp)
1101 paddq %xmm1,%xmm1
1102 pand %xmm3,%xmm2
1103 pcmpgtd %xmm1,%xmm0
1104 pxor %xmm2,%xmm1
1105 pshufd $19,%xmm0,%xmm2
1106 pxor %xmm0,%xmm0
1107 movdqa %xmm1,48(%esp)
1108 paddq %xmm1,%xmm1
1109 pand %xmm3,%xmm2
1110 pcmpgtd %xmm1,%xmm0
1111 pxor %xmm2,%xmm1
# Sixth tweak is formed in %xmm7 while the six input blocks are loaded and
# xored with the first round key (%xmm0) and their tweaks. The data load
# overwrites %xmm3, so the constant is reloaded after the call.
1112 pshufd $19,%xmm0,%xmm7
1113 movdqa %xmm1,64(%esp)
1114 paddq %xmm1,%xmm1
1115 movups (%ebp),%xmm0
1116 pand %xmm3,%xmm7
1117 movups (%esi),%xmm2
1118 pxor %xmm1,%xmm7
1119 movl %ebx,%ecx
1120 movdqu 16(%esi),%xmm3
1121 xorps %xmm0,%xmm2
1122 movdqu 32(%esi),%xmm4
1123 pxor %xmm0,%xmm3
1124 movdqu 48(%esi),%xmm5
1125 pxor %xmm0,%xmm4
1126 movdqu 64(%esi),%xmm6
1127 pxor %xmm0,%xmm5
1128 movdqu 80(%esi),%xmm1
1129 pxor %xmm0,%xmm6
1130 leal 96(%esi),%esi
1131 pxor (%esp),%xmm2
1132 movdqa %xmm7,80(%esp)
1133 pxor %xmm1,%xmm7
# One aesenc with the round key at 16(%ebp) is applied to all six blocks
# here, then the helper is entered with the key at 32(%ebp) in %xmm0.
1134 movups 16(%ebp),%xmm1
1135 pxor 16(%esp),%xmm3
1136 pxor 32(%esp),%xmm4
1137.byte 102,15,56,220,209
1138 pxor 48(%esp),%xmm5
1139 pxor 64(%esp),%xmm6
1140.byte 102,15,56,220,217
1141 pxor %xmm0,%xmm7
1142 movups 32(%ebp),%xmm0
1143.byte 102,15,56,220,225
1144.byte 102,15,56,220,233
1145.byte 102,15,56,220,241
1146.byte 102,15,56,220,249
1147 call L_aesni_encrypt6_enter
# Xor each result with its saved tweak, store six blocks, and derive the
# tweak for the next iteration from the sixth one (reloaded into %xmm1).
1148 movdqa 80(%esp),%xmm1
1149 pxor %xmm0,%xmm0
1150 xorps (%esp),%xmm2
1151 pcmpgtd %xmm1,%xmm0
1152 xorps 16(%esp),%xmm3
1153 movups %xmm2,(%edi)
1154 xorps 32(%esp),%xmm4
1155 movups %xmm3,16(%edi)
1156 xorps 48(%esp),%xmm5
1157 movups %xmm4,32(%edi)
1158 xorps 64(%esp),%xmm6
1159 movups %xmm5,48(%edi)
1160 xorps %xmm1,%xmm7
1161 movups %xmm6,64(%edi)
1162 pshufd $19,%xmm0,%xmm2
1163 movups %xmm7,80(%edi)
1164 leal 96(%edi),%edi
1165 movdqa 96(%esp),%xmm3
1166 pxor %xmm0,%xmm0
1167 paddq %xmm1,%xmm1
1168 pand %xmm3,%xmm2
1169 pcmpgtd %xmm1,%xmm0
1170 pxor %xmm2,%xmm1
1171 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001172 jnc L048xts_enc_loop6
# Loop finished: restore %ecx/%ebx to the plain loop count and %edx to key1.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001173 movl 240(%ebp),%ecx
1174 movl %ebp,%edx
1175 movl %ecx,%ebx
Adam Langleye9ada862015-05-11 17:20:37 -07001176L047xts_enc_short:
# Undo the bias: %eax = bytes of whole blocks still to do (0, 16, .. 80).
# The conditional jumps below reuse flags from the preceding cmpl; the SSE
# instructions placed between compare and jump do not modify them.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001177 addl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001178 jz L049xts_enc_done6x
Adam Langleyd9e397b2015-01-22 14:27:53 -08001179 movdqa %xmm1,%xmm5
1180 cmpl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001181 jb L050xts_enc_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001182 pshufd $19,%xmm0,%xmm2
1183 pxor %xmm0,%xmm0
1184 paddq %xmm1,%xmm1
1185 pand %xmm3,%xmm2
1186 pcmpgtd %xmm1,%xmm0
1187 pxor %xmm2,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001188 je L051xts_enc_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001189 pshufd $19,%xmm0,%xmm2
1190 pxor %xmm0,%xmm0
1191 movdqa %xmm1,%xmm6
1192 paddq %xmm1,%xmm1
1193 pand %xmm3,%xmm2
1194 pcmpgtd %xmm1,%xmm0
1195 pxor %xmm2,%xmm1
1196 cmpl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001197 jb L052xts_enc_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001198 pshufd $19,%xmm0,%xmm2
1199 pxor %xmm0,%xmm0
1200 movdqa %xmm1,%xmm7
1201 paddq %xmm1,%xmm1
1202 pand %xmm3,%xmm2
1203 pcmpgtd %xmm1,%xmm0
1204 pxor %xmm2,%xmm1
1205 movdqa %xmm5,(%esp)
1206 movdqa %xmm6,16(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07001207 je L053xts_enc_four
# Five blocks remain: tweaks 1..4 go to 0..48(%esp), the fifth is built in
# %xmm7 and saved at 64(%esp); the six-block helper is called with five
# loaded blocks and only %xmm2..%xmm6 are stored afterwards.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001208 movdqa %xmm7,32(%esp)
1209 pshufd $19,%xmm0,%xmm7
1210 movdqa %xmm1,48(%esp)
1211 paddq %xmm1,%xmm1
1212 pand %xmm3,%xmm7
1213 pxor %xmm1,%xmm7
1214 movdqu (%esi),%xmm2
1215 movdqu 16(%esi),%xmm3
1216 movdqu 32(%esi),%xmm4
1217 pxor (%esp),%xmm2
1218 movdqu 48(%esi),%xmm5
1219 pxor 16(%esp),%xmm3
1220 movdqu 64(%esi),%xmm6
1221 pxor 32(%esp),%xmm4
1222 leal 80(%esi),%esi
1223 pxor 48(%esp),%xmm5
1224 movdqa %xmm7,64(%esp)
1225 pxor %xmm7,%xmm6
1226 call __aesni_encrypt6
1227 movaps 64(%esp),%xmm1
1228 xorps (%esp),%xmm2
1229 xorps 16(%esp),%xmm3
1230 xorps 32(%esp),%xmm4
1231 movups %xmm2,(%edi)
1232 xorps 48(%esp),%xmm5
1233 movups %xmm3,16(%edi)
1234 xorps %xmm1,%xmm6
1235 movups %xmm4,32(%edi)
1236 movups %xmm5,48(%edi)
1237 movups %xmm6,64(%edi)
1238 leal 80(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001239 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001240.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001241L050xts_enc_one:
# One block remains: xor with tweak %xmm5, encrypt inline, xor again.
# Every short path leaves the last tweak it used in %xmm1 for the done code.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001242 movups (%esi),%xmm2
1243 leal 16(%esi),%esi
1244 xorps %xmm5,%xmm2
1245 movups (%edx),%xmm0
1246 movups 16(%edx),%xmm1
1247 leal 32(%edx),%edx
1248 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001249L055enc1_loop_9:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001250.byte 102,15,56,220,209
1251 decl %ecx
1252 movups (%edx),%xmm1
1253 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001254 jnz L055enc1_loop_9
Adam Langleyd9e397b2015-01-22 14:27:53 -08001255.byte 102,15,56,221,209
1256 xorps %xmm5,%xmm2
1257 movups %xmm2,(%edi)
1258 leal 16(%edi),%edi
1259 movdqa %xmm5,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001260 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001261.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001262L051xts_enc_two:
# Two blocks remain: tweaks are %xmm5 and %xmm6 (copied from %xmm1).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001263 movaps %xmm1,%xmm6
1264 movups (%esi),%xmm2
1265 movups 16(%esi),%xmm3
1266 leal 32(%esi),%esi
1267 xorps %xmm5,%xmm2
1268 xorps %xmm6,%xmm3
1269 call __aesni_encrypt2
1270 xorps %xmm5,%xmm2
1271 xorps %xmm6,%xmm3
1272 movups %xmm2,(%edi)
1273 movups %xmm3,16(%edi)
1274 leal 32(%edi),%edi
1275 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001276 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001277.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001278L052xts_enc_three:
# Three blocks remain: tweaks are %xmm5, %xmm6 and %xmm7 (from %xmm1).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001279 movaps %xmm1,%xmm7
1280 movups (%esi),%xmm2
1281 movups 16(%esi),%xmm3
1282 movups 32(%esi),%xmm4
1283 leal 48(%esi),%esi
1284 xorps %xmm5,%xmm2
1285 xorps %xmm6,%xmm3
1286 xorps %xmm7,%xmm4
1287 call __aesni_encrypt3
1288 xorps %xmm5,%xmm2
1289 xorps %xmm6,%xmm3
1290 xorps %xmm7,%xmm4
1291 movups %xmm2,(%edi)
1292 movups %xmm3,16(%edi)
1293 movups %xmm4,32(%edi)
1294 leal 48(%edi),%edi
1295 movdqa %xmm7,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001296 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001297.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001298L053xts_enc_four:
# Four blocks remain: tweaks 1 and 2 were saved at 0 and 16(%esp), tweak 3
# is in %xmm7 and tweak 4 is copied from %xmm1 into %xmm6.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001299 movaps %xmm1,%xmm6
1300 movups (%esi),%xmm2
1301 movups 16(%esi),%xmm3
1302 movups 32(%esi),%xmm4
1303 xorps (%esp),%xmm2
1304 movups 48(%esi),%xmm5
1305 leal 64(%esi),%esi
1306 xorps 16(%esp),%xmm3
1307 xorps %xmm7,%xmm4
1308 xorps %xmm6,%xmm5
1309 call __aesni_encrypt4
1310 xorps (%esp),%xmm2
1311 xorps 16(%esp),%xmm3
1312 xorps %xmm7,%xmm4
1313 movups %xmm2,(%edi)
1314 xorps %xmm6,%xmm5
1315 movups %xmm3,16(%edi)
1316 movups %xmm4,32(%edi)
1317 movups %xmm5,48(%edi)
1318 leal 64(%edi),%edi
1319 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001320 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001321.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001322L049xts_enc_done6x:
# No whole blocks were left over. %xmm1 already holds the next unused tweak,
# so it is taken as is when a partial tail (len & 15) exists.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001323 movl 112(%esp),%eax
1324 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001325 jz L056xts_enc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001326 movdqa %xmm1,%xmm5
1327 movl %eax,112(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07001328 jmp L057xts_enc_steal
Adam Langleyd9e397b2015-01-22 14:27:53 -08001329.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001330L054xts_enc_done:
# Reached from the short paths with the last used tweak in %xmm1. When a
# partial tail exists, one more tweak update yields %xmm5 and the tail
# length replaces len at 112(%esp).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001331 movl 112(%esp),%eax
1332 pxor %xmm0,%xmm0
1333 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001334 jz L056xts_enc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001335 pcmpgtd %xmm1,%xmm0
1336 movl %eax,112(%esp)
1337 pshufd $19,%xmm0,%xmm5
1338 paddq %xmm1,%xmm1
1339 pand 96(%esp),%xmm5
1340 pxor %xmm1,%xmm5
Adam Langleye9ada862015-05-11 17:20:37 -07001341L057xts_enc_steal:
# Byte loop over the tail: each input tail byte replaces the matching byte
# of the previous output block (at -16(%edi)), and the byte it replaces is
# written out as the tail output byte.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001342 movzbl (%esi),%ecx
1343 movzbl -16(%edi),%edx
1344 leal 1(%esi),%esi
1345 movb %cl,-16(%edi)
1346 movb %dl,(%edi)
1347 leal 1(%edi),%edi
1348 subl $1,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001349 jnz L057xts_enc_steal
# Rewind %edi by the tail length and encrypt the patched block in place
# with tweak %xmm5.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001350 subl 112(%esp),%edi
1351 movl %ebp,%edx
1352 movl %ebx,%ecx
1353 movups -16(%edi),%xmm2
1354 xorps %xmm5,%xmm2
1355 movups (%edx),%xmm0
1356 movups 16(%edx),%xmm1
1357 leal 32(%edx),%edx
1358 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001359L058enc1_loop_10:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001360.byte 102,15,56,220,209
1361 decl %ecx
1362 movups (%edx),%xmm1
1363 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001364 jnz L058enc1_loop_10
Adam Langleyd9e397b2015-01-22 14:27:53 -08001365.byte 102,15,56,221,209
1366 xorps %xmm5,%xmm2
1367 movups %xmm2,-16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001368L056xts_enc_ret:
# Zero %xmm0..%xmm7 and the 96 bytes of tweak scratch on the stack, then
# restore the caller %esp saved at 116(%esp) and the pushed registers.
1369 pxor %xmm0,%xmm0
1370 pxor %xmm1,%xmm1
1371 pxor %xmm2,%xmm2
1372 movdqa %xmm0,(%esp)
1373 pxor %xmm3,%xmm3
1374 movdqa %xmm0,16(%esp)
1375 pxor %xmm4,%xmm4
1376 movdqa %xmm0,32(%esp)
1377 pxor %xmm5,%xmm5
1378 movdqa %xmm0,48(%esp)
1379 pxor %xmm6,%xmm6
1380 movdqa %xmm0,64(%esp)
1381 pxor %xmm7,%xmm7
1382 movdqa %xmm0,80(%esp)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001383 movl 116(%esp),%esp
1384 popl %edi
1385 popl %esi
1386 popl %ebx
1387 popl %ebp
1388 ret
# _aesni_xts_decrypt (i386, AT&T syntax, all arguments on the stack).
# Once the four pushes below are done the arguments are read from:
#   src     = 20(%esp)  pointer the input blocks are loaded from (%esi)
#   dst     = 24(%esp)  pointer the output blocks are stored to (%edi)
#   len     = 28(%esp)  length in bytes (%eax)
#   key1    = 32(%esp)  key schedule applied to the data blocks
#   key2    = 36(%esp)  key schedule applied to the initial tweak
#   tweak   = 40(%esp)  pointer to the 16 bytes that are encrypted with
#                       key2 to form the first tweak
# Both key schedules hold their loop count at byte offset 240.
# Byte sequences 102,15,56,222,xx encode aesdec and 102,15,56,223,xx encode
# aesdeclast; 220/221 are aesenc/aesenclast. The final byte picks the
# registers (209 = %xmm1,%xmm2, 217..249 = %xmm1 into %xmm3..%xmm7).
# Aligned frame built below: bytes 0..95 hold tweaks, 96..111 hold the
# constant {135,0,1,0}, 112 holds the adjusted len, 116 the caller %esp.
# Helpers called here (__aesni_decrypt2/3/4/6, L_aesni_decrypt6_enter) are
# defined outside this chunk; their register contract is assumed, not shown.
1389.globl _aesni_xts_decrypt
1390.private_extern _aesni_xts_decrypt
1391.align 4
1392_aesni_xts_decrypt:
1393L_aesni_xts_decrypt_begin:
1394 pushl %ebp
1395 pushl %ebx
1396 pushl %esi
1397 pushl %edi
# The initial tweak is produced with aesenc/aesenclast (bytes 220/221) under
# key2, exactly as in the encrypt routine, even though data is decrypted.
1398 movl 36(%esp),%edx
1399 movl 40(%esp),%esi
1400 movl 240(%edx),%ecx
1401 movups (%esi),%xmm2
1402 movups (%edx),%xmm0
1403 movups 16(%edx),%xmm1
1404 leal 32(%edx),%edx
1405 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001406L059enc1_loop_11:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001407.byte 102,15,56,220,209
1408 decl %ecx
1409 movups (%edx),%xmm1
1410 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001411 jnz L059enc1_loop_11
Adam Langleyd9e397b2015-01-22 14:27:53 -08001412.byte 102,15,56,221,209
# Load src, dst, len and key1, then carve out a 16-byte aligned frame.
1413 movl 20(%esp),%esi
1414 movl 24(%esp),%edi
1415 movl 28(%esp),%eax
1416 movl 32(%esp),%edx
1417 movl %esp,%ebp
1418 subl $120,%esp
1419 andl $-16,%esp
# When len is not a multiple of 16, subtract 16 from it so that one whole
# block is withheld from the bulk paths and handled with the partial tail.
# The low four bits of len are unchanged by this adjustment.
1420 xorl %ebx,%ebx
1421 testl $15,%eax
1422 setnz %bl
1423 shll $4,%ebx
1424 subl %ebx,%eax
# Constant used by every tweak update: dword 0 = 135 (0x87), dword 2 = 1.
1425 movl $135,96(%esp)
1426 movl $0,100(%esp)
1427 movl $1,104(%esp)
1428 movl $0,108(%esp)
1429 movl %eax,112(%esp)
1430 movl %ebp,116(%esp)
# Register roles from here: %xmm1 current tweak, %xmm0 per-dword sign mask
# of the tweak (pcmpgtd against zero), %xmm3 the constant from 96(%esp),
# %ebp key1, %ebx loop count, %eax whole-block byte count minus 96.
1431 movl 240(%edx),%ecx
1432 movl %edx,%ebp
1433 movl %ecx,%ebx
1434 movdqa %xmm2,%xmm1
1435 pxor %xmm0,%xmm0
1436 movdqa 96(%esp),%xmm3
1437 pcmpgtd %xmm1,%xmm0
1438 andl $-16,%eax
1439 subl $96,%eax
# Fewer than 96 bytes of whole blocks: skip the six-block loop.
Adam Langleye9ada862015-05-11 17:20:37 -07001440 jc L060xts_dec_short
# Six-block loop setup: %ebx = 16 - 16*count, %edx = key1 + 32 + 16*count.
# NOTE(review): presumably the form L_aesni_decrypt6_enter expects; confirm
# against that helper.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001441 shll $4,%ecx
1442 movl $16,%ebx
1443 subl %ecx,%ebx
1444 leal 32(%edx,%ecx,1),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001445 jmp L061xts_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001446.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001447L061xts_dec_loop6:
# Tweak update pattern, repeated below: pshufd $19 moves the sign mask of
# dword 3 to dword 0 and that of dword 1 to dword 2; pand keeps 0x87 and 1
# from the constant; paddq doubles both 64-bit halves; pxor folds the two
# carries back in. Each tweak is saved to the frame before it is doubled.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001448 pshufd $19,%xmm0,%xmm2
1449 pxor %xmm0,%xmm0
1450 movdqa %xmm1,(%esp)
1451 paddq %xmm1,%xmm1
1452 pand %xmm3,%xmm2
1453 pcmpgtd %xmm1,%xmm0
1454 pxor %xmm2,%xmm1
1455 pshufd $19,%xmm0,%xmm2
1456 pxor %xmm0,%xmm0
1457 movdqa %xmm1,16(%esp)
1458 paddq %xmm1,%xmm1
1459 pand %xmm3,%xmm2
1460 pcmpgtd %xmm1,%xmm0
1461 pxor %xmm2,%xmm1
1462 pshufd $19,%xmm0,%xmm2
1463 pxor %xmm0,%xmm0
1464 movdqa %xmm1,32(%esp)
1465 paddq %xmm1,%xmm1
1466 pand %xmm3,%xmm2
1467 pcmpgtd %xmm1,%xmm0
1468 pxor %xmm2,%xmm1
1469 pshufd $19,%xmm0,%xmm2
1470 pxor %xmm0,%xmm0
1471 movdqa %xmm1,48(%esp)
1472 paddq %xmm1,%xmm1
1473 pand %xmm3,%xmm2
1474 pcmpgtd %xmm1,%xmm0
1475 pxor %xmm2,%xmm1
# Sixth tweak is formed in %xmm7 while the six input blocks are loaded and
# xored with the first round key (%xmm0) and their tweaks. The data load
# overwrites %xmm3, so the constant is reloaded after the call.
1476 pshufd $19,%xmm0,%xmm7
1477 movdqa %xmm1,64(%esp)
1478 paddq %xmm1,%xmm1
1479 movups (%ebp),%xmm0
1480 pand %xmm3,%xmm7
1481 movups (%esi),%xmm2
1482 pxor %xmm1,%xmm7
1483 movl %ebx,%ecx
1484 movdqu 16(%esi),%xmm3
1485 xorps %xmm0,%xmm2
1486 movdqu 32(%esi),%xmm4
1487 pxor %xmm0,%xmm3
1488 movdqu 48(%esi),%xmm5
1489 pxor %xmm0,%xmm4
1490 movdqu 64(%esi),%xmm6
1491 pxor %xmm0,%xmm5
1492 movdqu 80(%esi),%xmm1
1493 pxor %xmm0,%xmm6
1494 leal 96(%esi),%esi
1495 pxor (%esp),%xmm2
1496 movdqa %xmm7,80(%esp)
1497 pxor %xmm1,%xmm7
# One aesdec with the round key at 16(%ebp) is applied to all six blocks
# here, then the helper is entered with the key at 32(%ebp) in %xmm0.
1498 movups 16(%ebp),%xmm1
1499 pxor 16(%esp),%xmm3
1500 pxor 32(%esp),%xmm4
1501.byte 102,15,56,222,209
1502 pxor 48(%esp),%xmm5
1503 pxor 64(%esp),%xmm6
1504.byte 102,15,56,222,217
1505 pxor %xmm0,%xmm7
1506 movups 32(%ebp),%xmm0
1507.byte 102,15,56,222,225
1508.byte 102,15,56,222,233
1509.byte 102,15,56,222,241
1510.byte 102,15,56,222,249
1511 call L_aesni_decrypt6_enter
# Xor each result with its saved tweak, store six blocks, and derive the
# tweak for the next iteration from the sixth one (reloaded into %xmm1).
1512 movdqa 80(%esp),%xmm1
1513 pxor %xmm0,%xmm0
1514 xorps (%esp),%xmm2
1515 pcmpgtd %xmm1,%xmm0
1516 xorps 16(%esp),%xmm3
1517 movups %xmm2,(%edi)
1518 xorps 32(%esp),%xmm4
1519 movups %xmm3,16(%edi)
1520 xorps 48(%esp),%xmm5
1521 movups %xmm4,32(%edi)
1522 xorps 64(%esp),%xmm6
1523 movups %xmm5,48(%edi)
1524 xorps %xmm1,%xmm7
1525 movups %xmm6,64(%edi)
1526 pshufd $19,%xmm0,%xmm2
1527 movups %xmm7,80(%edi)
1528 leal 96(%edi),%edi
1529 movdqa 96(%esp),%xmm3
1530 pxor %xmm0,%xmm0
1531 paddq %xmm1,%xmm1
1532 pand %xmm3,%xmm2
1533 pcmpgtd %xmm1,%xmm0
1534 pxor %xmm2,%xmm1
1535 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001536 jnc L061xts_dec_loop6
# Loop finished: restore %ecx/%ebx to the plain loop count and %edx to key1.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001537 movl 240(%ebp),%ecx
1538 movl %ebp,%edx
1539 movl %ecx,%ebx
Adam Langleye9ada862015-05-11 17:20:37 -07001540L060xts_dec_short:
# Undo the bias: %eax = bytes of whole blocks still to do (0, 16, .. 80).
# The conditional jumps below reuse flags from the preceding cmpl; the SSE
# instructions placed between compare and jump do not modify them.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001541 addl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001542 jz L062xts_dec_done6x
Adam Langleyd9e397b2015-01-22 14:27:53 -08001543 movdqa %xmm1,%xmm5
1544 cmpl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001545 jb L063xts_dec_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001546 pshufd $19,%xmm0,%xmm2
1547 pxor %xmm0,%xmm0
1548 paddq %xmm1,%xmm1
1549 pand %xmm3,%xmm2
1550 pcmpgtd %xmm1,%xmm0
1551 pxor %xmm2,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001552 je L064xts_dec_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001553 pshufd $19,%xmm0,%xmm2
1554 pxor %xmm0,%xmm0
1555 movdqa %xmm1,%xmm6
1556 paddq %xmm1,%xmm1
1557 pand %xmm3,%xmm2
1558 pcmpgtd %xmm1,%xmm0
1559 pxor %xmm2,%xmm1
1560 cmpl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001561 jb L065xts_dec_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001562 pshufd $19,%xmm0,%xmm2
1563 pxor %xmm0,%xmm0
1564 movdqa %xmm1,%xmm7
1565 paddq %xmm1,%xmm1
1566 pand %xmm3,%xmm2
1567 pcmpgtd %xmm1,%xmm0
1568 pxor %xmm2,%xmm1
1569 movdqa %xmm5,(%esp)
1570 movdqa %xmm6,16(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07001571 je L066xts_dec_four
# Five blocks remain: tweaks 1..4 go to 0..48(%esp), the fifth is built in
# %xmm7 and saved at 64(%esp); the six-block helper is called with five
# loaded blocks and only %xmm2..%xmm6 are stored afterwards.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001572 movdqa %xmm7,32(%esp)
1573 pshufd $19,%xmm0,%xmm7
1574 movdqa %xmm1,48(%esp)
1575 paddq %xmm1,%xmm1
1576 pand %xmm3,%xmm7
1577 pxor %xmm1,%xmm7
1578 movdqu (%esi),%xmm2
1579 movdqu 16(%esi),%xmm3
1580 movdqu 32(%esi),%xmm4
1581 pxor (%esp),%xmm2
1582 movdqu 48(%esi),%xmm5
1583 pxor 16(%esp),%xmm3
1584 movdqu 64(%esi),%xmm6
1585 pxor 32(%esp),%xmm4
1586 leal 80(%esi),%esi
1587 pxor 48(%esp),%xmm5
1588 movdqa %xmm7,64(%esp)
1589 pxor %xmm7,%xmm6
1590 call __aesni_decrypt6
1591 movaps 64(%esp),%xmm1
1592 xorps (%esp),%xmm2
1593 xorps 16(%esp),%xmm3
1594 xorps 32(%esp),%xmm4
1595 movups %xmm2,(%edi)
1596 xorps 48(%esp),%xmm5
1597 movups %xmm3,16(%edi)
1598 xorps %xmm1,%xmm6
1599 movups %xmm4,32(%edi)
1600 movups %xmm5,48(%edi)
1601 movups %xmm6,64(%edi)
1602 leal 80(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001603 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001604.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001605L063xts_dec_one:
# One block remains: xor with tweak %xmm5, decrypt inline, xor again.
# Every short path leaves the last tweak it used in %xmm1 for the done code.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001606 movups (%esi),%xmm2
1607 leal 16(%esi),%esi
1608 xorps %xmm5,%xmm2
1609 movups (%edx),%xmm0
1610 movups 16(%edx),%xmm1
1611 leal 32(%edx),%edx
1612 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001613L068dec1_loop_12:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001614.byte 102,15,56,222,209
1615 decl %ecx
1616 movups (%edx),%xmm1
1617 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001618 jnz L068dec1_loop_12
Adam Langleyd9e397b2015-01-22 14:27:53 -08001619.byte 102,15,56,223,209
1620 xorps %xmm5,%xmm2
1621 movups %xmm2,(%edi)
1622 leal 16(%edi),%edi
1623 movdqa %xmm5,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001624 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001625.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001626L064xts_dec_two:
# Two blocks remain: tweaks are %xmm5 and %xmm6 (copied from %xmm1).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001627 movaps %xmm1,%xmm6
1628 movups (%esi),%xmm2
1629 movups 16(%esi),%xmm3
1630 leal 32(%esi),%esi
1631 xorps %xmm5,%xmm2
1632 xorps %xmm6,%xmm3
1633 call __aesni_decrypt2
1634 xorps %xmm5,%xmm2
1635 xorps %xmm6,%xmm3
1636 movups %xmm2,(%edi)
1637 movups %xmm3,16(%edi)
1638 leal 32(%edi),%edi
1639 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001640 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001641.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001642L065xts_dec_three:
# Three blocks remain: tweaks are %xmm5, %xmm6 and %xmm7 (from %xmm1).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001643 movaps %xmm1,%xmm7
1644 movups (%esi),%xmm2
1645 movups 16(%esi),%xmm3
1646 movups 32(%esi),%xmm4
1647 leal 48(%esi),%esi
1648 xorps %xmm5,%xmm2
1649 xorps %xmm6,%xmm3
1650 xorps %xmm7,%xmm4
1651 call __aesni_decrypt3
1652 xorps %xmm5,%xmm2
1653 xorps %xmm6,%xmm3
1654 xorps %xmm7,%xmm4
1655 movups %xmm2,(%edi)
1656 movups %xmm3,16(%edi)
1657 movups %xmm4,32(%edi)
1658 leal 48(%edi),%edi
1659 movdqa %xmm7,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001660 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001661.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001662L066xts_dec_four:
# Four blocks remain: tweaks 1 and 2 were saved at 0 and 16(%esp), tweak 3
# is in %xmm7 and tweak 4 is copied from %xmm1 into %xmm6.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001663 movaps %xmm1,%xmm6
1664 movups (%esi),%xmm2
1665 movups 16(%esi),%xmm3
1666 movups 32(%esi),%xmm4
1667 xorps (%esp),%xmm2
1668 movups 48(%esi),%xmm5
1669 leal 64(%esi),%esi
1670 xorps 16(%esp),%xmm3
1671 xorps %xmm7,%xmm4
1672 xorps %xmm6,%xmm5
1673 call __aesni_decrypt4
1674 xorps (%esp),%xmm2
1675 xorps 16(%esp),%xmm3
1676 xorps %xmm7,%xmm4
1677 movups %xmm2,(%edi)
1678 xorps %xmm6,%xmm5
1679 movups %xmm3,16(%edi)
1680 movups %xmm4,32(%edi)
1681 movups %xmm5,48(%edi)
1682 leal 64(%edi),%edi
1683 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001684 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001685.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001686L062xts_dec_done6x:
# No whole blocks were left over. %xmm1 (next unused tweak), %xmm0 (its
# sign mask) and %xmm3 (the constant) are still live from the loop tail or
# from the entry code, so the tail code is entered past its first update.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001687 movl 112(%esp),%eax
1688 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001689 jz L069xts_dec_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001690 movl %eax,112(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07001691 jmp L070xts_dec_only_one_more
Adam Langleyd9e397b2015-01-22 14:27:53 -08001692.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001693L067xts_dec_done:
# Reached from the short paths with the last used tweak in %xmm1. When a
# partial tail exists, advance %xmm1 to the next unused tweak and refresh
# %xmm0 and %xmm3; the tail length replaces len at 112(%esp).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001694 movl 112(%esp),%eax
1695 pxor %xmm0,%xmm0
1696 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001697 jz L069xts_dec_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001698 pcmpgtd %xmm1,%xmm0
1699 movl %eax,112(%esp)
1700 pshufd $19,%xmm0,%xmm2
1701 pxor %xmm0,%xmm0
1702 movdqa 96(%esp),%xmm3
1703 paddq %xmm1,%xmm1
1704 pand %xmm3,%xmm2
1705 pcmpgtd %xmm1,%xmm0
1706 pxor %xmm2,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001707L070xts_dec_only_one_more:
# Keep the current tweak in %xmm6 and form the following one in %xmm5.
# The withheld whole block at (%esi) is decrypted first with the LATER
# tweak (%xmm5) and stored at (%edi).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001708 pshufd $19,%xmm0,%xmm5
1709 movdqa %xmm1,%xmm6
1710 paddq %xmm1,%xmm1
1711 pand %xmm3,%xmm5
1712 pxor %xmm1,%xmm5
1713 movl %ebp,%edx
1714 movl %ebx,%ecx
1715 movups (%esi),%xmm2
1716 xorps %xmm5,%xmm2
1717 movups (%edx),%xmm0
1718 movups 16(%edx),%xmm1
1719 leal 32(%edx),%edx
1720 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001721L071dec1_loop_13:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001722.byte 102,15,56,222,209
1723 decl %ecx
1724 movups (%edx),%xmm1
1725 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001726 jnz L071dec1_loop_13
Adam Langleyd9e397b2015-01-22 14:27:53 -08001727.byte 102,15,56,223,209
1728 xorps %xmm5,%xmm2
1729 movups %xmm2,(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001730L072xts_dec_steal:
# Byte loop over the tail: each input tail byte (at 16(%esi)) replaces the
# matching byte of the block just stored at (%edi), and the byte it
# replaces is written out as the tail output byte at 16(%edi).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001731 movzbl 16(%esi),%ecx
1732 movzbl (%edi),%edx
1733 leal 1(%esi),%esi
1734 movb %cl,(%edi)
1735 movb %dl,16(%edi)
1736 leal 1(%edi),%edi
1737 subl $1,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001738 jnz L072xts_dec_steal
# Rewind %edi by the tail length and decrypt the patched block in place
# with the EARLIER tweak kept in %xmm6.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001739 subl 112(%esp),%edi
1740 movl %ebp,%edx
1741 movl %ebx,%ecx
1742 movups (%edi),%xmm2
1743 xorps %xmm6,%xmm2
1744 movups (%edx),%xmm0
1745 movups 16(%edx),%xmm1
1746 leal 32(%edx),%edx
1747 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001748L073dec1_loop_14:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001749.byte 102,15,56,222,209
1750 decl %ecx
1751 movups (%edx),%xmm1
1752 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001753 jnz L073dec1_loop_14
Adam Langleyd9e397b2015-01-22 14:27:53 -08001754.byte 102,15,56,223,209
1755 xorps %xmm6,%xmm2
1756 movups %xmm2,(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001757L069xts_dec_ret:
# Zero %xmm0..%xmm7 and the 96 bytes of tweak scratch on the stack, then
# restore the caller %esp saved at 116(%esp) and the pushed registers.
1758 pxor %xmm0,%xmm0
1759 pxor %xmm1,%xmm1
1760 pxor %xmm2,%xmm2
1761 movdqa %xmm0,(%esp)
1762 pxor %xmm3,%xmm3
1763 movdqa %xmm0,16(%esp)
1764 pxor %xmm4,%xmm4
1765 movdqa %xmm0,32(%esp)
1766 pxor %xmm5,%xmm5
1767 movdqa %xmm0,48(%esp)
1768 pxor %xmm6,%xmm6
1769 movdqa %xmm0,64(%esp)
1770 pxor %xmm7,%xmm7
1771 movdqa %xmm0,80(%esp)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001772 movl 116(%esp),%esp
1773 popl %edi
1774 popl %esi
1775 popl %ebx
1776 popl %ebp
1777 ret
1778.globl _aesni_cbc_encrypt
1779.private_extern _aesni_cbc_encrypt
1780.align 4
1781_aesni_cbc_encrypt:
1782L_aesni_cbc_encrypt_begin:
1783 pushl %ebp
1784 pushl %ebx
1785 pushl %esi
1786 pushl %edi
1787 movl 20(%esp),%esi
1788 movl %esp,%ebx
1789 movl 24(%esp),%edi
1790 subl $24,%ebx
1791 movl 28(%esp),%eax
1792 andl $-16,%ebx
1793 movl 32(%esp),%edx
1794 movl 36(%esp),%ebp
1795 testl %eax,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001796 jz L074cbc_abort
Adam Langleyd9e397b2015-01-22 14:27:53 -08001797 cmpl $0,40(%esp)
1798 xchgl %esp,%ebx
1799 movups (%ebp),%xmm7
1800 movl 240(%edx),%ecx
1801 movl %edx,%ebp
1802 movl %ebx,16(%esp)
1803 movl %ecx,%ebx
Adam Langleye9ada862015-05-11 17:20:37 -07001804 je L075cbc_decrypt
Adam Langleyd9e397b2015-01-22 14:27:53 -08001805 movaps %xmm7,%xmm2
1806 cmpl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001807 jb L076cbc_enc_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001808 subl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001809 jmp L077cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001810.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001811L077cbc_enc_loop:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001812 movups (%esi),%xmm7
1813 leal 16(%esi),%esi
1814 movups (%edx),%xmm0
1815 movups 16(%edx),%xmm1
1816 xorps %xmm0,%xmm7
1817 leal 32(%edx),%edx
1818 xorps %xmm7,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001819L078enc1_loop_15:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001820.byte 102,15,56,220,209
1821 decl %ecx
1822 movups (%edx),%xmm1
1823 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001824 jnz L078enc1_loop_15
Adam Langleyd9e397b2015-01-22 14:27:53 -08001825.byte 102,15,56,221,209
1826 movl %ebx,%ecx
1827 movl %ebp,%edx
1828 movups %xmm2,(%edi)
1829 leal 16(%edi),%edi
1830 subl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001831 jnc L077cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001832 addl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001833 jnz L076cbc_enc_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001834 movaps %xmm2,%xmm7
Adam Langleye9ada862015-05-11 17:20:37 -07001835 pxor %xmm2,%xmm2
1836 jmp L079cbc_ret
1837L076cbc_enc_tail:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001838 movl %eax,%ecx
1839.long 2767451785
1840 movl $16,%ecx
1841 subl %eax,%ecx
1842 xorl %eax,%eax
1843.long 2868115081
1844 leal -16(%edi),%edi
1845 movl %ebx,%ecx
1846 movl %edi,%esi
1847 movl %ebp,%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001848 jmp L077cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001849.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001850L075cbc_decrypt:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001851 cmpl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001852 jbe L080cbc_dec_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001853 movaps %xmm7,(%esp)
1854 subl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001855 jmp L081cbc_dec_loop6_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -08001856.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001857L082cbc_dec_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001858 movaps %xmm0,(%esp)
1859 movups %xmm7,(%edi)
1860 leal 16(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001861L081cbc_dec_loop6_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001862 movdqu (%esi),%xmm2
1863 movdqu 16(%esi),%xmm3
1864 movdqu 32(%esi),%xmm4
1865 movdqu 48(%esi),%xmm5
1866 movdqu 64(%esi),%xmm6
1867 movdqu 80(%esi),%xmm7
1868 call __aesni_decrypt6
1869 movups (%esi),%xmm1
1870 movups 16(%esi),%xmm0
1871 xorps (%esp),%xmm2
1872 xorps %xmm1,%xmm3
1873 movups 32(%esi),%xmm1
1874 xorps %xmm0,%xmm4
1875 movups 48(%esi),%xmm0
1876 xorps %xmm1,%xmm5
1877 movups 64(%esi),%xmm1
1878 xorps %xmm0,%xmm6
1879 movups 80(%esi),%xmm0
1880 xorps %xmm1,%xmm7
1881 movups %xmm2,(%edi)
1882 movups %xmm3,16(%edi)
1883 leal 96(%esi),%esi
1884 movups %xmm4,32(%edi)
1885 movl %ebx,%ecx
1886 movups %xmm5,48(%edi)
1887 movl %ebp,%edx
1888 movups %xmm6,64(%edi)
1889 leal 80(%edi),%edi
1890 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001891 ja L082cbc_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001892 movaps %xmm7,%xmm2
1893 movaps %xmm0,%xmm7
1894 addl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001895 jle L083cbc_dec_clear_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001896 movups %xmm2,(%edi)
1897 leal 16(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001898L080cbc_dec_tail:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001899 movups (%esi),%xmm2
1900 movaps %xmm2,%xmm6
1901 cmpl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001902 jbe L084cbc_dec_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001903 movups 16(%esi),%xmm3
1904 movaps %xmm3,%xmm5
1905 cmpl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001906 jbe L085cbc_dec_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001907 movups 32(%esi),%xmm4
1908 cmpl $48,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001909 jbe L086cbc_dec_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001910 movups 48(%esi),%xmm5
1911 cmpl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001912 jbe L087cbc_dec_four
Adam Langleyd9e397b2015-01-22 14:27:53 -08001913 movups 64(%esi),%xmm6
1914 movaps %xmm7,(%esp)
1915 movups (%esi),%xmm2
1916 xorps %xmm7,%xmm7
1917 call __aesni_decrypt6
1918 movups (%esi),%xmm1
1919 movups 16(%esi),%xmm0
1920 xorps (%esp),%xmm2
1921 xorps %xmm1,%xmm3
1922 movups 32(%esi),%xmm1
1923 xorps %xmm0,%xmm4
1924 movups 48(%esi),%xmm0
1925 xorps %xmm1,%xmm5
1926 movups 64(%esi),%xmm7
1927 xorps %xmm0,%xmm6
1928 movups %xmm2,(%edi)
1929 movups %xmm3,16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001930 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001931 movups %xmm4,32(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001932 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001933 movups %xmm5,48(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001934 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08001935 leal 64(%edi),%edi
1936 movaps %xmm6,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001937 pxor %xmm6,%xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001938 subl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001939 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001940.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001941L084cbc_dec_one:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001942 movups (%edx),%xmm0
1943 movups 16(%edx),%xmm1
1944 leal 32(%edx),%edx
1945 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001946L089dec1_loop_16:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001947.byte 102,15,56,222,209
1948 decl %ecx
1949 movups (%edx),%xmm1
1950 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001951 jnz L089dec1_loop_16
Adam Langleyd9e397b2015-01-22 14:27:53 -08001952.byte 102,15,56,223,209
1953 xorps %xmm7,%xmm2
1954 movaps %xmm6,%xmm7
1955 subl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001956 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001957.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001958L085cbc_dec_two:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001959 call __aesni_decrypt2
1960 xorps %xmm7,%xmm2
1961 xorps %xmm6,%xmm3
1962 movups %xmm2,(%edi)
1963 movaps %xmm3,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001964 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001965 leal 16(%edi),%edi
1966 movaps %xmm5,%xmm7
1967 subl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001968 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001969.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001970L086cbc_dec_three:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001971 call __aesni_decrypt3
1972 xorps %xmm7,%xmm2
1973 xorps %xmm6,%xmm3
1974 xorps %xmm5,%xmm4
1975 movups %xmm2,(%edi)
1976 movaps %xmm4,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001977 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001978 movups %xmm3,16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001979 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001980 leal 32(%edi),%edi
1981 movups 32(%esi),%xmm7
1982 subl $48,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001983 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001984.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001985L087cbc_dec_four:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001986 call __aesni_decrypt4
1987 movups 16(%esi),%xmm1
1988 movups 32(%esi),%xmm0
1989 xorps %xmm7,%xmm2
1990 movups 48(%esi),%xmm7
1991 xorps %xmm6,%xmm3
1992 movups %xmm2,(%edi)
1993 xorps %xmm1,%xmm4
1994 movups %xmm3,16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001995 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001996 xorps %xmm0,%xmm5
1997 movups %xmm4,32(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001998 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001999 leal 48(%edi),%edi
2000 movaps %xmm5,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07002001 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08002002 subl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002003 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08002004.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07002005L083cbc_dec_clear_tail_collected:
2006 pxor %xmm3,%xmm3
2007 pxor %xmm4,%xmm4
2008 pxor %xmm5,%xmm5
2009 pxor %xmm6,%xmm6
2010L088cbc_dec_tail_collected:
2011 andl $15,%eax
2012 jnz L090cbc_dec_tail_partial
2013 movups %xmm2,(%edi)
2014 pxor %xmm0,%xmm0
2015 jmp L079cbc_ret
2016.align 4,0x90
2017L090cbc_dec_tail_partial:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002018 movaps %xmm2,(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07002019 pxor %xmm0,%xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002020 movl $16,%ecx
2021 movl %esp,%esi
2022 subl %eax,%ecx
2023.long 2767451785
Adam Langleye9ada862015-05-11 17:20:37 -07002024 movdqa %xmm2,(%esp)
2025L079cbc_ret:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002026 movl 16(%esp),%esp
2027 movl 36(%esp),%ebp
Adam Langleye9ada862015-05-11 17:20:37 -07002028 pxor %xmm2,%xmm2
2029 pxor %xmm1,%xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002030 movups %xmm7,(%ebp)
Adam Langleye9ada862015-05-11 17:20:37 -07002031 pxor %xmm7,%xmm7
2032L074cbc_abort:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002033 popl %edi
2034 popl %esi
2035 popl %ebx
2036 popl %ebp
2037 ret
// __aesni_set_encrypt_key: internal AES-NI key-schedule expansion helper.
// Register interface (no stack arguments are read in this routine):
//   In:   %eax = pointer to the user key
//         %ecx = key length in bits (128, 192 or 256)
//         %edx = pointer to the output key schedule
//   Out:  %eax = 0 on success, -1 if %eax or %edx was NULL, -2 if %ecx is
//         not one of the three supported sizes.
//         On success %ecx holds the value that was also stored at byte
//         offset 240 of the schedule (9, 11 or 13).
//   Uses: %ecx, %edx, %xmm0-%xmm5 and flags; %ebp/%ebx are pushed and popped.
// NOTE(review): every line below still carries blame-view residue (author,
// commit id, timestamp and a line number fused in front of the instruction);
// it has to be stripped before this file can be assembled.
2038.private_extern __aesni_set_encrypt_key
2039.align 4
2040__aesni_set_encrypt_key:
Adam Langleye9ada862015-05-11 17:20:37 -07002041 pushl %ebp
2042 pushl %ebx
// Reject NULL user-key or schedule pointers.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002043 testl %eax,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002044 jz L091bad_pointer
Adam Langleyd9e397b2015-01-22 14:27:53 -08002045 testl %edx,%edx
Adam Langleye9ada862015-05-11 17:20:37 -07002046 jz L091bad_pointer
// PIC idiom: call/pop yields the address of L092pic, which is then rebased
// so that %ebx points at Lkey_const.
2047 call L092pic
2048L092pic:
2049 popl %ebx
2050 leal Lkey_const-L092pic(%ebx),%ebx
// %ebp = address of _OPENSSL_ia32cap_P, fetched through its non-lazy pointer.
2051 movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
// xmm0 = first 16 bytes of the user key; xmm4 = 0 (scratch for key_128/192/256).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002052 movups (%eax),%xmm0
2053 xorps %xmm4,%xmm4
// %ebp = second 32-bit word of the capability vector.
Adam Langleye9ada862015-05-11 17:20:37 -07002054 movl 4(%ebp),%ebp
// %edx now points 16 bytes into the schedule; slot 0 is written at -16(%edx).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002055 leal 16(%edx),%edx
// Keep only bits 28 and 11 (0x10000800). The *_alt paths are taken when the
// result equals 0x10000000, i.e. bit 28 set and bit 11 clear.
// NOTE(review): presumably these are the AVX and XOP capability bits --
// confirm against the OPENSSL_ia32cap documentation.
Adam Langleye9ada862015-05-11 17:20:37 -07002056 andl $268437504,%ebp
// Dispatch on the requested key size in bits.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002057 cmpl $256,%ecx
Adam Langleye9ada862015-05-11 17:20:37 -07002058 je L09314rounds
Adam Langleyd9e397b2015-01-22 14:27:53 -08002059 cmpl $192,%ecx
Adam Langleye9ada862015-05-11 17:20:37 -07002060 je L09412rounds
Adam Langleyd9e397b2015-01-22 14:27:53 -08002061 cmpl $128,%ecx
Adam Langleye9ada862015-05-11 17:20:37 -07002062 jne L095bad_keybits
Adam Langleyd9e397b2015-01-22 14:27:53 -08002063.align 4,0x90
// ---- 128-bit key ----------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -07002064L09610rounds:
2065 cmpl $268435456,%ebp
2066 je L09710rounds_alt
Adam Langleyd9e397b2015-01-22 14:27:53 -08002067 movl $9,%ecx
// Slot 0 of the schedule is the user key itself.
2068 movups %xmm0,-16(%edx)
// Each ".byte 102,15,58,223,200,N" encodes aeskeygenassist $N,%xmm0,%xmm1.
// The first call enters at the _cold label, which skips the store that
// L099key_128 performs, because slot 0 was already written above.
2069.byte 102,15,58,223,200,1
Adam Langleye9ada862015-05-11 17:20:37 -07002070 call L098key_128_cold
Adam Langleyd9e397b2015-01-22 14:27:53 -08002071.byte 102,15,58,223,200,2
Adam Langleye9ada862015-05-11 17:20:37 -07002072 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002073.byte 102,15,58,223,200,4
Adam Langleye9ada862015-05-11 17:20:37 -07002074 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002075.byte 102,15,58,223,200,8
Adam Langleye9ada862015-05-11 17:20:37 -07002076 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002077.byte 102,15,58,223,200,16
Adam Langleye9ada862015-05-11 17:20:37 -07002078 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002079.byte 102,15,58,223,200,32
Adam Langleye9ada862015-05-11 17:20:37 -07002080 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002081.byte 102,15,58,223,200,64
Adam Langleye9ada862015-05-11 17:20:37 -07002082 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002083.byte 102,15,58,223,200,128
Adam Langleye9ada862015-05-11 17:20:37 -07002084 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002085.byte 102,15,58,223,200,27
Adam Langleye9ada862015-05-11 17:20:37 -07002086 call L099key_128
Adam Langleyd9e397b2015-01-22 14:27:53 -08002087.byte 102,15,58,223,200,54
Adam Langleye9ada862015-05-11 17:20:37 -07002088 call L099key_128
// Store the last round key (schedule offset 160) and the value 9 at
// schedule offset 240 (%edx is at offset 160 here, so 80(%edx) = 240).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002089 movups %xmm0,(%edx)
2090 movl %ecx,80(%edx)
Adam Langleye9ada862015-05-11 17:20:37 -07002091 jmp L100good_key
Adam Langleyd9e397b2015-01-22 14:27:53 -08002092.align 4,0x90
// Local helper: store the current round key, advance %edx by 16, then derive
// the next round key in xmm0 from xmm0 and the aeskeygenassist result in xmm1.
Adam Langleye9ada862015-05-11 17:20:37 -07002093L099key_128:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002094 movups %xmm0,(%edx)
2095 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07002096L098key_128_cold:
// The two shufps/xorps pairs leave xmm0 holding the running XOR of its four
// dwords; this relies on dword 0 of xmm4 staying zero (set by xorps above).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002097 shufps $16,%xmm0,%xmm4
2098 xorps %xmm4,%xmm0
2099 shufps $140,%xmm0,%xmm4
2100 xorps %xmm4,%xmm0
// Broadcast dword 3 of the aeskeygenassist result and fold it in.
2101 shufps $255,%xmm1,%xmm1
2102 xorps %xmm1,%xmm0
2103 ret
2104.align 4,0x90
// 128-bit key, alternative path: pshufb + aesenclast instead of
// aeskeygenassist. xmm5 = byte-shuffle mask from Lkey_const+0, xmm4 = the
// constant row at Lkey_const+32, xmm2 = copy of the current round key.
Adam Langleye9ada862015-05-11 17:20:37 -07002105L09710rounds_alt:
2106 movdqa (%ebx),%xmm5
2107 movl $8,%ecx
2108 movdqa 32(%ebx),%xmm4
2109 movdqa %xmm0,%xmm2
2110 movdqu %xmm0,-16(%edx)
// Eight iterations; each one writes one 16-byte round key at -16(%edx)
// after advancing %edx by 16.
2111L101loop_key128:
// .byte 102,15,56,0,197   = pshufb %xmm5,%xmm0
// .byte 102,15,56,221,196 = aesenclast %xmm4,%xmm0
2112.byte 102,15,56,0,197
2113.byte 102,15,56,221,196
// Double the constant in xmm4 for the next iteration.
2114 pslld $1,%xmm4
2115 leal 16(%edx),%edx
// xmm2 = running XOR of the previous round key dwords (three shifted copies).
2116 movdqa %xmm2,%xmm3
2117 pslldq $4,%xmm2
2118 pxor %xmm2,%xmm3
2119 pslldq $4,%xmm2
2120 pxor %xmm2,%xmm3
2121 pslldq $4,%xmm2
2122 pxor %xmm3,%xmm2
2123 pxor %xmm2,%xmm0
2124 movdqu %xmm0,-16(%edx)
2125 movdqa %xmm0,%xmm2
2126 decl %ecx
2127 jnz L101loop_key128
// Last two round keys use the constant row at Lkey_const+48 (27) and, after
// one pslld, its double.
2128 movdqa 48(%ebx),%xmm4
2129.byte 102,15,56,0,197
2130.byte 102,15,56,221,196
2131 pslld $1,%xmm4
2132 movdqa %xmm2,%xmm3
2133 pslldq $4,%xmm2
2134 pxor %xmm2,%xmm3
2135 pslldq $4,%xmm2
2136 pxor %xmm2,%xmm3
2137 pslldq $4,%xmm2
2138 pxor %xmm3,%xmm2
2139 pxor %xmm2,%xmm0
2140 movdqu %xmm0,(%edx)
2141 movdqa %xmm0,%xmm2
2142.byte 102,15,56,0,197
2143.byte 102,15,56,221,196
2144 movdqa %xmm2,%xmm3
2145 pslldq $4,%xmm2
2146 pxor %xmm2,%xmm3
2147 pslldq $4,%xmm2
2148 pxor %xmm2,%xmm3
2149 pslldq $4,%xmm2
2150 pxor %xmm3,%xmm2
2151 pxor %xmm2,%xmm0
2152 movdqu %xmm0,16(%edx)
// %edx is at schedule offset 144 here, so 96(%edx) = offset 240.
2153 movl $9,%ecx
2154 movl %ecx,96(%edx)
2155 jmp L100good_key
2156.align 4,0x90
// ---- 192-bit key ----------------------------------------------------------
2157L09412rounds:
// xmm2 = remaining 8 bytes of the user key (bytes 16..23).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002158 movq 16(%eax),%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07002159 cmpl $268435456,%ebp
2160 je L10212rounds_alt
Adam Langleyd9e397b2015-01-22 14:27:53 -08002161 movl $11,%ecx
2162 movups %xmm0,-16(%edx)
// Each ".byte 102,15,58,223,202,N" encodes aeskeygenassist $N,%xmm2,%xmm1.
// key_192a stores 16 bytes per call, key_192b stores 32 bytes per call.
2163.byte 102,15,58,223,202,1
Adam Langleye9ada862015-05-11 17:20:37 -07002164 call L103key_192a_cold
Adam Langleyd9e397b2015-01-22 14:27:53 -08002165.byte 102,15,58,223,202,2
Adam Langleye9ada862015-05-11 17:20:37 -07002166 call L104key_192b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002167.byte 102,15,58,223,202,4
Adam Langleye9ada862015-05-11 17:20:37 -07002168 call L105key_192a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002169.byte 102,15,58,223,202,8
Adam Langleye9ada862015-05-11 17:20:37 -07002170 call L104key_192b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002171.byte 102,15,58,223,202,16
Adam Langleye9ada862015-05-11 17:20:37 -07002172 call L105key_192a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002173.byte 102,15,58,223,202,32
Adam Langleye9ada862015-05-11 17:20:37 -07002174 call L104key_192b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002175.byte 102,15,58,223,202,64
Adam Langleye9ada862015-05-11 17:20:37 -07002176 call L105key_192a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002177.byte 102,15,58,223,202,128
Adam Langleye9ada862015-05-11 17:20:37 -07002178 call L104key_192b
// %edx is at schedule offset 192 here, so 48(%edx) = offset 240.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002179 movups %xmm0,(%edx)
2180 movl %ecx,48(%edx)
Adam Langleye9ada862015-05-11 17:20:37 -07002181 jmp L100good_key
Adam Langleyd9e397b2015-01-22 14:27:53 -08002182.align 4,0x90
// Local helper: store xmm0, advance %edx by 16, then fall into the shared
// 192-bit update. The _cold entry skips the store; both save xmm2 in xmm5
// for the next key_192b call.
Adam Langleye9ada862015-05-11 17:20:37 -07002183L105key_192a:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002184 movups %xmm0,(%edx)
2185 leal 16(%edx),%edx
2186.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07002187L103key_192a_cold:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002188 movaps %xmm2,%xmm5
// Shared update of xmm0 (low 128 bits) and xmm2 (high 64 bits) of the
// 192-bit working key; pshufd $85 broadcasts dword 1 of the
// aeskeygenassist result in xmm1.
Adam Langleye9ada862015-05-11 17:20:37 -07002189L106key_192b_warm:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002190 shufps $16,%xmm0,%xmm4
2191 movdqa %xmm2,%xmm3
2192 xorps %xmm4,%xmm0
2193 shufps $140,%xmm0,%xmm4
2194 pslldq $4,%xmm3
2195 xorps %xmm4,%xmm0
2196 pshufd $85,%xmm1,%xmm1
2197 pxor %xmm3,%xmm2
2198 pxor %xmm1,%xmm0
2199 pshufd $255,%xmm0,%xmm3
2200 pxor %xmm3,%xmm2
2201 ret
2202.align 4,0x90
// Local helper: assemble two 16-byte schedule entries from xmm5 (saved high
// half), xmm0 and xmm2, store them, advance %edx by 32, then tail-jump into
// the shared update (whose ret returns to the caller of key_192b).
Adam Langleye9ada862015-05-11 17:20:37 -07002203L104key_192b:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002204 movaps %xmm0,%xmm3
2205 shufps $68,%xmm0,%xmm5
2206 movups %xmm5,(%edx)
2207 shufps $78,%xmm2,%xmm3
2208 movups %xmm3,16(%edx)
2209 leal 32(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07002210 jmp L106key_192b_warm
Adam Langleyd9e397b2015-01-22 14:27:53 -08002211.align 4,0x90
// 192-bit key, alternative path. xmm5 = byte-shuffle mask from
// Lkey_const+16, xmm4 = constant row at Lkey_const+32.
Adam Langleye9ada862015-05-11 17:20:37 -07002212L10212rounds_alt:
2213 movdqa 16(%ebx),%xmm5
2214 movdqa 32(%ebx),%xmm4
2215 movl $8,%ecx
2216 movdqu %xmm0,-16(%edx)
// Eight iterations, each producing 24 bytes of schedule (8 via movq, then 16
// via movdqu after %edx has advanced by 24).
2217L107loop_key192:
2218 movq %xmm2,(%edx)
2219 movdqa %xmm2,%xmm1
// .byte 102,15,56,0,213   = pshufb %xmm5,%xmm2
// .byte 102,15,56,221,212 = aesenclast %xmm4,%xmm2
2220.byte 102,15,56,0,213
2221.byte 102,15,56,221,212
2222 pslld $1,%xmm4
2223 leal 24(%edx),%edx
2224 movdqa %xmm0,%xmm3
2225 pslldq $4,%xmm0
2226 pxor %xmm0,%xmm3
2227 pslldq $4,%xmm0
2228 pxor %xmm0,%xmm3
2229 pslldq $4,%xmm0
2230 pxor %xmm3,%xmm0
2231 pshufd $255,%xmm0,%xmm3
2232 pxor %xmm1,%xmm3
2233 pslldq $4,%xmm1
2234 pxor %xmm1,%xmm3
2235 pxor %xmm2,%xmm0
2236 pxor %xmm3,%xmm2
2237 movdqu %xmm0,-16(%edx)
2238 decl %ecx
2239 jnz L107loop_key192
// %edx is at schedule offset 208 here, so 32(%edx) = offset 240.
2240 movl $11,%ecx
2241 movl %ecx,32(%edx)
2242 jmp L100good_key
2243.align 4,0x90
// ---- 256-bit key ----------------------------------------------------------
2244L09314rounds:
// xmm2 = second 16 bytes of the user key; %edx advances to schedule offset 32.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002245 movups 16(%eax),%xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002246 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07002247 cmpl $268435456,%ebp
2248 je L10814rounds_alt
2249 movl $13,%ecx
// Slots 0 and 1 of the schedule are the two halves of the user key.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002250 movups %xmm0,-32(%edx)
2251 movups %xmm2,-16(%edx)
// ".byte 102,15,58,223,202,N" = aeskeygenassist $N,%xmm2,%xmm1 (feeds key_256a)
// ".byte 102,15,58,223,200,N" = aeskeygenassist $N,%xmm0,%xmm1 (feeds key_256b)
2252.byte 102,15,58,223,202,1
Adam Langleye9ada862015-05-11 17:20:37 -07002253 call L109key_256a_cold
Adam Langleyd9e397b2015-01-22 14:27:53 -08002254.byte 102,15,58,223,200,1
Adam Langleye9ada862015-05-11 17:20:37 -07002255 call L110key_256b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002256.byte 102,15,58,223,202,2
Adam Langleye9ada862015-05-11 17:20:37 -07002257 call L111key_256a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002258.byte 102,15,58,223,200,2
Adam Langleye9ada862015-05-11 17:20:37 -07002259 call L110key_256b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002260.byte 102,15,58,223,202,4
Adam Langleye9ada862015-05-11 17:20:37 -07002261 call L111key_256a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002262.byte 102,15,58,223,200,4
Adam Langleye9ada862015-05-11 17:20:37 -07002263 call L110key_256b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002264.byte 102,15,58,223,202,8
Adam Langleye9ada862015-05-11 17:20:37 -07002265 call L111key_256a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002266.byte 102,15,58,223,200,8
Adam Langleye9ada862015-05-11 17:20:37 -07002267 call L110key_256b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002268.byte 102,15,58,223,202,16
Adam Langleye9ada862015-05-11 17:20:37 -07002269 call L111key_256a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002270.byte 102,15,58,223,200,16
Adam Langleye9ada862015-05-11 17:20:37 -07002271 call L110key_256b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002272.byte 102,15,58,223,202,32
Adam Langleye9ada862015-05-11 17:20:37 -07002273 call L111key_256a
Adam Langleyd9e397b2015-01-22 14:27:53 -08002274.byte 102,15,58,223,200,32
Adam Langleye9ada862015-05-11 17:20:37 -07002275 call L110key_256b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002276.byte 102,15,58,223,202,64
Adam Langleye9ada862015-05-11 17:20:37 -07002277 call L111key_256a
// %edx is at schedule offset 224 here, so 16(%edx) = offset 240.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002278 movups %xmm0,(%edx)
2279 movl %ecx,16(%edx)
// Redundant with the xorl at L100good_key, which also zeroes %eax.
2280 xorl %eax,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002281 jmp L100good_key
Adam Langleyd9e397b2015-01-22 14:27:53 -08002282.align 4,0x90
// Local helper: store xmm2, advance %edx by 16, then update xmm0 from xmm0
// and dword 3 of the aeskeygenassist result. The _cold entry skips the store.
Adam Langleye9ada862015-05-11 17:20:37 -07002283L111key_256a:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002284 movups %xmm2,(%edx)
2285 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07002286L109key_256a_cold:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002287 shufps $16,%xmm0,%xmm4
2288 xorps %xmm4,%xmm0
2289 shufps $140,%xmm0,%xmm4
2290 xorps %xmm4,%xmm0
2291 shufps $255,%xmm1,%xmm1
2292 xorps %xmm1,%xmm0
2293 ret
2294.align 4,0x90
// Local helper: store xmm0, advance %edx by 16, then update xmm2 from xmm2
// and dword 2 (shufps $170) of the aeskeygenassist result.
Adam Langleye9ada862015-05-11 17:20:37 -07002295L110key_256b:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002296 movups %xmm0,(%edx)
2297 leal 16(%edx),%edx
2298 shufps $16,%xmm2,%xmm4
2299 xorps %xmm4,%xmm2
2300 shufps $140,%xmm2,%xmm4
2301 xorps %xmm4,%xmm2
2302 shufps $170,%xmm1,%xmm1
2303 xorps %xmm1,%xmm2
2304 ret
Adam Langleye9ada862015-05-11 17:20:37 -07002305.align 4,0x90
// 256-bit key, alternative path. xmm5 = byte-shuffle mask from Lkey_const+0,
// xmm4 = constant row at Lkey_const+32, xmm1 = copy of the second key half.
2306L10814rounds_alt:
2307 movdqa (%ebx),%xmm5
2308 movdqa 32(%ebx),%xmm4
2309 movl $7,%ecx
2310 movdqu %xmm0,-32(%edx)
2311 movdqa %xmm2,%xmm1
2312 movdqu %xmm2,-16(%edx)
// Seven passes; every pass stores one 16-byte entry at (%edx), and all but
// the last also store a second entry at 16(%edx) and advance %edx by 32.
2313L112loop_key256:
// .byte 102,15,56,0,213   = pshufb %xmm5,%xmm2
// .byte 102,15,56,221,212 = aesenclast %xmm4,%xmm2
2314.byte 102,15,56,0,213
2315.byte 102,15,56,221,212
2316 movdqa %xmm0,%xmm3
2317 pslldq $4,%xmm0
2318 pxor %xmm0,%xmm3
2319 pslldq $4,%xmm0
2320 pxor %xmm0,%xmm3
2321 pslldq $4,%xmm0
2322 pxor %xmm3,%xmm0
2323 pslld $1,%xmm4
2324 pxor %xmm2,%xmm0
2325 movdqu %xmm0,(%edx)
2326 decl %ecx
2327 jz L113done_key256
// Second half of the pass: broadcast dword 3 of the new xmm0 and run it
// through aesenclast with an all-zero xmm3
// (.byte 102,15,56,221,211 = aesenclast %xmm3,%xmm2).
2328 pshufd $255,%xmm0,%xmm2
2329 pxor %xmm3,%xmm3
2330.byte 102,15,56,221,211
2331 movdqa %xmm1,%xmm3
2332 pslldq $4,%xmm1
2333 pxor %xmm1,%xmm3
2334 pslldq $4,%xmm1
2335 pxor %xmm1,%xmm3
2336 pslldq $4,%xmm1
2337 pxor %xmm3,%xmm1
2338 pxor %xmm1,%xmm2
2339 movdqu %xmm2,16(%edx)
2340 leal 32(%edx),%edx
2341 movdqa %xmm2,%xmm1
2342 jmp L112loop_key256
2343L113done_key256:
// %edx is at schedule offset 224 here, so 16(%edx) = offset 240.
2344 movl $13,%ecx
2345 movl %ecx,16(%edx)
// Common success exit: wipe the xmm registers that held key material,
// return 0 and restore the saved registers.
2346L100good_key:
2347 pxor %xmm0,%xmm0
2348 pxor %xmm1,%xmm1
2349 pxor %xmm2,%xmm2
2350 pxor %xmm3,%xmm3
2351 pxor %xmm4,%xmm4
2352 pxor %xmm5,%xmm5
2353 xorl %eax,%eax
2354 popl %ebx
2355 popl %ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002356 ret
2357.align 2,0x90
// Error exit: NULL user-key or schedule pointer. No key bytes have been
// loaded at this point, so no xmm register is wiped.
Adam Langleye9ada862015-05-11 17:20:37 -07002358L091bad_pointer:
2359 movl $-1,%eax
2360 popl %ebx
2361 popl %ebp
2362 ret
2363.align 2,0x90
// Error exit: unsupported key size. xmm0 already holds the first 16 key
// bytes, so it is wiped before returning.
2364L095bad_keybits:
2365 pxor %xmm0,%xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002366 movl $-2,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002367 popl %ebx
2368 popl %ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002369 ret
// int aesni_set_encrypt_key(user_key, bits, key_schedule)
// cdecl entry point: moves the three stack arguments into the registers
// that __aesni_set_encrypt_key expects and hands back its %eax result
// unchanged.
//   4(%esp)  -> %eax   user key pointer
//   8(%esp)  -> %ecx   key length in bits
//   12(%esp) -> %edx   output key schedule pointer
.globl	_aesni_set_encrypt_key
.private_extern	_aesni_set_encrypt_key
.align	4
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
	movl	12(%esp),%edx
	movl	8(%esp),%ecx
	movl	4(%esp),%eax
	call	__aesni_set_encrypt_key
	ret
// int aesni_set_decrypt_key(user_key, bits, key_schedule)
// cdecl entry point: 4(%esp) = user key pointer, 8(%esp) = key length in
// bits, 12(%esp) = output key schedule pointer.
// Builds the encryption schedule with __aesni_set_encrypt_key, then converts
// it in place: the order of the round keys is reversed and every round key
// except the first and last is passed through aesimc.
// Returns 0 in %eax on success; a non-zero result from
// __aesni_set_encrypt_key is returned unchanged and the schedule is not
// post-processed.
// NOTE(review): every line below still carries blame-view residue (author,
// commit id, timestamp and a line number fused in front of the instruction);
// it has to be stripped before this file can be assembled.
2380.globl _aesni_set_decrypt_key
2381.private_extern _aesni_set_decrypt_key
2382.align 4
2383_aesni_set_decrypt_key:
2384L_aesni_set_decrypt_key_begin:
2385 movl 4(%esp),%eax
2386 movl 8(%esp),%ecx
2387 movl 12(%esp),%edx
2388 call __aesni_set_encrypt_key
// The helper advanced %edx, so reload the schedule base. On success %ecx is
// the value the helper stored at schedule offset 240; scale it to bytes.
2389 movl 12(%esp),%edx
2390 shll $4,%ecx
2391 testl %eax,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002392 jnz L114dec_key_ret
// %eax = address of the last round key (base + 16 + 16*%ecx).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002393 leal 16(%edx,%ecx,1),%eax
// Swap the first and last round keys without transforming them.
2394 movups (%edx),%xmm0
2395 movups (%eax),%xmm1
2396 movups %xmm0,(%eax)
2397 movups %xmm1,(%edx)
2398 leal 16(%edx),%edx
2399 leal -16(%eax),%eax
// Walk the two pointers toward each other, applying aesimc to both round
// keys and storing each one in the slot the other came from.
Adam Langleye9ada862015-05-11 17:20:37 -07002400L115dec_key_inverse:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002401 movups (%edx),%xmm0
2402 movups (%eax),%xmm1
// .byte 102,15,56,219,192 = aesimc %xmm0,%xmm0
// .byte 102,15,56,219,201 = aesimc %xmm1,%xmm1
2403.byte 102,15,56,219,192
2404.byte 102,15,56,219,201
2405 leal 16(%edx),%edx
2406 leal -16(%eax),%eax
// The pointers were already stepped, hence the +16 / -16 displacements.
2407 movups %xmm0,16(%eax)
2408 movups %xmm1,-16(%edx)
2409 cmpl %edx,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002410 ja L115dec_key_inverse
// Middle round key: transform in place.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002411 movups (%edx),%xmm0
2412.byte 102,15,56,219,192
2413 movups %xmm0,(%edx)
// Wipe the registers that held round keys and report success.
Adam Langleye9ada862015-05-11 17:20:37 -07002414 pxor %xmm0,%xmm0
2415 pxor %xmm1,%xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002416 xorl %eax,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002417L114dec_key_ret:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002418 ret
// Constant table addressed through %ebx by the *_alt key-schedule paths of
// __aesni_set_encrypt_key. It is read with movdqa, so the 64-byte alignment
// below also satisfies the 16-byte alignment movdqa needs.
Adam Langleye9ada862015-05-11 17:20:37 -07002419.align 6,0x90
2420Lkey_const:
// +0:  0x0C0F0E0D in every lane; pshufb mask loaded by the 128- and 256-bit
//      alternative paths.
2421.long 202313229,202313229,202313229,202313229
// +16: 0x04070605 in every lane; pshufb mask loaded by the 192-bit
//      alternative path.
2422.long 67569157,67569157,67569157,67569157
// +32: initial aesenclast operand for all alternative paths; doubled with
//      pslld after each use.
2423.long 1,1,1,1
// +48: 27 (0x1b) in every lane; loaded by the 128-bit alternative path for
//      its final two round keys.
2424.long 27,27,27,27
// NUL-terminated ASCII banner:
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
Adam Langleyd9e397b2015-01-22 14:27:53 -08002425.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2426.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2427.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2428.byte 115,108,46,111,114,103,62,0
// Mach-O non-lazy symbol pointer for _OPENSSL_ia32cap_P; the key-schedule
// code reads this slot relative to Lkey_const to get the variable address.
// NOTE(review): the zero placeholder is presumably filled in by the dynamic
// linker at load time -- confirm against the Mach-O documentation.
Adam Langleye9ada862015-05-11 17:20:37 -07002429.section __IMPORT,__pointers,non_lazy_symbol_pointers
2430L_OPENSSL_ia32cap_P$non_lazy_ptr:
2431.indirect_symbol _OPENSSL_ia32cap_P
2432.long 0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002433#endif