blob: 3fe0e7543fcff6476f4b316611de88d09c5350e5 [file] [log] [blame]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001#if defined(__i386__)
Adam Langleyd9e397b2015-01-22 14:27:53 -08002.text
// _aesni_encrypt: encrypt a single 16-byte block with AES-NI.
//
// Stack arguments on entry:
//    4(%esp)  pointer to the 16-byte input block
//    8(%esp)  pointer to the 16-byte output block
//   12(%esp)  pointer to the key schedule (16-byte round keys); the dword at
//             offset 240 is the number of aesenc rounds run before the
//             final aesenclast
// Clobbers: %eax, %ecx, %edx, %xmm0-%xmm2, flags.
// %xmm0-%xmm2 are zeroed before returning so that neither round keys nor
// block data are left behind in registers.
.globl	_aesni_encrypt
.private_extern	_aesni_encrypt
.align	4
_aesni_encrypt:
L_aesni_encrypt_begin:
	movl	4(%esp),%eax			// eax = input pointer
	movl	12(%esp),%edx			// edx = key schedule
	movups	(%eax),%xmm2			// xmm2 = block being processed
	movl	240(%edx),%ecx			// ecx = round counter
	movl	8(%esp),%eax			// eax = output pointer
	movups	(%edx),%xmm0			// round key 0
	movups	16(%edx),%xmm1			// round key 1
	leal	32(%edx),%edx			// edx -> round key 2
	xorps	%xmm0,%xmm2			// initial whitening with round key 0
L000enc1_loop_1:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
	decl	%ecx				// sets ZF for the jnz below (movups/leal keep flags)
	movups	(%edx),%xmm1			// fetch next round key
	leal	16(%edx),%edx
	jnz	L000enc1_loop_1
.byte	102,15,56,221,209			// aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0			// scrub round keys
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)			// store result
	pxor	%xmm2,%xmm2			// scrub block data
	ret
// _aesni_decrypt: decrypt a single 16-byte block with AES-NI.
//
// Stack arguments on entry:
//    4(%esp)  pointer to the 16-byte input block
//    8(%esp)  pointer to the 16-byte output block
//   12(%esp)  pointer to the key schedule (16-byte round keys); the dword at
//             offset 240 is the number of aesdec rounds run before the
//             final aesdeclast
// Clobbers: %eax, %ecx, %edx, %xmm0-%xmm2, flags.
// %xmm0-%xmm2 are zeroed before returning so that neither round keys nor
// block data are left behind in registers.
.globl	_aesni_decrypt
.private_extern	_aesni_decrypt
.align	4
_aesni_decrypt:
L_aesni_decrypt_begin:
	movl	4(%esp),%eax			// eax = input pointer
	movl	12(%esp),%edx			// edx = key schedule
	movups	(%eax),%xmm2			// xmm2 = block being processed
	movl	240(%edx),%ecx			// ecx = round counter
	movl	8(%esp),%eax			// eax = output pointer
	movups	(%edx),%xmm0			// round key 0
	movups	16(%edx),%xmm1			// round key 1
	leal	32(%edx),%edx			// edx -> round key 2
	xorps	%xmm0,%xmm2			// initial whitening with round key 0
L001dec1_loop_2:
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
	decl	%ecx				// sets ZF for the jnz below (movups/leal keep flags)
	movups	(%edx),%xmm1			// fetch next round key
	leal	16(%edx),%edx
	jnz	L001dec1_loop_2
.byte	102,15,56,223,209			// aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0			// scrub round keys
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)			// store result
	pxor	%xmm2,%xmm2			// scrub block data
	ret
// __aesni_encrypt2: internal helper, encrypts two blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2,%xmm3 = blocks
//   Out:  %xmm2,%xmm3 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// %edx is advanced to key+32+16*rounds and %ecx becomes a negative byte
// offset that counts up to zero, so "addl $32,%ecx" both steps through the
// round keys and produces the loop-terminating ZF. Two rounds are done per
// iteration, alternating %xmm1 and %xmm0 as the round-key register.
.private_extern	__aesni_encrypt2
.align	4
__aesni_encrypt2:
	movups	(%edx),%xmm0			// round key 0
	shll	$4,%ecx				// ecx = rounds * 16
	movups	16(%edx),%xmm1			// round key 1
	xorps	%xmm0,%xmm2			// whitening
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0			// round key 2
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx
	addl	$16,%ecx			// ecx = 16 - rounds*16 (negative offset)
L002enc2_loop:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,220,208			// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			// aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L002enc2_loop
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,221,208			// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			// aesenclast %xmm0,%xmm3
	ret
// __aesni_decrypt2: internal helper, decrypts two blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2,%xmm3 = blocks
//   Out:  %xmm2,%xmm3 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// %edx is advanced to key+32+16*rounds and %ecx becomes a negative byte
// offset that counts up to zero, so "addl $32,%ecx" both steps through the
// round keys and produces the loop-terminating ZF. Two rounds are done per
// iteration, alternating %xmm1 and %xmm0 as the round-key register.
.private_extern	__aesni_decrypt2
.align	4
__aesni_decrypt2:
	movups	(%edx),%xmm0			// round key 0
	shll	$4,%ecx				// ecx = rounds * 16
	movups	16(%edx),%xmm1			// round key 1
	xorps	%xmm0,%xmm2			// whitening
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0			// round key 2
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx
	addl	$16,%ecx			// ecx = 16 - rounds*16 (negative offset)
L003dec2_loop:
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,222,208			// aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			// aesdec %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L003dec2_loop
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,223,208			// aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			// aesdeclast %xmm0,%xmm3
	ret
// __aesni_encrypt3: internal helper, encrypts three blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2-%xmm4 = blocks
//   Out:  %xmm2-%xmm4 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// Same loop shape as the two-block helper: %ecx is a negative byte offset
// from the end of the key schedule and reaches zero when the last pair of
// round keys has been loaded.
.private_extern	__aesni_encrypt3
.align	4
__aesni_encrypt3:
	movups	(%edx),%xmm0			// round key 0
	shll	$4,%ecx				// ecx = rounds * 16
	movups	16(%edx),%xmm1			// round key 1
	xorps	%xmm0,%xmm2			// whitening
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0			// round key 2
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx
	addl	$16,%ecx			// ecx = 16 - rounds*16 (negative offset)
L004enc3_loop:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,220,208			// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			// aesenc %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L004enc3_loop
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
.byte	102,15,56,221,208			// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			// aesenclast %xmm0,%xmm4
	ret
// __aesni_decrypt3: internal helper, decrypts three blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2-%xmm4 = blocks
//   Out:  %xmm2-%xmm4 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// Same loop shape as the two-block helper: %ecx is a negative byte offset
// from the end of the key schedule and reaches zero when the last pair of
// round keys has been loaded.
.private_extern	__aesni_decrypt3
.align	4
__aesni_decrypt3:
	movups	(%edx),%xmm0			// round key 0
	shll	$4,%ecx				// ecx = rounds * 16
	movups	16(%edx),%xmm1			// round key 1
	xorps	%xmm0,%xmm2			// whitening
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0			// round key 2
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx
	addl	$16,%ecx			// ecx = 16 - rounds*16 (negative offset)
L005dec3_loop:
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,222,208			// aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			// aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			// aesdec %xmm0,%xmm4
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L005dec3_loop
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
.byte	102,15,56,223,208			// aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			// aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			// aesdeclast %xmm0,%xmm4
	ret
// __aesni_encrypt4: internal helper, encrypts four blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2-%xmm5 = blocks
//   Out:  %xmm2-%xmm5 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// Same loop shape as the two-block helper: %ecx is a negative byte offset
// from the end of the key schedule and reaches zero when the last pair of
// round keys has been loaded.
.private_extern	__aesni_encrypt4
.align	4
__aesni_encrypt4:
	movups	(%edx),%xmm0			// round key 0
	movups	16(%edx),%xmm1			// round key 1
	shll	$4,%ecx				// ecx = rounds * 16
	xorps	%xmm0,%xmm2			// whitening
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0			// round key 2
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx
.byte	15,31,64,0				// nopl 0(%eax): 4-byte padding nop
	addl	$16,%ecx			// ecx = 16 - rounds*16 (negative offset)
L006enc4_loop:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			// aesenc %xmm1,%xmm5
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,220,208			// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			// aesenc %xmm0,%xmm5
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L006enc4_loop
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			// aesenc %xmm1,%xmm5
.byte	102,15,56,221,208			// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			// aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232			// aesenclast %xmm0,%xmm5
	ret
// __aesni_decrypt4: internal helper, decrypts four blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2-%xmm5 = blocks
//   Out:  %xmm2-%xmm5 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// Same loop shape as the two-block helper: %ecx is a negative byte offset
// from the end of the key schedule and reaches zero when the last pair of
// round keys has been loaded.
.private_extern	__aesni_decrypt4
.align	4
__aesni_decrypt4:
	movups	(%edx),%xmm0			// round key 0
	movups	16(%edx),%xmm1			// round key 1
	shll	$4,%ecx				// ecx = rounds * 16
	xorps	%xmm0,%xmm2			// whitening
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0			// round key 2
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx
.byte	15,31,64,0				// nopl 0(%eax): 4-byte padding nop
	addl	$16,%ecx			// ecx = 16 - rounds*16 (negative offset)
L007dec4_loop:
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			// aesdec %xmm1,%xmm5
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,222,208			// aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			// aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			// aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			// aesdec %xmm0,%xmm5
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L007dec4_loop
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			// aesdec %xmm1,%xmm5
.byte	102,15,56,223,208			// aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			// aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			// aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232			// aesdeclast %xmm0,%xmm5
	ret
// __aesni_encrypt6: internal helper, encrypts six blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2-%xmm7 = blocks
//   Out:  %xmm2-%xmm7 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// The whitening xors are interleaved with the first round of %xmm2-%xmm4,
// then control jumps into the middle of the loop body to run the first
// round of %xmm5-%xmm7.
//
// Secondary entry point L_aesni_encrypt6_enter: for callers that have
// already whitened all six blocks and applied round key 1 themselves.
// Expected state at that label: %xmm0 = round key 2, %edx = key schedule
// + 32 + 16*rounds, %ecx = 16 - 16*rounds.
.private_extern	__aesni_encrypt6
.align	4
__aesni_encrypt6:
	movups	(%edx),%xmm0			// round key 0
	shll	$4,%ecx				// ecx = rounds * 16
	movups	16(%edx),%xmm1			// round key 1
	xorps	%xmm0,%xmm2			// whitening, interleaved with round 1
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx				// ecx = -rounds*16
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0		// round key 2
	addl	$16,%ecx			// ecx = 16 - rounds*16
	jmp	L008_aesni_encrypt6_inner
.align	4,0x90
L009enc6_loop:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233			// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			// aesenc %xmm1,%xmm7
L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,220,208			// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			// aesenc %xmm0,%xmm5
.byte	102,15,56,220,240			// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248			// aesenc %xmm0,%xmm7
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L009enc6_loop
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			// aesenc %xmm1,%xmm7
.byte	102,15,56,221,208			// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			// aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232			// aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240			// aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248			// aesenclast %xmm0,%xmm7
	ret
// __aesni_decrypt6: internal helper, decrypts six blocks in parallel.
//
// Register interface (no stack arguments):
//   In:   %edx = key schedule, %ecx = round counter (the value stored at
//         offset 240 of the key schedule), %xmm2-%xmm7 = blocks
//   Out:  %xmm2-%xmm7 = processed blocks
//   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags.
//
// The whitening xors are interleaved with the first round of %xmm2-%xmm4,
// then control jumps into the middle of the loop body to run the first
// round of %xmm5-%xmm7.
//
// Secondary entry point L_aesni_decrypt6_enter: for callers that have
// already whitened all six blocks and applied round key 1 themselves.
// Expected state at that label: %xmm0 = round key 2, %edx = key schedule
// + 32 + 16*rounds, %ecx = 16 - 16*rounds.
.private_extern	__aesni_decrypt6
.align	4
__aesni_decrypt6:
	movups	(%edx),%xmm0			// round key 0
	shll	$4,%ecx				// ecx = rounds * 16
	movups	16(%edx),%xmm1			// round key 1
	xorps	%xmm0,%xmm2			// whitening, interleaved with round 1
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
	leal	32(%edx,%ecx,1),%edx		// edx = end-relative base for key loads
	negl	%ecx				// ecx = -rounds*16
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0		// round key 2
	addl	$16,%ecx			// ecx = 16 - rounds*16
	jmp	L010_aesni_decrypt6_inner
.align	4,0x90
L011dec6_loop:
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233			// aesdec %xmm1,%xmm5
.byte	102,15,56,222,241			// aesdec %xmm1,%xmm6
.byte	102,15,56,222,249			// aesdec %xmm1,%xmm7
L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1		// next odd round key
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,222,208			// aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			// aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			// aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			// aesdec %xmm0,%xmm5
.byte	102,15,56,222,240			// aesdec %xmm0,%xmm6
.byte	102,15,56,222,248			// aesdec %xmm0,%xmm7
	movups	-16(%edx,%ecx,1),%xmm0		// next even round key
	jnz	L011dec6_loop
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			// aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			// aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			// aesdec %xmm1,%xmm5
.byte	102,15,56,222,241			// aesdec %xmm1,%xmm6
.byte	102,15,56,222,249			// aesdec %xmm1,%xmm7
.byte	102,15,56,223,208			// aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			// aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			// aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232			// aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240			// aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248			// aesdeclast %xmm0,%xmm7
	ret
// _aesni_ecb_encrypt: process a buffer block-by-block (ECB), encrypting or
// decrypting depending on a flag.
//
// Stack arguments (offsets after the four register pushes below):
//   20(%esp)  input pointer
//   24(%esp)  output pointer
//   28(%esp)  length in bytes; rounded DOWN to a multiple of 16, and the
//             function returns immediately if that leaves zero
//   32(%esp)  key schedule (round counter at offset 240)
//   36(%esp)  direction flag: non-zero = encrypt, zero = decrypt
//
// Register roles after setup:
//   %esi = input cursor, %edi = output cursor, %eax = bytes remaining,
//   %ebp = key schedule (saved copy), %ebx = round counter (saved copy),
//   %edx/%ecx = key/rounds as consumed (and clobbered) by the helpers.
//
// Bulk data goes through the six-block helper 96 bytes at a time; the
// remaining one to five blocks are dispatched to the 1/2/3/4/6-block paths.
// Saves and restores %ebp, %ebx, %esi, %edi; zeroes %xmm0-%xmm7 on exit.
.globl	_aesni_ecb_encrypt
.private_extern	_aesni_ecb_encrypt
.align	4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi			// input
	movl	24(%esp),%edi			// output
	movl	28(%esp),%eax			// length
	movl	32(%esp),%edx			// key schedule
	movl	36(%esp),%ebx			// direction flag
	andl	$-16,%eax			// whole blocks only
	jz	L012ecb_ret
	movl	240(%edx),%ecx			// round counter
	testl	%ebx,%ebx
	jz	L013ecb_decrypt
	// ---- encrypt ----
	movl	%edx,%ebp			// keep key/rounds: helpers clobber edx/ecx
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L014ecb_enc_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L015ecb_enc_loop6_enter
.align	4,0x90
L016ecb_enc_loop6:
	// Store the previous six results while loading the next six inputs.
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L015ecb_enc_loop6_enter:
	call	__aesni_encrypt6
	movl	%ebp,%edx			// restore key/rounds for the next call
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L016ecb_enc_loop6		// no borrow: another full 96 bytes remain
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax			// undo the last subtraction: eax = leftover bytes
	jz	L012ecb_ret
L014ecb_enc_tail:
	// 16..80 bytes left. The je branches reuse the flags of the preceding
	// cmpl; the intervening movups loads do not modify flags.
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	L020ecb_enc_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7			// five blocks: sixth lane is a zero dummy
	call	__aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L017ecb_enc_one:
	movups	(%edx),%xmm0			// round key 0
	movups	16(%edx),%xmm1			// round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L021enc1_loop_3:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L021enc1_loop_3
.byte	102,15,56,221,209			// aesenclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L018ecb_enc_two:
	call	__aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L019ecb_enc_three:
	call	__aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L020ecb_enc_four:
	call	__aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L013ecb_decrypt:
	// ---- decrypt: same structure as the encrypt path ----
	movl	%edx,%ebp			// keep key/rounds: helpers clobber edx/ecx
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	L022ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	L023ecb_dec_loop6_enter
.align	4,0x90
L024ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
L023ecb_dec_loop6_enter:
	call	__aesni_decrypt6
	movl	%ebp,%edx			// restore key/rounds for the next call
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	L024ecb_dec_loop6		// no borrow: another full 96 bytes remain
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax			// undo the last subtraction: eax = leftover bytes
	jz	L012ecb_ret
L022ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	L028ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7			// five blocks: sixth lane is a zero dummy
	call	__aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L025ecb_dec_one:
	movups	(%edx),%xmm0			// round key 0
	movups	16(%edx),%xmm1			// round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L029dec1_loop_4:
.byte	102,15,56,222,209			// aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L029dec1_loop_4
.byte	102,15,56,223,209			// aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L026ecb_dec_two:
	call	__aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L027ecb_dec_three:
	call	__aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L012ecb_ret
.align	4,0x90
L028ecb_dec_four:
	call	__aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L012ecb_ret:
	// Common exit: scrub every vector register that may hold keys or data.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// _aesni_ccm64_encrypt_blocks: counter-mode encryption of whole blocks with
// a running CBC-MAC style accumulator updated in the same pass.
//
// Stack arguments (offsets after the four register pushes below):
//   20(%esp)  input pointer
//   24(%esp)  output pointer
//   28(%esp)  number of 16-byte blocks
//   32(%esp)  key schedule (round counter at offset 240)
//   36(%esp)  pointer to the 16-byte counter block
//   40(%esp)  pointer to the 16-byte MAC accumulator (read, then written back)
//
// NOTE(review): the block count is decremented and tested only after a
// block has been processed, so a count of zero is not handled here;
// callers presumably guarantee at least one block - confirm.
//
// Local frame (16-byte aligned):
//    0(%esp)  pshufb mask that reverses all 16 bytes of a register
//   16(%esp)  128-bit constant with value 1 in the low dword (counter step)
//   48(%esp)  caller %esp, restored before the epilogue
//
// Register roles in the loop:
//   %xmm7 = byte-reversed counter, stepped with paddq (low 64 bits)
//   %xmm2 = counter block in original byte order, encrypted each iteration
//   %xmm3 = MAC accumulator, %xmm6 = current input block
//   %ebp = key schedule, %edx = key schedule + 32 + 16*rounds,
//   %ebx = 16 - 16*rounds (initial loop offset), %eax = blocks remaining
.globl	_aesni_ccm64_encrypt_blocks
.private_extern	_aesni_ccm64_encrypt_blocks
.align	4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi			// input
	movl	24(%esp),%edi			// output
	movl	28(%esp),%eax			// block count
	movl	32(%esp),%edx			// key schedule
	movl	36(%esp),%ebx			// counter block pointer
	movl	40(%esp),%ecx			// MAC pointer
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp			// align frame for movdqa
	movl	%ebp,48(%esp)			// remember caller esp
	movdqu	(%ebx),%xmm7			// counter block
	movdqu	(%ecx),%xmm3			// MAC accumulator
	movl	240(%edx),%ecx			// round counter
	movl	$202182159,(%esp)		// 0x0c0d0e0f \
	movl	$134810123,4(%esp)		// 0x08090a0b  | 16-byte reversal
	movl	$67438087,8(%esp)		// 0x04050607  | mask for pshufb
	movl	$66051,12(%esp)			// 0x00010203 /
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)			// counter increment = {1,0,0,0}
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx				// ecx = rounds * 16
	movl	$16,%ebx
	leal	(%edx),%ebp			// ebp = key schedule
	movdqa	(%esp),%xmm5			// xmm5 = byte-reversal mask
	movdqa	%xmm7,%xmm2			// first counter block, original byte order
	leal	32(%edx,%ecx,1),%edx		// end-relative base for key loads
	subl	%ecx,%ebx			// ebx = 16 - rounds*16
.byte	102,15,56,0,253				// pshufb %xmm5,%xmm7 (counter to reversed order)
L030ccm64_enc_outer:
	movups	(%ebp),%xmm0			// round key 0
	movl	%ebx,%ecx
	movups	(%esi),%xmm6			// input block
	xorps	%xmm0,%xmm2			// whiten counter block
	movups	16(%ebp),%xmm1			// round key 1
	xorps	%xmm6,%xmm0			// input ^ round key 0
	xorps	%xmm0,%xmm3			// MAC ^= input, whitened in the same xor
	movups	32(%ebp),%xmm0			// round key 2
L031ccm64_enc2_loop:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,220,208			// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			// aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L031ccm64_enc2_loop
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	paddq	16(%esp),%xmm7			// step the (reversed) counter
	decl	%eax				// ZF consumed by the jnz at loop bottom
.byte	102,15,56,221,208			// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			// aesenclast %xmm0,%xmm3
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6			// output = input ^ encrypted counter
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213				// pshufb %xmm5,%xmm2 (next counter, original order)
	leal	16(%edi),%edi
	jnz	L030ccm64_enc_outer
	movl	48(%esp),%esp			// drop local frame
	movl	40(%esp),%edi			// MAC pointer
	movups	%xmm3,(%edi)			// write back the MAC accumulator
	pxor	%xmm0,%xmm0			// scrub vector registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// _aesni_ccm64_decrypt_blocks: counter-mode decryption of whole blocks with
// a running CBC-MAC style accumulator computed over the recovered data.
//
// Stack arguments (offsets after the four register pushes below):
//   20(%esp)  input pointer
//   24(%esp)  output pointer
//   28(%esp)  number of 16-byte blocks
//   32(%esp)  key schedule (round counter at offset 240)
//   36(%esp)  pointer to the 16-byte counter block
//   40(%esp)  pointer to the 16-byte MAC accumulator (read, then written back)
//
// NOTE(review): the first input block is read and the count is decremented
// before it is tested, so a count of zero is not handled here; callers
// presumably guarantee at least one block - confirm.
//
// Local frame (16-byte aligned):
//    0(%esp)  pshufb mask that reverses all 16 bytes of a register
//   16(%esp)  128-bit constant with value 1 in the low dword (counter step)
//   48(%esp)  caller %esp, restored before the epilogue
//
// Flow: the first counter block is encrypted alone; each outer iteration
// then recovers one output block with the already-encrypted counter and,
// unless it was the last block, encrypts the next counter together with
// the MAC update for the block just recovered. The final MAC update is
// done alone after the loop.
.globl	_aesni_ccm64_decrypt_blocks
.private_extern	_aesni_ccm64_decrypt_blocks
.align	4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi			// input
	movl	24(%esp),%edi			// output
	movl	28(%esp),%eax			// block count
	movl	32(%esp),%edx			// key schedule
	movl	36(%esp),%ebx			// counter block pointer
	movl	40(%esp),%ecx			// MAC pointer
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp			// align frame for movdqa
	movl	%ebp,48(%esp)			// remember caller esp
	movdqu	(%ebx),%xmm7			// counter block
	movdqu	(%ecx),%xmm3			// MAC accumulator
	movl	240(%edx),%ecx			// round counter
	movl	$202182159,(%esp)		// 0x0c0d0e0f \
	movl	$134810123,4(%esp)		// 0x08090a0b  | 16-byte reversal
	movl	$67438087,8(%esp)		// 0x04050607  | mask for pshufb
	movl	$66051,12(%esp)			// 0x00010203 /
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)			// counter increment = {1,0,0,0}
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5			// xmm5 = byte-reversal mask
	movdqa	%xmm7,%xmm2			// first counter block, original byte order
	movl	%edx,%ebp			// ebp = key schedule
	movl	%ecx,%ebx			// ebx = round counter
.byte	102,15,56,0,253				// pshufb %xmm5,%xmm7 (counter to reversed order)
	// Encrypt the first counter block on its own.
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L032enc1_loop_5:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L032enc1_loop_5
.byte	102,15,56,221,209			// aesenclast %xmm1,%xmm2
	shll	$4,%ebx				// ebx = rounds * 16
	movl	$16,%ecx
	movups	(%esi),%xmm6			// first input block
	paddq	16(%esp),%xmm7			// step the (reversed) counter
	leal	16(%esi),%esi
	subl	%ebx,%ecx			// ecx = 16 - rounds*16
	leal	32(%ebp,%ebx,1),%edx		// end-relative base for key loads
	movl	%ecx,%ebx			// ebx = initial loop offset
	jmp	L033ccm64_dec_outer
.align	4,0x90
L033ccm64_dec_outer:
	xorps	%xmm2,%xmm6			// output = input ^ encrypted counter
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213				// pshufb %xmm5,%xmm2 (next counter, original order)
	subl	$1,%eax
	jz	L034ccm64_dec_break
	movups	(%ebp),%xmm0			// round key 0
	movl	%ebx,%ecx
	movups	16(%ebp),%xmm1			// round key 1
	xorps	%xmm0,%xmm6			// recovered block ^ round key 0
	xorps	%xmm0,%xmm2			// whiten counter block
	xorps	%xmm6,%xmm3			// MAC ^= recovered block (whitened)
	movups	32(%ebp),%xmm0			// round key 2
L035ccm64_dec2_loop:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx			// sets ZF for jnz; later movups keeps flags
.byte	102,15,56,220,208			// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			// aesenc %xmm0,%xmm3
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	L035ccm64_dec2_loop
	movups	(%esi),%xmm6			// next input block
	paddq	16(%esp),%xmm7			// step the (reversed) counter
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
.byte	102,15,56,221,208			// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			// aesenclast %xmm0,%xmm3
	leal	16(%esi),%esi
	jmp	L033ccm64_dec_outer
.align	4,0x90
L034ccm64_dec_break:
	// Last block: only the MAC update remains, done as a single-block pass.
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3
L036enc1_loop_6:
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L036enc1_loop_6
.byte	102,15,56,221,217			// aesenclast %xmm1,%xmm3
	movl	48(%esp),%esp			// drop local frame
	movl	40(%esp),%edi			// MAC pointer
	movups	%xmm3,(%edi)			// write back the MAC accumulator
	pxor	%xmm0,%xmm0			// scrub vector registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// _aesni_ctr32_encrypt_blocks: counter-mode processing of whole blocks,
// incrementing only the last 32 bits of the counter block (treated as a
// big-endian integer: it is byte-swapped to and from host order here).
//
// Stack arguments (offsets after the four register pushes below):
//   20(%esp)  input pointer
//   24(%esp)  output pointer
//   28(%esp)  number of 16-byte blocks
//   32(%esp)  key schedule (round counter at offset 240)
//   36(%esp)  pointer to the 16-byte counter block
//
// Local frame (16-byte aligned):
//    0(%esp)  pshufb mask that reverses all 16 bytes of a register
//   16(%esp)  increment vector {6,6,6,0}
//   32(%esp)  counter block with its counter dword cleared, xored with
//             round key 0 (six-block loop only)
//   48(%esp)  host-order counters {n, n+1, n+2, 0}
//   64(%esp)  host-order counters {n+3, n+4, n+5, 0}
//   80(%esp)  caller %esp, restored before the epilogue
//
// A count of exactly one block takes a shortcut that encrypts the counter
// block as-is. Otherwise six keystream blocks are produced per iteration,
// and a tail handles the remaining one to five blocks.
// On exit %xmm0-%xmm7 and frame slots 32/48/64 are zeroed.
.globl	_aesni_ctr32_encrypt_blocks
.private_extern	_aesni_ctr32_encrypt_blocks
.align	4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi			// input
	movl	24(%esp),%edi			// output
	movl	28(%esp),%eax			// block count
	movl	32(%esp),%edx			// key schedule
	movl	36(%esp),%ebx			// counter block pointer
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp			// align frame for movdqa
	movl	%ebp,80(%esp)			// remember caller esp
	cmpl	$1,%eax
	je	L037ctr32_one_shortcut
	movdqu	(%ebx),%xmm7			// counter block
	movl	$202182159,(%esp)		// 0x0c0d0e0f \
	movl	$134810123,4(%esp)		// 0x08090a0b  | 16-byte reversal
	movl	$67438087,8(%esp)		// 0x04050607  | mask for pshufb
	movl	$66051,12(%esp)			// 0x00010203 /
	movl	$6,%ecx
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)			// increment vector {6,6,6,0}
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3			// pextrd $3,%xmm7,%ebx  (counter dword)
.byte	102,15,58,34,253,3			// pinsrd $3,%ebp,%xmm7  (clear it; ebp = 0)
	movl	240(%edx),%ecx			// round counter
	bswap	%ebx				// counter to host byte order
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqa	(%esp),%xmm2			// byte-reversal mask
.byte	102,15,58,34,195,0			// pinsrd $0,%ebx,%xmm0  (n)
	leal	3(%ebx),%ebp
.byte	102,15,58,34,205,0			// pinsrd $0,%ebp,%xmm1  (n+3)
	incl	%ebx
.byte	102,15,58,34,195,1			// pinsrd $1,%ebx,%xmm0  (n+1)
	incl	%ebp
.byte	102,15,58,34,205,1			// pinsrd $1,%ebp,%xmm1  (n+4)
	incl	%ebx
.byte	102,15,58,34,195,2			// pinsrd $2,%ebx,%xmm0  (n+2)
	incl	%ebp
.byte	102,15,58,34,205,2			// pinsrd $2,%ebp,%xmm1  (n+5)
	movdqa	%xmm0,48(%esp)			// save host-order counters
.byte	102,15,56,0,194				// pshufb %xmm2,%xmm0
	movdqu	(%edx),%xmm6			// round key 0
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202				// pshufb %xmm2,%xmm1
	// After the full reversal the byte-swapped counters sit in dwords
	// 3,2,1; each pshufd copies one of them into dword 3 of a block
	// register (the other dwords are completed by the pxor/por below).
	pshufd	$192,%xmm0,%xmm2
	pshufd	$128,%xmm0,%xmm3
	cmpl	$6,%eax
	jb	L038ctr32_tail
	pxor	%xmm6,%xmm7			// fold round key 0 into the counter template
	shll	$4,%ecx				// ecx = rounds * 16
	movl	$16,%ebx
	movdqa	%xmm7,32(%esp)
	movl	%edx,%ebp			// ebp = key schedule
	subl	%ecx,%ebx			// ebx = 16 - rounds*16
	leal	32(%edx,%ecx,1),%edx		// end-relative base for key loads
	subl	$6,%eax
	jmp	L039ctr32_loop6
.align	4,0x90
L039ctr32_loop6:
	// Build six counter blocks, whiten them, and run round 1 inline
	// before entering the shared six-block round loop.
	pshufd	$64,%xmm0,%xmm4
	movdqa	32(%esp),%xmm0			// template ^ round key 0
	pshufd	$192,%xmm1,%xmm5
	pxor	%xmm0,%xmm2
	pshufd	$128,%xmm1,%xmm6
	pxor	%xmm0,%xmm3
	pshufd	$64,%xmm1,%xmm7
	movups	16(%ebp),%xmm1			// round key 1
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,217			// aesenc %xmm1,%xmm3
	movups	32(%ebp),%xmm0			// round key 2
	movl	%ebx,%ecx
.byte	102,15,56,220,225			// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			// aesenc %xmm1,%xmm7
	call	L_aesni_encrypt6_enter
	// Xor keystream with input and store, interleaved with preparing
	// the counters for the next iteration.
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0			// {6,6,6,0}
	xorps	%xmm1,%xmm4
	movdqa	64(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1			// upper three counters += 6
	paddd	48(%esp),%xmm0			// lower three counters += 6
	movdqa	(%esp),%xmm2			// byte-reversal mask
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194				// pshufb %xmm2,%xmm0
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202				// pshufb %xmm2,%xmm1
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	pshufd	$128,%xmm0,%xmm3
	subl	$6,%eax
	jnc	L039ctr32_loop6			// no borrow: at least six more blocks
	addl	$6,%eax				// eax = leftover blocks (0..5)
	jz	L040ctr32_ret
	movdqu	(%ebp),%xmm7			// round key 0
	movl	%ebp,%edx
	pxor	32(%esp),%xmm7			// undo the key fold: plain counter template
	movl	240(%ebp),%ecx
L038ctr32_tail:
	// 1..5 blocks left. The je branches reuse the flags of the preceding
	// cmpl; pshufd and por do not modify flags.
	por	%xmm7,%xmm2
	cmpl	$2,%eax
	jb	L041ctr32_one
	pshufd	$64,%xmm0,%xmm4
	por	%xmm7,%xmm3
	je	L042ctr32_two
	pshufd	$192,%xmm1,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	L043ctr32_three
	pshufd	$128,%xmm1,%xmm6
	por	%xmm7,%xmm5
	je	L044ctr32_four
	por	%xmm7,%xmm6
	call	__aesni_encrypt6		// five blocks; sixth lane result is unused
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L037ctr32_one_shortcut:
	movups	(%ebx),%xmm2			// counter block used unchanged
	movl	240(%edx),%ecx
L041ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
L045enc1_loop_7:
.byte	102,15,56,220,209			// aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	L045enc1_loop_7
.byte	102,15,56,221,209			// aesenclast %xmm1,%xmm2
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L042ctr32_two:
	call	__aesni_encrypt2
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L043ctr32_three:
	call	__aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	L040ctr32_ret
.align	4,0x90
L044ctr32_four:
	call	__aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
L040ctr32_ret:
	// Common exit: scrub vector registers and the frame slots that held
	// counter and key-derived material.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movl	80(%esp),%esp			// drop local frame
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
// _aesni_xts_encrypt: XTS-mode encryption using AES-NI (32-bit x86, cdecl
// stack arguments). After the four register pushes below the arguments are:
//   20(%esp) source pointer       -> %esi
//   24(%esp) destination pointer  -> %edi
//   28(%esp) byte length          -> %eax
//   32(%esp) key schedule used on the data blocks   -> %edx / %ebp
//   36(%esp) key schedule used to encrypt the tweak
//   40(%esp) 16-byte initial tweak input
// Each key schedule is read as 16-byte round keys starting at offset 0 with
// a loop count at offset 240.
// The .byte sequences are hand-encoded AES-NI instructions:
//   102,15,56,220,N = aesenc      102,15,56,221,N = aesenclast
// with ModRM N = 209/217/225/233/241/249 selecting %xmm2..%xmm7 as the
// destination and %xmm1 as the round-key source.
// Saves and restores %ebp, %ebx, %esi, %edi; all eight XMM registers and the
// tweak scratch slots on the stack are zeroed before returning.
1029.globl _aesni_xts_encrypt
1030.private_extern _aesni_xts_encrypt
1031.align 4
1032_aesni_xts_encrypt:
1033L_aesni_xts_encrypt_begin:
1034 pushl %ebp
1035 pushl %ebx
1036 pushl %esi
1037 pushl %edi
// Encrypt the 16-byte tweak input with the second key schedule.
1038 movl 36(%esp),%edx
1039 movl 40(%esp),%esi
1040 movl 240(%edx),%ecx
1041 movups (%esi),%xmm2
1042 movups (%edx),%xmm0
1043 movups 16(%edx),%xmm1
1044 leal 32(%edx),%edx
1045 xorps %xmm0,%xmm2
// %ecx aesenc rounds, each followed by loading the next round key, then one
// aesenclast. The result in %xmm2 is the first tweak.
Adam Langleye9ada862015-05-11 17:20:37 -07001046L046enc1_loop_8:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001047.byte 102,15,56,220,209
1048 decl %ecx
1049 movups (%edx),%xmm1
1050 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001051 jnz L046enc1_loop_8
Adam Langleyd9e397b2015-01-22 14:27:53 -08001052.byte 102,15,56,221,209
// Load the remaining arguments, then build a 16-byte-aligned frame:
//   0..95(%esp)  six 16-byte tweak slots
//   96(%esp)     dword constants {135, 0, 1, 0} used when doubling the tweak
//   112(%esp)    byte length
//   116(%esp)    caller stack pointer, restored at exit
1053 movl 20(%esp),%esi
1054 movl 24(%esp),%edi
1055 movl 28(%esp),%eax
1056 movl 32(%esp),%edx
1057 movl %esp,%ebp
1058 subl $120,%esp
1059 movl 240(%edx),%ecx
1060 andl $-16,%esp
1061 movl $135,96(%esp)
1062 movl $0,100(%esp)
1063 movl $1,104(%esp)
1064 movl $0,108(%esp)
1065 movl %eax,112(%esp)
1066 movl %ebp,116(%esp)
// %xmm1 = current tweak, %xmm3 = doubling constants, %xmm0 = per-dword sign
// mask of the tweak (all-ones where the dword top bit is set).
1067 movdqa %xmm2,%xmm1
1068 pxor %xmm0,%xmm0
1069 movdqa 96(%esp),%xmm3
1070 pcmpgtd %xmm1,%xmm0
// %eax = length rounded down to whole blocks; %ebp = data key schedule,
// %ebx = its loop count. Fewer than 96 bytes goes straight to the short path.
1071 andl $-16,%eax
1072 movl %edx,%ebp
1073 movl %ecx,%ebx
1074 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001075 jc L047xts_enc_short
// %ebx = 16 - 16*count and %edx = key + 32 + 16*count: the same values
// __aesni_encrypt2 derives at its own entry. Presumably this is what
// L_aesni_encrypt6_enter (defined outside this block) expects - confirm.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001076 shll $4,%ecx
1077 movl $16,%ebx
1078 subl %ecx,%ebx
1079 leal 32(%edx,%ecx,1),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001080 jmp L048xts_enc_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001081.align 4,0x90
// Main loop: six blocks (96 bytes) per iteration.
// Tweak doubling step, repeated below: pshufd $19 moves the sign mask of
// dword 3 into dword 0 and that of dword 1 into dword 2; pand keeps 135 in
// dword 0 and 1 in dword 2; paddq doubles each 64-bit half; pxor then folds
// in the carry from the low half to the high half and the 135 reduction when
// bit 127 was set.
Adam Langleye9ada862015-05-11 17:20:37 -07001082L048xts_enc_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001083 pshufd $19,%xmm0,%xmm2
1084 pxor %xmm0,%xmm0
1085 movdqa %xmm1,(%esp)
1086 paddq %xmm1,%xmm1
1087 pand %xmm3,%xmm2
1088 pcmpgtd %xmm1,%xmm0
1089 pxor %xmm2,%xmm1
1090 pshufd $19,%xmm0,%xmm2
1091 pxor %xmm0,%xmm0
1092 movdqa %xmm1,16(%esp)
1093 paddq %xmm1,%xmm1
1094 pand %xmm3,%xmm2
1095 pcmpgtd %xmm1,%xmm0
1096 pxor %xmm2,%xmm1
1097 pshufd $19,%xmm0,%xmm2
1098 pxor %xmm0,%xmm0
1099 movdqa %xmm1,32(%esp)
1100 paddq %xmm1,%xmm1
1101 pand %xmm3,%xmm2
1102 pcmpgtd %xmm1,%xmm0
1103 pxor %xmm2,%xmm1
1104 pshufd $19,%xmm0,%xmm2
1105 pxor %xmm0,%xmm0
1106 movdqa %xmm1,48(%esp)
1107 paddq %xmm1,%xmm1
1108 pand %xmm3,%xmm2
1109 pcmpgtd %xmm1,%xmm0
1110 pxor %xmm2,%xmm1
// The sixth tweak is built in %xmm7 while round key 0 and the six input
// blocks are loaded; every block is XORed with round key 0 and its tweak.
1111 pshufd $19,%xmm0,%xmm7
1112 movdqa %xmm1,64(%esp)
1113 paddq %xmm1,%xmm1
1114 movups (%ebp),%xmm0
1115 pand %xmm3,%xmm7
1116 movups (%esi),%xmm2
1117 pxor %xmm1,%xmm7
1118 movl %ebx,%ecx
1119 movdqu 16(%esi),%xmm3
1120 xorps %xmm0,%xmm2
1121 movdqu 32(%esi),%xmm4
1122 pxor %xmm0,%xmm3
1123 movdqu 48(%esi),%xmm5
1124 pxor %xmm0,%xmm4
1125 movdqu 64(%esi),%xmm6
1126 pxor %xmm0,%xmm5
1127 movdqu 80(%esi),%xmm1
1128 pxor %xmm0,%xmm6
1129 leal 96(%esi),%esi
1130 pxor (%esp),%xmm2
1131 movdqa %xmm7,80(%esp)
1132 pxor %xmm1,%xmm7
1133 movups 16(%ebp),%xmm1
1134 pxor 16(%esp),%xmm3
1135 pxor 32(%esp),%xmm4
// First aesenc round (round key 1 in %xmm1) is issued here for all six
// blocks, interleaved with the remaining tweak XORs; %xmm0 is reloaded with
// round key 2 before entering the shared helper.
1136.byte 102,15,56,220,209
1137 pxor 48(%esp),%xmm5
1138 pxor 64(%esp),%xmm6
1139.byte 102,15,56,220,217
1140 pxor %xmm0,%xmm7
1141 movups 32(%ebp),%xmm0
1142.byte 102,15,56,220,225
1143.byte 102,15,56,220,233
1144.byte 102,15,56,220,241
1145.byte 102,15,56,220,249
1146 call L_aesni_encrypt6_enter
// XOR each result with its tweak again and store six output blocks. In
// parallel, start deriving the next tweak from the sixth one (80(%esp)).
1147 movdqa 80(%esp),%xmm1
1148 pxor %xmm0,%xmm0
1149 xorps (%esp),%xmm2
1150 pcmpgtd %xmm1,%xmm0
1151 xorps 16(%esp),%xmm3
1152 movups %xmm2,(%edi)
1153 xorps 32(%esp),%xmm4
1154 movups %xmm3,16(%edi)
1155 xorps 48(%esp),%xmm5
1156 movups %xmm4,32(%edi)
1157 xorps 64(%esp),%xmm6
1158 movups %xmm5,48(%edi)
1159 xorps %xmm1,%xmm7
1160 movups %xmm6,64(%edi)
1161 pshufd $19,%xmm0,%xmm2
1162 movups %xmm7,80(%edi)
1163 leal 96(%edi),%edi
// %xmm3 was overwritten by an input block above; reload the constants.
1164 movdqa 96(%esp),%xmm3
1165 pxor %xmm0,%xmm0
1166 paddq %xmm1,%xmm1
1167 pand %xmm3,%xmm2
1168 pcmpgtd %xmm1,%xmm0
1169 pxor %xmm2,%xmm1
1170 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001171 jnc L048xts_enc_loop6
// Leaving the main loop: restore the plain loop count and key pointer used
// by the short paths.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001172 movl 240(%ebp),%ecx
1173 movl %ebp,%edx
1174 movl %ecx,%ebx
// Short path: 0 to 5 whole blocks remain (%eax + 96 bytes). %xmm1 holds the
// next unused tweak and %xmm0 its sign mask. Tweaks are generated one at a
// time into %xmm5, %xmm6, %xmm7 while dispatching on the remaining count.
Adam Langleye9ada862015-05-11 17:20:37 -07001175L047xts_enc_short:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001176 addl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001177 jz L049xts_enc_done6x
Adam Langleyd9e397b2015-01-22 14:27:53 -08001178 movdqa %xmm1,%xmm5
1179 cmpl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001180 jb L050xts_enc_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001181 pshufd $19,%xmm0,%xmm2
1182 pxor %xmm0,%xmm0
1183 paddq %xmm1,%xmm1
1184 pand %xmm3,%xmm2
1185 pcmpgtd %xmm1,%xmm0
1186 pxor %xmm2,%xmm1
// The SSE instructions above do not modify EFLAGS, so this je still tests
// the result of cmpl $32.
Adam Langleye9ada862015-05-11 17:20:37 -07001187 je L051xts_enc_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001188 pshufd $19,%xmm0,%xmm2
1189 pxor %xmm0,%xmm0
1190 movdqa %xmm1,%xmm6
1191 paddq %xmm1,%xmm1
1192 pand %xmm3,%xmm2
1193 pcmpgtd %xmm1,%xmm0
1194 pxor %xmm2,%xmm1
1195 cmpl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001196 jb L052xts_enc_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001197 pshufd $19,%xmm0,%xmm2
1198 pxor %xmm0,%xmm0
1199 movdqa %xmm1,%xmm7
1200 paddq %xmm1,%xmm1
1201 pand %xmm3,%xmm2
1202 pcmpgtd %xmm1,%xmm0
1203 pxor %xmm2,%xmm1
1204 movdqa %xmm5,(%esp)
1205 movdqa %xmm6,16(%esp)
// Still the flags from cmpl $64: equal means exactly four blocks.
Adam Langleye9ada862015-05-11 17:20:37 -07001206 je L053xts_enc_four
// Five blocks: tweaks 1-4 go to 0..48(%esp), the fifth to 64(%esp). Only
// %xmm2..%xmm6 are stored after the call; the %xmm7 result is not used.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001207 movdqa %xmm7,32(%esp)
1208 pshufd $19,%xmm0,%xmm7
1209 movdqa %xmm1,48(%esp)
1210 paddq %xmm1,%xmm1
1211 pand %xmm3,%xmm7
1212 pxor %xmm1,%xmm7
1213 movdqu (%esi),%xmm2
1214 movdqu 16(%esi),%xmm3
1215 movdqu 32(%esi),%xmm4
1216 pxor (%esp),%xmm2
1217 movdqu 48(%esi),%xmm5
1218 pxor 16(%esp),%xmm3
1219 movdqu 64(%esi),%xmm6
1220 pxor 32(%esp),%xmm4
1221 leal 80(%esi),%esi
1222 pxor 48(%esp),%xmm5
1223 movdqa %xmm7,64(%esp)
1224 pxor %xmm7,%xmm6
1225 call __aesni_encrypt6
// %xmm1 = last tweak used, as L054xts_enc_done expects.
1226 movaps 64(%esp),%xmm1
1227 xorps (%esp),%xmm2
1228 xorps 16(%esp),%xmm3
1229 xorps 32(%esp),%xmm4
1230 movups %xmm2,(%edi)
1231 xorps 48(%esp),%xmm5
1232 movups %xmm3,16(%edi)
1233 xorps %xmm1,%xmm6
1234 movups %xmm4,32(%edi)
1235 movups %xmm5,48(%edi)
1236 movups %xmm6,64(%edi)
1237 leal 80(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001238 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001239.align 4,0x90
// One block: tweak in %xmm5, encrypted inline with the single-block loop.
Adam Langleye9ada862015-05-11 17:20:37 -07001240L050xts_enc_one:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001241 movups (%esi),%xmm2
1242 leal 16(%esi),%esi
1243 xorps %xmm5,%xmm2
1244 movups (%edx),%xmm0
1245 movups 16(%edx),%xmm1
1246 leal 32(%edx),%edx
1247 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001248L055enc1_loop_9:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001249.byte 102,15,56,220,209
1250 decl %ecx
1251 movups (%edx),%xmm1
1252 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001253 jnz L055enc1_loop_9
Adam Langleyd9e397b2015-01-22 14:27:53 -08001254.byte 102,15,56,221,209
1255 xorps %xmm5,%xmm2
1256 movups %xmm2,(%edi)
1257 leal 16(%edi),%edi
// %xmm1 = last tweak used.
1258 movdqa %xmm5,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001259 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001260.align 4,0x90
// Two blocks: tweaks in %xmm5 and %xmm6 (copied from %xmm1).
Adam Langleye9ada862015-05-11 17:20:37 -07001261L051xts_enc_two:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001262 movaps %xmm1,%xmm6
1263 movups (%esi),%xmm2
1264 movups 16(%esi),%xmm3
1265 leal 32(%esi),%esi
1266 xorps %xmm5,%xmm2
1267 xorps %xmm6,%xmm3
1268 call __aesni_encrypt2
1269 xorps %xmm5,%xmm2
1270 xorps %xmm6,%xmm3
1271 movups %xmm2,(%edi)
1272 movups %xmm3,16(%edi)
1273 leal 32(%edi),%edi
1274 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001275 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001276.align 4,0x90
// Three blocks: tweaks in %xmm5, %xmm6, %xmm7 (copied from %xmm1).
Adam Langleye9ada862015-05-11 17:20:37 -07001277L052xts_enc_three:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001278 movaps %xmm1,%xmm7
1279 movups (%esi),%xmm2
1280 movups 16(%esi),%xmm3
1281 movups 32(%esi),%xmm4
1282 leal 48(%esi),%esi
1283 xorps %xmm5,%xmm2
1284 xorps %xmm6,%xmm3
1285 xorps %xmm7,%xmm4
1286 call __aesni_encrypt3
1287 xorps %xmm5,%xmm2
1288 xorps %xmm6,%xmm3
1289 xorps %xmm7,%xmm4
1290 movups %xmm2,(%edi)
1291 movups %xmm3,16(%edi)
1292 movups %xmm4,32(%edi)
1293 leal 48(%edi),%edi
1294 movdqa %xmm7,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001295 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001296.align 4,0x90
// Four blocks: tweaks 1-2 were spilled to (%esp) and 16(%esp), tweak 3 is in
// %xmm7 and tweak 4 is copied from %xmm1 into %xmm6.
Adam Langleye9ada862015-05-11 17:20:37 -07001297L053xts_enc_four:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001298 movaps %xmm1,%xmm6
1299 movups (%esi),%xmm2
1300 movups 16(%esi),%xmm3
1301 movups 32(%esi),%xmm4
1302 xorps (%esp),%xmm2
1303 movups 48(%esi),%xmm5
1304 leal 64(%esi),%esi
1305 xorps 16(%esp),%xmm3
1306 xorps %xmm7,%xmm4
1307 xorps %xmm6,%xmm5
1308 call __aesni_encrypt4
1309 xorps (%esp),%xmm2
1310 xorps 16(%esp),%xmm3
1311 xorps %xmm7,%xmm4
1312 movups %xmm2,(%edi)
1313 xorps %xmm6,%xmm5
1314 movups %xmm3,16(%edi)
1315 movups %xmm4,32(%edi)
1316 movups %xmm5,48(%edi)
1317 leal 64(%edi),%edi
1318 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001319 jmp L054xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001320.align 4,0x90
// No whole blocks left after the main loop. %xmm1 already holds the next
// unused tweak, so it is taken as-is for the stealing step when the length
// has a partial tail (length & 15 != 0).
Adam Langleye9ada862015-05-11 17:20:37 -07001321L049xts_enc_done6x:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001322 movl 112(%esp),%eax
1323 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001324 jz L056xts_enc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001325 movdqa %xmm1,%xmm5
1326 movl %eax,112(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07001327 jmp L057xts_enc_steal
Adam Langleyd9e397b2015-01-22 14:27:53 -08001328.align 4,0x90
// Reached from the short paths with %xmm1 = last tweak used. If a partial
// tail remains, double that tweak once more into %xmm5 and keep the tail
// length in 112(%esp).
Adam Langleye9ada862015-05-11 17:20:37 -07001329L054xts_enc_done:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001330 movl 112(%esp),%eax
1331 pxor %xmm0,%xmm0
1332 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001333 jz L056xts_enc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001334 pcmpgtd %xmm1,%xmm0
1335 movl %eax,112(%esp)
1336 pshufd $19,%xmm0,%xmm5
1337 paddq %xmm1,%xmm1
1338 pand 96(%esp),%xmm5
1339 pxor %xmm1,%xmm5
// Ciphertext stealing, one byte per iteration for %eax tail bytes: the input
// tail byte replaces the matching byte of the previous output block, and the
// displaced output byte becomes the final partial output.
Adam Langleye9ada862015-05-11 17:20:37 -07001340L057xts_enc_steal:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001341 movzbl (%esi),%ecx
1342 movzbl -16(%edi),%edx
1343 leal 1(%esi),%esi
1344 movb %cl,-16(%edi)
1345 movb %dl,(%edi)
1346 leal 1(%edi),%edi
1347 subl $1,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001348 jnz L057xts_enc_steal
// Rewind %edi by the tail length and re-encrypt the patched block at
// -16(%edi) in place with the tweak in %xmm5.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001349 subl 112(%esp),%edi
1350 movl %ebp,%edx
1351 movl %ebx,%ecx
1352 movups -16(%edi),%xmm2
1353 xorps %xmm5,%xmm2
1354 movups (%edx),%xmm0
1355 movups 16(%edx),%xmm1
1356 leal 32(%edx),%edx
1357 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001358L058enc1_loop_10:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001359.byte 102,15,56,220,209
1360 decl %ecx
1361 movups (%edx),%xmm1
1362 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001363 jnz L058enc1_loop_10
Adam Langleyd9e397b2015-01-22 14:27:53 -08001364.byte 102,15,56,221,209
1365 xorps %xmm5,%xmm2
1366 movups %xmm2,-16(%edi)
// Exit: zero %xmm0-%xmm7 and the six tweak slots at 0..95(%esp), then restore
// the caller stack pointer saved at 116(%esp) and the pushed registers.
Adam Langleye9ada862015-05-11 17:20:37 -07001367L056xts_enc_ret:
1368 pxor %xmm0,%xmm0
1369 pxor %xmm1,%xmm1
1370 pxor %xmm2,%xmm2
1371 movdqa %xmm0,(%esp)
1372 pxor %xmm3,%xmm3
1373 movdqa %xmm0,16(%esp)
1374 pxor %xmm4,%xmm4
1375 movdqa %xmm0,32(%esp)
1376 pxor %xmm5,%xmm5
1377 movdqa %xmm0,48(%esp)
1378 pxor %xmm6,%xmm6
1379 movdqa %xmm0,64(%esp)
1380 pxor %xmm7,%xmm7
1381 movdqa %xmm0,80(%esp)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001382 movl 116(%esp),%esp
1383 popl %edi
1384 popl %esi
1385 popl %ebx
1386 popl %ebp
1387 ret
// _aesni_xts_decrypt: XTS-mode decryption using AES-NI (32-bit x86, cdecl
// stack arguments). After the four register pushes below the arguments are:
//   20(%esp) source pointer       -> %esi
//   24(%esp) destination pointer  -> %edi
//   28(%esp) byte length          -> %eax
//   32(%esp) key schedule used on the data blocks   -> %edx / %ebp
//   36(%esp) key schedule used to encrypt the tweak
//   40(%esp) 16-byte initial tweak input
// Each key schedule is read as 16-byte round keys starting at offset 0 with
// a loop count at offset 240.
// The .byte sequences are hand-encoded AES-NI instructions:
//   102,15,56,220,N = aesenc      102,15,56,221,N = aesenclast
//   102,15,56,222,N = aesdec      102,15,56,223,N = aesdeclast
// with ModRM N = 209/217/225/233/241/249 selecting %xmm2..%xmm7 as the
// destination and %xmm1 as the round-key source.
// Saves and restores %ebp, %ebx, %esi, %edi; all eight XMM registers and the
// tweak scratch slots on the stack are zeroed before returning.
1388.globl _aesni_xts_decrypt
1389.private_extern _aesni_xts_decrypt
1390.align 4
1391_aesni_xts_decrypt:
1392L_aesni_xts_decrypt_begin:
1393 pushl %ebp
1394 pushl %ebx
1395 pushl %esi
1396 pushl %edi
// The tweak is encrypted (aesenc/aesenclast) even on the decrypt path,
// using the second key schedule.
1397 movl 36(%esp),%edx
1398 movl 40(%esp),%esi
1399 movl 240(%edx),%ecx
1400 movups (%esi),%xmm2
1401 movups (%edx),%xmm0
1402 movups 16(%edx),%xmm1
1403 leal 32(%edx),%edx
1404 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001405L059enc1_loop_11:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001406.byte 102,15,56,220,209
1407 decl %ecx
1408 movups (%edx),%xmm1
1409 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001410 jnz L059enc1_loop_11
Adam Langleyd9e397b2015-01-22 14:27:53 -08001411.byte 102,15,56,221,209
// Load the remaining arguments and build a 16-byte-aligned frame (layout as
// listed below at 96..116(%esp); 0..95(%esp) holds six tweak slots).
1412 movl 20(%esp),%esi
1413 movl 24(%esp),%edi
1414 movl 28(%esp),%eax
1415 movl 32(%esp),%edx
1416 movl %esp,%ebp
1417 subl $120,%esp
1418 andl $-16,%esp
// If the length is not a multiple of 16, subtract 16 from it so that the
// last whole block is held back for the stealing step at the end.
1419 xorl %ebx,%ebx
1420 testl $15,%eax
1421 setnz %bl
1422 shll $4,%ebx
1423 subl %ebx,%eax
//   96(%esp)   dword constants {135, 0, 1, 0} used when doubling the tweak
//   112(%esp)  (adjusted) byte length
//   116(%esp)  caller stack pointer, restored at exit
1424 movl $135,96(%esp)
1425 movl $0,100(%esp)
1426 movl $1,104(%esp)
1427 movl $0,108(%esp)
1428 movl %eax,112(%esp)
1429 movl %ebp,116(%esp)
// %ebp = data key schedule, %ebx = its loop count, %xmm1 = current tweak,
// %xmm3 = doubling constants, %xmm0 = per-dword sign mask of the tweak.
1430 movl 240(%edx),%ecx
1431 movl %edx,%ebp
1432 movl %ecx,%ebx
1433 movdqa %xmm2,%xmm1
1434 pxor %xmm0,%xmm0
1435 movdqa 96(%esp),%xmm3
1436 pcmpgtd %xmm1,%xmm0
1437 andl $-16,%eax
1438 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001439 jc L060xts_dec_short
// %ebx = 16 - 16*count and %edx = key + 32 + 16*count; presumably the form
// L_aesni_decrypt6_enter (defined outside this block) expects - confirm.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001440 shll $4,%ecx
1441 movl $16,%ebx
1442 subl %ecx,%ebx
1443 leal 32(%edx,%ecx,1),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001444 jmp L061xts_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001445.align 4,0x90
// Main loop: six blocks (96 bytes) per iteration.
// Tweak doubling step, repeated below: pshufd $19 moves the sign mask of
// dword 3 into dword 0 and that of dword 1 into dword 2; pand keeps 135 in
// dword 0 and 1 in dword 2; paddq doubles each 64-bit half; pxor then folds
// in the carry from the low half to the high half and the 135 reduction when
// bit 127 was set.
Adam Langleye9ada862015-05-11 17:20:37 -07001446L061xts_dec_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001447 pshufd $19,%xmm0,%xmm2
1448 pxor %xmm0,%xmm0
1449 movdqa %xmm1,(%esp)
1450 paddq %xmm1,%xmm1
1451 pand %xmm3,%xmm2
1452 pcmpgtd %xmm1,%xmm0
1453 pxor %xmm2,%xmm1
1454 pshufd $19,%xmm0,%xmm2
1455 pxor %xmm0,%xmm0
1456 movdqa %xmm1,16(%esp)
1457 paddq %xmm1,%xmm1
1458 pand %xmm3,%xmm2
1459 pcmpgtd %xmm1,%xmm0
1460 pxor %xmm2,%xmm1
1461 pshufd $19,%xmm0,%xmm2
1462 pxor %xmm0,%xmm0
1463 movdqa %xmm1,32(%esp)
1464 paddq %xmm1,%xmm1
1465 pand %xmm3,%xmm2
1466 pcmpgtd %xmm1,%xmm0
1467 pxor %xmm2,%xmm1
1468 pshufd $19,%xmm0,%xmm2
1469 pxor %xmm0,%xmm0
1470 movdqa %xmm1,48(%esp)
1471 paddq %xmm1,%xmm1
1472 pand %xmm3,%xmm2
1473 pcmpgtd %xmm1,%xmm0
1474 pxor %xmm2,%xmm1
// The sixth tweak is built in %xmm7 while round key 0 and the six input
// blocks are loaded; every block is XORed with round key 0 and its tweak.
1475 pshufd $19,%xmm0,%xmm7
1476 movdqa %xmm1,64(%esp)
1477 paddq %xmm1,%xmm1
1478 movups (%ebp),%xmm0
1479 pand %xmm3,%xmm7
1480 movups (%esi),%xmm2
1481 pxor %xmm1,%xmm7
1482 movl %ebx,%ecx
1483 movdqu 16(%esi),%xmm3
1484 xorps %xmm0,%xmm2
1485 movdqu 32(%esi),%xmm4
1486 pxor %xmm0,%xmm3
1487 movdqu 48(%esi),%xmm5
1488 pxor %xmm0,%xmm4
1489 movdqu 64(%esi),%xmm6
1490 pxor %xmm0,%xmm5
1491 movdqu 80(%esi),%xmm1
1492 pxor %xmm0,%xmm6
1493 leal 96(%esi),%esi
1494 pxor (%esp),%xmm2
1495 movdqa %xmm7,80(%esp)
1496 pxor %xmm1,%xmm7
1497 movups 16(%ebp),%xmm1
1498 pxor 16(%esp),%xmm3
1499 pxor 32(%esp),%xmm4
// First aesdec round (round key 1 in %xmm1) is issued here for all six
// blocks, interleaved with the remaining tweak XORs; %xmm0 is reloaded with
// round key 2 before entering the shared helper.
1500.byte 102,15,56,222,209
1501 pxor 48(%esp),%xmm5
1502 pxor 64(%esp),%xmm6
1503.byte 102,15,56,222,217
1504 pxor %xmm0,%xmm7
1505 movups 32(%ebp),%xmm0
1506.byte 102,15,56,222,225
1507.byte 102,15,56,222,233
1508.byte 102,15,56,222,241
1509.byte 102,15,56,222,249
1510 call L_aesni_decrypt6_enter
// XOR each result with its tweak again and store six output blocks. In
// parallel, start deriving the next tweak from the sixth one (80(%esp)).
1511 movdqa 80(%esp),%xmm1
1512 pxor %xmm0,%xmm0
1513 xorps (%esp),%xmm2
1514 pcmpgtd %xmm1,%xmm0
1515 xorps 16(%esp),%xmm3
1516 movups %xmm2,(%edi)
1517 xorps 32(%esp),%xmm4
1518 movups %xmm3,16(%edi)
1519 xorps 48(%esp),%xmm5
1520 movups %xmm4,32(%edi)
1521 xorps 64(%esp),%xmm6
1522 movups %xmm5,48(%edi)
1523 xorps %xmm1,%xmm7
1524 movups %xmm6,64(%edi)
1525 pshufd $19,%xmm0,%xmm2
1526 movups %xmm7,80(%edi)
1527 leal 96(%edi),%edi
// %xmm3 was overwritten by an input block above; reload the constants.
1528 movdqa 96(%esp),%xmm3
1529 pxor %xmm0,%xmm0
1530 paddq %xmm1,%xmm1
1531 pand %xmm3,%xmm2
1532 pcmpgtd %xmm1,%xmm0
1533 pxor %xmm2,%xmm1
1534 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001535 jnc L061xts_dec_loop6
// Leaving the main loop: restore the plain loop count and key pointer used
// by the short paths.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001536 movl 240(%ebp),%ecx
1537 movl %ebp,%edx
1538 movl %ecx,%ebx
// Short path: 0 to 5 whole blocks remain (%eax + 96 bytes). %xmm1 holds the
// next unused tweak and %xmm0 its sign mask. Tweaks are generated one at a
// time into %xmm5, %xmm6, %xmm7 while dispatching on the remaining count.
Adam Langleye9ada862015-05-11 17:20:37 -07001539L060xts_dec_short:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001540 addl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001541 jz L062xts_dec_done6x
Adam Langleyd9e397b2015-01-22 14:27:53 -08001542 movdqa %xmm1,%xmm5
1543 cmpl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001544 jb L063xts_dec_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001545 pshufd $19,%xmm0,%xmm2
1546 pxor %xmm0,%xmm0
1547 paddq %xmm1,%xmm1
1548 pand %xmm3,%xmm2
1549 pcmpgtd %xmm1,%xmm0
1550 pxor %xmm2,%xmm1
// The SSE instructions above do not modify EFLAGS, so this je still tests
// the result of cmpl $32.
Adam Langleye9ada862015-05-11 17:20:37 -07001551 je L064xts_dec_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001552 pshufd $19,%xmm0,%xmm2
1553 pxor %xmm0,%xmm0
1554 movdqa %xmm1,%xmm6
1555 paddq %xmm1,%xmm1
1556 pand %xmm3,%xmm2
1557 pcmpgtd %xmm1,%xmm0
1558 pxor %xmm2,%xmm1
1559 cmpl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001560 jb L065xts_dec_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001561 pshufd $19,%xmm0,%xmm2
1562 pxor %xmm0,%xmm0
1563 movdqa %xmm1,%xmm7
1564 paddq %xmm1,%xmm1
1565 pand %xmm3,%xmm2
1566 pcmpgtd %xmm1,%xmm0
1567 pxor %xmm2,%xmm1
1568 movdqa %xmm5,(%esp)
1569 movdqa %xmm6,16(%esp)
// Still the flags from cmpl $64: equal means exactly four blocks.
Adam Langleye9ada862015-05-11 17:20:37 -07001570 je L066xts_dec_four
// Five blocks: tweaks 1-4 go to 0..48(%esp), the fifth to 64(%esp). Only
// %xmm2..%xmm6 are stored after the call; the %xmm7 result is not used.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001571 movdqa %xmm7,32(%esp)
1572 pshufd $19,%xmm0,%xmm7
1573 movdqa %xmm1,48(%esp)
1574 paddq %xmm1,%xmm1
1575 pand %xmm3,%xmm7
1576 pxor %xmm1,%xmm7
1577 movdqu (%esi),%xmm2
1578 movdqu 16(%esi),%xmm3
1579 movdqu 32(%esi),%xmm4
1580 pxor (%esp),%xmm2
1581 movdqu 48(%esi),%xmm5
1582 pxor 16(%esp),%xmm3
1583 movdqu 64(%esi),%xmm6
1584 pxor 32(%esp),%xmm4
1585 leal 80(%esi),%esi
1586 pxor 48(%esp),%xmm5
1587 movdqa %xmm7,64(%esp)
1588 pxor %xmm7,%xmm6
1589 call __aesni_decrypt6
// %xmm1 = last tweak used, as L067xts_dec_done expects.
1590 movaps 64(%esp),%xmm1
1591 xorps (%esp),%xmm2
1592 xorps 16(%esp),%xmm3
1593 xorps 32(%esp),%xmm4
1594 movups %xmm2,(%edi)
1595 xorps 48(%esp),%xmm5
1596 movups %xmm3,16(%edi)
1597 xorps %xmm1,%xmm6
1598 movups %xmm4,32(%edi)
1599 movups %xmm5,48(%edi)
1600 movups %xmm6,64(%edi)
1601 leal 80(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001602 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001603.align 4,0x90
// One block: tweak in %xmm5, decrypted inline with the single-block loop.
Adam Langleye9ada862015-05-11 17:20:37 -07001604L063xts_dec_one:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001605 movups (%esi),%xmm2
1606 leal 16(%esi),%esi
1607 xorps %xmm5,%xmm2
1608 movups (%edx),%xmm0
1609 movups 16(%edx),%xmm1
1610 leal 32(%edx),%edx
1611 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001612L068dec1_loop_12:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001613.byte 102,15,56,222,209
1614 decl %ecx
1615 movups (%edx),%xmm1
1616 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001617 jnz L068dec1_loop_12
Adam Langleyd9e397b2015-01-22 14:27:53 -08001618.byte 102,15,56,223,209
1619 xorps %xmm5,%xmm2
1620 movups %xmm2,(%edi)
1621 leal 16(%edi),%edi
// %xmm1 = last tweak used.
1622 movdqa %xmm5,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001623 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001624.align 4,0x90
// Two blocks: tweaks in %xmm5 and %xmm6 (copied from %xmm1).
Adam Langleye9ada862015-05-11 17:20:37 -07001625L064xts_dec_two:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001626 movaps %xmm1,%xmm6
1627 movups (%esi),%xmm2
1628 movups 16(%esi),%xmm3
1629 leal 32(%esi),%esi
1630 xorps %xmm5,%xmm2
1631 xorps %xmm6,%xmm3
1632 call __aesni_decrypt2
1633 xorps %xmm5,%xmm2
1634 xorps %xmm6,%xmm3
1635 movups %xmm2,(%edi)
1636 movups %xmm3,16(%edi)
1637 leal 32(%edi),%edi
1638 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001639 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001640.align 4,0x90
// Three blocks: tweaks in %xmm5, %xmm6, %xmm7 (copied from %xmm1).
Adam Langleye9ada862015-05-11 17:20:37 -07001641L065xts_dec_three:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001642 movaps %xmm1,%xmm7
1643 movups (%esi),%xmm2
1644 movups 16(%esi),%xmm3
1645 movups 32(%esi),%xmm4
1646 leal 48(%esi),%esi
1647 xorps %xmm5,%xmm2
1648 xorps %xmm6,%xmm3
1649 xorps %xmm7,%xmm4
1650 call __aesni_decrypt3
1651 xorps %xmm5,%xmm2
1652 xorps %xmm6,%xmm3
1653 xorps %xmm7,%xmm4
1654 movups %xmm2,(%edi)
1655 movups %xmm3,16(%edi)
1656 movups %xmm4,32(%edi)
1657 leal 48(%edi),%edi
1658 movdqa %xmm7,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001659 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001660.align 4,0x90
// Four blocks: tweaks 1-2 were spilled to (%esp) and 16(%esp), tweak 3 is in
// %xmm7 and tweak 4 is copied from %xmm1 into %xmm6.
Adam Langleye9ada862015-05-11 17:20:37 -07001661L066xts_dec_four:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001662 movaps %xmm1,%xmm6
1663 movups (%esi),%xmm2
1664 movups 16(%esi),%xmm3
1665 movups 32(%esi),%xmm4
1666 xorps (%esp),%xmm2
1667 movups 48(%esi),%xmm5
1668 leal 64(%esi),%esi
1669 xorps 16(%esp),%xmm3
1670 xorps %xmm7,%xmm4
1671 xorps %xmm6,%xmm5
1672 call __aesni_decrypt4
1673 xorps (%esp),%xmm2
1674 xorps 16(%esp),%xmm3
1675 xorps %xmm7,%xmm4
1676 movups %xmm2,(%edi)
1677 xorps %xmm6,%xmm5
1678 movups %xmm3,16(%edi)
1679 movups %xmm4,32(%edi)
1680 movups %xmm5,48(%edi)
1681 leal 64(%edi),%edi
1682 movdqa %xmm6,%xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07001683 jmp L067xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001684.align 4,0x90
// No whole blocks left after the main loop. %xmm1 already holds the next
// unused tweak, %xmm0 its sign mask and %xmm3 the constants, so the stealing
// sequence can start directly when a partial tail exists.
Adam Langleye9ada862015-05-11 17:20:37 -07001685L062xts_dec_done6x:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001686 movl 112(%esp),%eax
1687 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001688 jz L069xts_dec_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001689 movl %eax,112(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07001690 jmp L070xts_dec_only_one_more
Adam Langleyd9e397b2015-01-22 14:27:53 -08001691.align 4,0x90
// Reached from the short paths with %xmm1 = last tweak used. If a partial
// tail remains, advance %xmm1 to the next tweak and leave its sign mask in
// %xmm0 and the constants in %xmm3.
Adam Langleye9ada862015-05-11 17:20:37 -07001692L067xts_dec_done:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001693 movl 112(%esp),%eax
1694 pxor %xmm0,%xmm0
1695 andl $15,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001696 jz L069xts_dec_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001697 pcmpgtd %xmm1,%xmm0
1698 movl %eax,112(%esp)
1699 pshufd $19,%xmm0,%xmm2
1700 pxor %xmm0,%xmm0
1701 movdqa 96(%esp),%xmm3
1702 paddq %xmm1,%xmm1
1703 pand %xmm3,%xmm2
1704 pcmpgtd %xmm1,%xmm0
1705 pxor %xmm2,%xmm1
// Stealing uses two tweaks in swapped order: %xmm6 keeps the current tweak,
// %xmm5 receives the following one. The held-back whole block at (%esi) is
// decrypted first, with the LATER tweak (%xmm5), and written to (%edi).
Adam Langleye9ada862015-05-11 17:20:37 -07001706L070xts_dec_only_one_more:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001707 pshufd $19,%xmm0,%xmm5
1708 movdqa %xmm1,%xmm6
1709 paddq %xmm1,%xmm1
1710 pand %xmm3,%xmm5
1711 pxor %xmm1,%xmm5
1712 movl %ebp,%edx
1713 movl %ebx,%ecx
1714 movups (%esi),%xmm2
1715 xorps %xmm5,%xmm2
1716 movups (%edx),%xmm0
1717 movups 16(%edx),%xmm1
1718 leal 32(%edx),%edx
1719 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001720L071dec1_loop_13:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001721.byte 102,15,56,222,209
1722 decl %ecx
1723 movups (%edx),%xmm1
1724 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001725 jnz L071dec1_loop_13
Adam Langleyd9e397b2015-01-22 14:27:53 -08001726.byte 102,15,56,223,209
1727 xorps %xmm5,%xmm2
1728 movups %xmm2,(%edi)
// Byte swap loop for %eax tail bytes: each input tail byte at 16(%esi)
// replaces the matching byte of the block just written at (%edi), and the
// displaced byte becomes the final partial output at 16(%edi).
Adam Langleye9ada862015-05-11 17:20:37 -07001729L072xts_dec_steal:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001730 movzbl 16(%esi),%ecx
1731 movzbl (%edi),%edx
1732 leal 1(%esi),%esi
1733 movb %cl,(%edi)
1734 movb %dl,16(%edi)
1735 leal 1(%edi),%edi
1736 subl $1,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001737 jnz L072xts_dec_steal
// Rewind %edi by the tail length and decrypt the patched block at (%edi) in
// place with the EARLIER tweak (%xmm6).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001738 subl 112(%esp),%edi
1739 movl %ebp,%edx
1740 movl %ebx,%ecx
1741 movups (%edi),%xmm2
1742 xorps %xmm6,%xmm2
1743 movups (%edx),%xmm0
1744 movups 16(%edx),%xmm1
1745 leal 32(%edx),%edx
1746 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001747L073dec1_loop_14:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001748.byte 102,15,56,222,209
1749 decl %ecx
1750 movups (%edx),%xmm1
1751 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001752 jnz L073dec1_loop_14
Adam Langleyd9e397b2015-01-22 14:27:53 -08001753.byte 102,15,56,223,209
1754 xorps %xmm6,%xmm2
1755 movups %xmm2,(%edi)
// Exit: zero %xmm0-%xmm7 and the six tweak slots at 0..95(%esp), then restore
// the caller stack pointer saved at 116(%esp) and the pushed registers.
Adam Langleye9ada862015-05-11 17:20:37 -07001756L069xts_dec_ret:
1757 pxor %xmm0,%xmm0
1758 pxor %xmm1,%xmm1
1759 pxor %xmm2,%xmm2
1760 movdqa %xmm0,(%esp)
1761 pxor %xmm3,%xmm3
1762 movdqa %xmm0,16(%esp)
1763 pxor %xmm4,%xmm4
1764 movdqa %xmm0,32(%esp)
1765 pxor %xmm5,%xmm5
1766 movdqa %xmm0,48(%esp)
1767 pxor %xmm6,%xmm6
1768 movdqa %xmm0,64(%esp)
1769 pxor %xmm7,%xmm7
1770 movdqa %xmm0,80(%esp)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001771 movl 116(%esp),%esp
1772 popl %edi
1773 popl %esi
1774 popl %ebx
1775 popl %ebp
1776 ret
1777.globl _aesni_cbc_encrypt
1778.private_extern _aesni_cbc_encrypt
1779.align 4
1780_aesni_cbc_encrypt:
1781L_aesni_cbc_encrypt_begin:
1782 pushl %ebp
1783 pushl %ebx
1784 pushl %esi
1785 pushl %edi
1786 movl 20(%esp),%esi
1787 movl %esp,%ebx
1788 movl 24(%esp),%edi
1789 subl $24,%ebx
1790 movl 28(%esp),%eax
1791 andl $-16,%ebx
1792 movl 32(%esp),%edx
1793 movl 36(%esp),%ebp
1794 testl %eax,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001795 jz L074cbc_abort
Adam Langleyd9e397b2015-01-22 14:27:53 -08001796 cmpl $0,40(%esp)
1797 xchgl %esp,%ebx
1798 movups (%ebp),%xmm7
1799 movl 240(%edx),%ecx
1800 movl %edx,%ebp
1801 movl %ebx,16(%esp)
1802 movl %ecx,%ebx
Adam Langleye9ada862015-05-11 17:20:37 -07001803 je L075cbc_decrypt
Adam Langleyd9e397b2015-01-22 14:27:53 -08001804 movaps %xmm7,%xmm2
1805 cmpl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001806 jb L076cbc_enc_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001807 subl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001808 jmp L077cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001809.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001810L077cbc_enc_loop:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001811 movups (%esi),%xmm7
1812 leal 16(%esi),%esi
1813 movups (%edx),%xmm0
1814 movups 16(%edx),%xmm1
1815 xorps %xmm0,%xmm7
1816 leal 32(%edx),%edx
1817 xorps %xmm7,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001818L078enc1_loop_15:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001819.byte 102,15,56,220,209
1820 decl %ecx
1821 movups (%edx),%xmm1
1822 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001823 jnz L078enc1_loop_15
Adam Langleyd9e397b2015-01-22 14:27:53 -08001824.byte 102,15,56,221,209
1825 movl %ebx,%ecx
1826 movl %ebp,%edx
1827 movups %xmm2,(%edi)
1828 leal 16(%edi),%edi
1829 subl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001830 jnc L077cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001831 addl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001832 jnz L076cbc_enc_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001833 movaps %xmm2,%xmm7
Adam Langleye9ada862015-05-11 17:20:37 -07001834 pxor %xmm2,%xmm2
1835 jmp L079cbc_ret
1836L076cbc_enc_tail:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001837 movl %eax,%ecx
1838.long 2767451785
1839 movl $16,%ecx
1840 subl %eax,%ecx
1841 xorl %eax,%eax
1842.long 2868115081
1843 leal -16(%edi),%edi
1844 movl %ebx,%ecx
1845 movl %edi,%esi
1846 movl %ebp,%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001847 jmp L077cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001848.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001849L075cbc_decrypt:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001850 cmpl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001851 jbe L080cbc_dec_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001852 movaps %xmm7,(%esp)
1853 subl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001854 jmp L081cbc_dec_loop6_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -08001855.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001856L082cbc_dec_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001857 movaps %xmm0,(%esp)
1858 movups %xmm7,(%edi)
1859 leal 16(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001860L081cbc_dec_loop6_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001861 movdqu (%esi),%xmm2
1862 movdqu 16(%esi),%xmm3
1863 movdqu 32(%esi),%xmm4
1864 movdqu 48(%esi),%xmm5
1865 movdqu 64(%esi),%xmm6
1866 movdqu 80(%esi),%xmm7
1867 call __aesni_decrypt6
1868 movups (%esi),%xmm1
1869 movups 16(%esi),%xmm0
1870 xorps (%esp),%xmm2
1871 xorps %xmm1,%xmm3
1872 movups 32(%esi),%xmm1
1873 xorps %xmm0,%xmm4
1874 movups 48(%esi),%xmm0
1875 xorps %xmm1,%xmm5
1876 movups 64(%esi),%xmm1
1877 xorps %xmm0,%xmm6
1878 movups 80(%esi),%xmm0
1879 xorps %xmm1,%xmm7
1880 movups %xmm2,(%edi)
1881 movups %xmm3,16(%edi)
1882 leal 96(%esi),%esi
1883 movups %xmm4,32(%edi)
1884 movl %ebx,%ecx
1885 movups %xmm5,48(%edi)
1886 movl %ebp,%edx
1887 movups %xmm6,64(%edi)
1888 leal 80(%edi),%edi
1889 subl $96,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001890 ja L082cbc_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001891 movaps %xmm7,%xmm2
1892 movaps %xmm0,%xmm7
1893 addl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001894 jle L083cbc_dec_clear_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001895 movups %xmm2,(%edi)
1896 leal 16(%edi),%edi
Adam Langleye9ada862015-05-11 17:20:37 -07001897L080cbc_dec_tail:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001898 movups (%esi),%xmm2
1899 movaps %xmm2,%xmm6
1900 cmpl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001901 jbe L084cbc_dec_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001902 movups 16(%esi),%xmm3
1903 movaps %xmm3,%xmm5
1904 cmpl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001905 jbe L085cbc_dec_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001906 movups 32(%esi),%xmm4
1907 cmpl $48,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001908 jbe L086cbc_dec_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001909 movups 48(%esi),%xmm5
1910 cmpl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001911 jbe L087cbc_dec_four
Adam Langleyd9e397b2015-01-22 14:27:53 -08001912 movups 64(%esi),%xmm6
1913 movaps %xmm7,(%esp)
1914 movups (%esi),%xmm2
1915 xorps %xmm7,%xmm7
1916 call __aesni_decrypt6
1917 movups (%esi),%xmm1
1918 movups 16(%esi),%xmm0
1919 xorps (%esp),%xmm2
1920 xorps %xmm1,%xmm3
1921 movups 32(%esi),%xmm1
1922 xorps %xmm0,%xmm4
1923 movups 48(%esi),%xmm0
1924 xorps %xmm1,%xmm5
1925 movups 64(%esi),%xmm7
1926 xorps %xmm0,%xmm6
1927 movups %xmm2,(%edi)
1928 movups %xmm3,16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001929 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001930 movups %xmm4,32(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001931 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001932 movups %xmm5,48(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001933 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08001934 leal 64(%edi),%edi
1935 movaps %xmm6,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001936 pxor %xmm6,%xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001937 subl $80,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001938 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001939.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001940L084cbc_dec_one:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001941 movups (%edx),%xmm0
1942 movups 16(%edx),%xmm1
1943 leal 32(%edx),%edx
1944 xorps %xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001945L089dec1_loop_16:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001946.byte 102,15,56,222,209
1947 decl %ecx
1948 movups (%edx),%xmm1
1949 leal 16(%edx),%edx
Adam Langleye9ada862015-05-11 17:20:37 -07001950 jnz L089dec1_loop_16
Adam Langleyd9e397b2015-01-22 14:27:53 -08001951.byte 102,15,56,223,209
1952 xorps %xmm7,%xmm2
1953 movaps %xmm6,%xmm7
1954 subl $16,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001955 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001956.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001957L085cbc_dec_two:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001958 call __aesni_decrypt2
1959 xorps %xmm7,%xmm2
1960 xorps %xmm6,%xmm3
1961 movups %xmm2,(%edi)
1962 movaps %xmm3,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001963 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001964 leal 16(%edi),%edi
1965 movaps %xmm5,%xmm7
1966 subl $32,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001967 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001968.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001969L086cbc_dec_three:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001970 call __aesni_decrypt3
1971 xorps %xmm7,%xmm2
1972 xorps %xmm6,%xmm3
1973 xorps %xmm5,%xmm4
1974 movups %xmm2,(%edi)
1975 movaps %xmm4,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001976 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001977 movups %xmm3,16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001978 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001979 leal 32(%edi),%edi
1980 movups 32(%esi),%xmm7
1981 subl $48,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07001982 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001983.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07001984L087cbc_dec_four:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001985 call __aesni_decrypt4
1986 movups 16(%esi),%xmm1
1987 movups 32(%esi),%xmm0
1988 xorps %xmm7,%xmm2
1989 movups 48(%esi),%xmm7
1990 xorps %xmm6,%xmm3
1991 movups %xmm2,(%edi)
1992 xorps %xmm1,%xmm4
1993 movups %xmm3,16(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001994 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001995 xorps %xmm0,%xmm5
1996 movups %xmm4,32(%edi)
Adam Langleye9ada862015-05-11 17:20:37 -07001997 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001998 leal 48(%edi),%edi
1999 movaps %xmm5,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07002000 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08002001 subl $64,%eax
Adam Langleye9ada862015-05-11 17:20:37 -07002002 jmp L088cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08002003.align 4,0x90
Adam Langleye9ada862015-05-11 17:20:37 -07002004L083cbc_dec_clear_tail_collected:
2005 pxor %xmm3,%xmm3
2006 pxor %xmm4,%xmm4
2007 pxor %xmm5,%xmm5
2008 pxor %xmm6,%xmm6
2009L088cbc_dec_tail_collected:
2010 andl $15,%eax
2011 jnz L090cbc_dec_tail_partial
2012 movups %xmm2,(%edi)
2013 pxor %xmm0,%xmm0
2014 jmp L079cbc_ret
2015.align 4,0x90
2016L090cbc_dec_tail_partial:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002017 movaps %xmm2,(%esp)
Adam Langleye9ada862015-05-11 17:20:37 -07002018 pxor %xmm0,%xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002019 movl $16,%ecx
2020 movl %esp,%esi
2021 subl %eax,%ecx
2022.long 2767451785
Adam Langleye9ada862015-05-11 17:20:37 -07002023 movdqa %xmm2,(%esp)
2024L079cbc_ret:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002025 movl 16(%esp),%esp
2026 movl 36(%esp),%ebp
Adam Langleye9ada862015-05-11 17:20:37 -07002027 pxor %xmm2,%xmm2
2028 pxor %xmm1,%xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002029 movups %xmm7,(%ebp)
Adam Langleye9ada862015-05-11 17:20:37 -07002030 pxor %xmm7,%xmm7
2031L074cbc_abort:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002032 popl %edi
2033 popl %esi
2034 popl %ebx
2035 popl %ebp
2036 ret
/*
 * __aesni_set_encrypt_key -- internal AES key-schedule expansion.
 *
 * Uses a private register interface, not the C stack convention:
 *   In:   %eax = pointer to the user key bytes
 *         %ecx = key length in bits (128, 192 or 256)
 *         %edx = pointer to the key schedule to fill in
 *   Out:  %eax =  0 on success
 *                -1 if %eax or %edx was zero on entry
 *                -2 if %ecx was not 128, 192 or 256
 *         %ecx = 9, 11 or 13 on success (for 128/192/256-bit keys); the
 *                same value is also written at byte offset 240 of the
 *                schedule.
 *   %ebp and %ebx are saved and restored.  %edx, flags and %xmm0-%xmm5 are
 *   modified; every success path zeroes %xmm0-%xmm5 before returning.
 *
 * Two implementations exist per key size.  The second dword of
 * OPENSSL_ia32cap_P is masked with 0x10000800; when the result is exactly
 * 0x10000000 the "_alt" variant (pshufb + aesenclast) is used, otherwise the
 * aeskeygenassist variant.
 * NOTE(review): bits 28 and 11 of that dword are presumably the AVX and XOP
 * feature flags -- confirm against the OPENSSL_ia32cap_P definition.
 *
 * AES-NI/SSSE3 instructions are emitted as raw .byte sequences; each one is
 * decoded in the comment beside it.
 */
.private_extern	__aesni_set_encrypt_key
.align	4
__aesni_set_encrypt_key:
	pushl	%ebp
	pushl	%ebx
	testl	%eax,%eax
	jz	L091bad_pointer
	testl	%edx,%edx
	jz	L091bad_pointer
	/* Position-independent addressing: %ebx = address of Lkey_const. */
	call	L092pic
L092pic:
	popl	%ebx
	leal	Lkey_const-L092pic(%ebx),%ebx
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
	movups	(%eax),%xmm0			/* first 16 key bytes */
	xorps	%xmm4,%xmm4			/* scratch for the shufps helpers; lane 0 must be 0 */
	movl	4(%ebp),%ebp			/* second dword of OPENSSL_ia32cap_P */
	leal	16(%edx),%edx
	andl	$268437504,%ebp			/* 0x10000800 */
	cmpl	$256,%ecx
	je	L09314rounds
	cmpl	$192,%ecx
	je	L09412rounds
	cmpl	$128,%ecx
	jne	L095bad_keybits
.align	4,0x90
/* ---- 128-bit key, aeskeygenassist variant ---- */
L09610rounds:
	cmpl	$268435456,%ebp			/* 0x10000000 -> use the _alt variant */
	je	L09710rounds_alt
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)			/* round key 0 = the user key */
.byte	102,15,58,223,200,1			/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	L098key_128_cold
.byte	102,15,58,223,200,2			/* aeskeygenassist $2,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,4			/* aeskeygenassist $4,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,8			/* aeskeygenassist $8,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,16			/* aeskeygenassist $16,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,32			/* aeskeygenassist $32,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,64			/* aeskeygenassist $64,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,128			/* aeskeygenassist $128,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,27			/* aeskeygenassist $27,%xmm0,%xmm1 */
	call	L099key_128
.byte	102,15,58,223,200,54			/* aeskeygenassist $54,%xmm0,%xmm1 */
	call	L099key_128
	movups	%xmm0,(%edx)			/* last round key */
	movl	%ecx,80(%edx)			/* schedule offset 240 */
	jmp	L100good_key
.align	4,0x90
/*
 * Helper: store the current round key (skipped on the "cold" entry), then
 * derive the next one in %xmm0.  The two shufps/xorps pairs turn %xmm0 into
 * the running xor of its four dwords; the last pair xors in dword 3 of the
 * aeskeygenassist result held in %xmm1.
 */
L099key_128:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
L098key_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1		/* broadcast dword 3 of %xmm1 */
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
/* ---- 128-bit key, pshufb/aesenclast variant ---- */
L09710rounds_alt:
	movdqa	(%ebx),%xmm5			/* byte-shuffle mask, row 0 of Lkey_const */
	movl	$8,%ecx
	movdqa	32(%ebx),%xmm4			/* round constant, starts at 1 in every dword */
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,-16(%edx)			/* round key 0 */
L101loop_key128:
.byte	102,15,56,0,197				/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196			/* aesenclast %xmm4,%xmm0 */
	pslld	$1,%xmm4			/* double the round constant */
	leal	16(%edx),%edx
	/* %xmm2 = running xor of the previous key's four dwords */
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%edx)
	movdqa	%xmm0,%xmm2
	decl	%ecx
	jnz	L101loop_key128
	/* Last two round keys use the constants 27 and 54 (27 doubled). */
	movdqa	48(%ebx),%xmm4
.byte	102,15,56,0,197				/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196			/* aesenclast %xmm4,%xmm0 */
	pslld	$1,%xmm4
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197				/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196			/* aesenclast %xmm4,%xmm0 */
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%edx)
	movl	$9,%ecx
	movl	%ecx,96(%edx)			/* schedule offset 240 */
	jmp	L100good_key
.align	4,0x90
/* ---- 192-bit key, aeskeygenassist variant ---- */
L09412rounds:
	movq	16(%eax),%xmm2			/* key bytes 16..23 */
	cmpl	$268435456,%ebp			/* 0x10000000 -> use the _alt variant */
	je	L10212rounds_alt
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,202,1			/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	L103key_192a_cold
.byte	102,15,58,223,202,2			/* aeskeygenassist $2,%xmm2,%xmm1 */
	call	L104key_192b
.byte	102,15,58,223,202,4			/* aeskeygenassist $4,%xmm2,%xmm1 */
	call	L105key_192a
.byte	102,15,58,223,202,8			/* aeskeygenassist $8,%xmm2,%xmm1 */
	call	L104key_192b
.byte	102,15,58,223,202,16			/* aeskeygenassist $16,%xmm2,%xmm1 */
	call	L105key_192a
.byte	102,15,58,223,202,32			/* aeskeygenassist $32,%xmm2,%xmm1 */
	call	L104key_192b
.byte	102,15,58,223,202,64			/* aeskeygenassist $64,%xmm2,%xmm1 */
	call	L105key_192a
.byte	102,15,58,223,202,128			/* aeskeygenassist $128,%xmm2,%xmm1 */
	call	L104key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)			/* schedule offset 240 */
	jmp	L100good_key
.align	4,0x90
/*
 * 192-bit helpers.  Each step yields 24 bytes of schedule (%xmm0 plus the
 * low half of %xmm2).  key_192a stores 16 bytes and keeps the old %xmm2 in
 * %xmm5; key_192b stores 32 bytes assembled from %xmm5, %xmm0 and %xmm2,
 * then joins the shared tail at key_192b_warm.
 */
L105key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	4,0x90
L103key_192a_cold:
	movaps	%xmm2,%xmm5
L106key_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1			/* broadcast dword 1 of %xmm1 */
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3		/* broadcast dword 3 of the new %xmm0 */
	pxor	%xmm3,%xmm2
	ret
.align	4,0x90
L104key_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	L106key_192b_warm
.align	4,0x90
/* ---- 192-bit key, pshufb/aesenclast variant ---- */
L10212rounds_alt:
	movdqa	16(%ebx),%xmm5			/* byte-shuffle mask, row 1 of Lkey_const */
	movdqa	32(%ebx),%xmm4			/* round constant, starts at 1 */
	movl	$8,%ecx
	movdqu	%xmm0,-16(%edx)
L107loop_key192:
	movq	%xmm2,(%edx)			/* low 8 bytes of %xmm2 */
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213				/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212			/* aesenclast %xmm4,%xmm2 */
	pslld	$1,%xmm4			/* double the round constant */
	leal	24(%edx),%edx			/* 24 bytes of schedule per iteration */
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%edx)
	decl	%ecx
	jnz	L107loop_key192
	movl	$11,%ecx
	movl	%ecx,32(%edx)			/* schedule offset 240 */
	jmp	L100good_key
.align	4,0x90
/* ---- 256-bit key, aeskeygenassist variant ---- */
L09314rounds:
	movups	16(%eax),%xmm2			/* key bytes 16..31 */
	leal	16(%edx),%edx
	cmpl	$268435456,%ebp			/* 0x10000000 -> use the _alt variant */
	je	L10814rounds_alt
	movl	$13,%ecx
	movups	%xmm0,-32(%edx)			/* round key 0 */
	movups	%xmm2,-16(%edx)			/* round key 1 */
.byte	102,15,58,223,202,1			/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	L109key_256a_cold
.byte	102,15,58,223,200,1			/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	L110key_256b
.byte	102,15,58,223,202,2			/* aeskeygenassist $2,%xmm2,%xmm1 */
	call	L111key_256a
.byte	102,15,58,223,200,2			/* aeskeygenassist $2,%xmm0,%xmm1 */
	call	L110key_256b
.byte	102,15,58,223,202,4			/* aeskeygenassist $4,%xmm2,%xmm1 */
	call	L111key_256a
.byte	102,15,58,223,200,4			/* aeskeygenassist $4,%xmm0,%xmm1 */
	call	L110key_256b
.byte	102,15,58,223,202,8			/* aeskeygenassist $8,%xmm2,%xmm1 */
	call	L111key_256a
.byte	102,15,58,223,200,8			/* aeskeygenassist $8,%xmm0,%xmm1 */
	call	L110key_256b
.byte	102,15,58,223,202,16			/* aeskeygenassist $16,%xmm2,%xmm1 */
	call	L111key_256a
.byte	102,15,58,223,200,16			/* aeskeygenassist $16,%xmm0,%xmm1 */
	call	L110key_256b
.byte	102,15,58,223,202,32			/* aeskeygenassist $32,%xmm2,%xmm1 */
	call	L111key_256a
.byte	102,15,58,223,200,32			/* aeskeygenassist $32,%xmm0,%xmm1 */
	call	L110key_256b
.byte	102,15,58,223,202,64			/* aeskeygenassist $64,%xmm2,%xmm1 */
	call	L111key_256a
	movups	%xmm0,(%edx)
	movl	%ecx,16(%edx)			/* schedule offset 240 */
	xorl	%eax,%eax
	jmp	L100good_key
.align	4,0x90
/*
 * 256-bit helpers.  key_256a stores %xmm2 and updates %xmm0 using dword 3 of
 * the aeskeygenassist result; key_256b stores %xmm0 and updates %xmm2 using
 * dword 2 of the result.
 */
L111key_256a:
	movups	%xmm2,(%edx)
	leal	16(%edx),%edx
L109key_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1		/* broadcast dword 3 of %xmm1 */
	xorps	%xmm1,%xmm0
	ret
.align	4,0x90
L110key_256b:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1		/* broadcast dword 2 of %xmm1 */
	xorps	%xmm1,%xmm2
	ret
.align	4,0x90
/* ---- 256-bit key, pshufb/aesenclast variant ---- */
L10814rounds_alt:
	movdqa	(%ebx),%xmm5			/* byte-shuffle mask, row 0 of Lkey_const */
	movdqa	32(%ebx),%xmm4			/* round constant, starts at 1 */
	movl	$7,%ecx
	movdqu	%xmm0,-32(%edx)			/* round key 0 */
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,-16(%edx)			/* round key 1 */
L112loop_key256:
.byte	102,15,56,0,213				/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212			/* aesenclast %xmm4,%xmm2 */
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4			/* double the round constant */
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	decl	%ecx
	jz	L113done_key256			/* the 7th pass emits only the even key */
	pshufd	$255,%xmm0,%xmm2		/* broadcast dword 3 of the new key */
	pxor	%xmm3,%xmm3			/* zero round key: no constant is xored in */
.byte	102,15,56,221,211			/* aesenclast %xmm3,%xmm2 */
	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1
	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%edx)
	leal	32(%edx),%edx
	movdqa	%xmm2,%xmm1
	jmp	L112loop_key256
L113done_key256:
	movl	$13,%ecx
	movl	%ecx,16(%edx)			/* schedule offset 240 */
/* Common success exit: wipe the key material held in %xmm0-%xmm5. */
L100good_key:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorl	%eax,%eax
	popl	%ebx
	popl	%ebp
	ret
.align	2,0x90
/* Null key or schedule pointer: nothing has been loaded yet. */
L091bad_pointer:
	movl	$-1,%eax
	popl	%ebx
	popl	%ebp
	ret
.align	2,0x90
/* Unsupported bit length: %xmm0 already holds 16 key bytes, so clear it. */
L095bad_keybits:
	pxor	%xmm0,%xmm0
	movl	$-2,%eax
	popl	%ebx
	popl	%ebp
	ret
/*
 * aesni_set_encrypt_key -- stack-argument entry point for key expansion.
 *
 * Loads the three 32-bit stack arguments into the registers expected by
 * __aesni_set_encrypt_key and calls it:
 *    4(%esp) -> %eax   pointer to the user key bytes
 *    8(%esp) -> %ecx   key length in bits
 *   12(%esp) -> %edx   pointer to the key schedule to fill in
 * The value left in %eax by __aesni_set_encrypt_key (0, -1 or -2) is
 * returned unchanged.
 */
.globl	_aesni_set_encrypt_key
.private_extern	_aesni_set_encrypt_key
.align	4
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	__aesni_set_encrypt_key
	ret
/*
 * aesni_set_decrypt_key -- build a decryption key schedule.
 *
 * Stack arguments (same layout as aesni_set_encrypt_key):
 *    4(%esp) = pointer to the user key bytes
 *    8(%esp) = key length in bits
 *   12(%esp) = pointer to the key schedule to fill in
 * Returns in %eax whatever __aesni_set_encrypt_key returned if that was
 * non-zero; otherwise converts the schedule in place and returns 0.
 *
 * Conversion: the round keys are reversed in order, and every key except
 * the first and last has aesimc applied to it.  %xmm0 and %xmm1 are zeroed
 * before the successful return.
 */
.globl	_aesni_set_decrypt_key
.private_extern	_aesni_set_decrypt_key
.align	4
_aesni_set_decrypt_key:
L_aesni_set_decrypt_key_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	__aesni_set_encrypt_key
	movl	12(%esp),%edx			/* callee advanced %edx; reload schedule base */
	shll	$4,%ecx				/* value left in %ecx by the callee, times 16 */
	testl	%eax,%eax
	jnz	L114dec_key_ret			/* pass the error code through */
	leal	16(%edx,%ecx,1),%eax		/* %eax = address of the last round key */
	/* Swap the first and last round keys without transforming them. */
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
	movups	%xmm0,(%eax)
	movups	%xmm1,(%edx)
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	/* Walk inward from both ends, transforming and swapping each pair. */
L115dec_key_inverse:
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
.byte	102,15,56,219,192			/* aesimc %xmm0,%xmm0 */
.byte	102,15,56,219,201			/* aesimc %xmm1,%xmm1 */
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	movups	%xmm0,16(%eax)			/* into the slot the high pointer just left */
	movups	%xmm1,-16(%edx)			/* into the slot the low pointer just left */
	cmpl	%edx,%eax
	ja	L115dec_key_inverse		/* unsigned: continue until the pointers meet */
	/* Middle round key: transform in place. */
	movups	(%edx),%xmm0
.byte	102,15,56,219,192			/* aesimc %xmm0,%xmm0 */
	movups	%xmm0,(%edx)
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorl	%eax,%eax
L114dec_key_ret:
	ret
/*
 * Lkey_const -- constants read by the key-setup code in this file relative
 * to %ebx, four 16-byte rows:
 *   +0   0x0C0F0E0D in every dword (pshufb byte-selection mask)
 *   +16  0x04070605 in every dword (pshufb byte-selection mask)
 *   +32  1 in every dword
 *   +48  27 in every dword
 * The rows are loaded with movdqa, so the table must stay 16-byte aligned.
 */
.align	6,0x90
Lkey_const:
.long	202313229,202313229,202313229,202313229
.long	67569157,67569157,67569157,67569157
.long	1,1,1,1
.long	27,27,27,27
/* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte	115,108,46,111,114,103,62,0
/* Non-lazy pointer slot through which the code reaches _OPENSSL_ia32cap_P. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif