# blob: 660564b45638d87b0c51a50ba41a2895021c9824
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

# Turn assembly off when the compiler reports the memory_sanitizer feature.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

# Code following this guard is assembled only when targeting x86-64 with
# assembly enabled.
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.extern	OPENSSL_ia32cap_P
.hidden	OPENSSL_ia32cap_P
# aes_hw_encrypt: encrypt a single 16-byte block using AES-NI.
# Register usage on entry (System V AMD64 argument order):
#   %rdi = input block, %rsi = output block, %rdx = key schedule.
#   The loop count is read from offset 240 of the key schedule
#   (presumably the round count; confirm against the key setup code).
# Result: 16 bytes written to (%rsi).
# Clobbers: %eax, %rdx, flags; %xmm0-%xmm2 are zeroed before returning.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,@function
.align	16
aes_hw_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern	BORINGSSL_function_hit
.hidden	BORINGSSL_function_hit
	movb	$1,BORINGSSL_function_hit+1(%rip)	# flag byte 1: this routine was entered
#endif
	movups	(%rdi),%xmm2			# xmm2 = input block
	movl	240(%rdx),%eax			# eax = loop count
	movups	(%rdx),%xmm0			# xmm0 = round key 0
	movups	16(%rdx),%xmm1			# xmm1 = round key 1
	leaq	32(%rdx),%rdx			# rdx -> round key 2
	xorps	%xmm0,%xmm2			# initial key whitening
.Loop_enc1_1:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1			# next round key (flags from decl survive)
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209			# aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0			# scrub key material from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)			# store result
	pxor	%xmm2,%xmm2			# scrub the block as well
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	aes_hw_encrypt,.-aes_hw_encrypt

# aes_hw_decrypt: decrypt a single 16-byte block using AES-NI.
# Register usage on entry (System V AMD64 argument order):
#   %rdi = input block, %rsi = output block, %rdx = key schedule.
#   The loop count is read from offset 240 of the key schedule
#   (presumably the round count; confirm against the key setup code).
# Result: 16 bytes written to (%rsi).
# Clobbers: %eax, %rdx, flags; %xmm0-%xmm2 are zeroed before returning.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,@function
.align	16
aes_hw_decrypt:
.cfi_startproc
	movups	(%rdi),%xmm2			# xmm2 = input block
	movl	240(%rdx),%eax			# eax = loop count
	movups	(%rdx),%xmm0			# xmm0 = round key 0
	movups	16(%rdx),%xmm1			# xmm1 = round key 1
	leaq	32(%rdx),%rdx			# rdx -> round key 2
	xorps	%xmm0,%xmm2			# initial key whitening
.Loop_dec1_2:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1			# next round key (flags from decl survive)
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209			# aesdeclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0			# scrub key material from registers
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)			# store result
	pxor	%xmm2,%xmm2			# scrub the block as well
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	aes_hw_decrypt, .-aes_hw_decrypt
# _aesni_encrypt2: encrypt two blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm3 = blocks.
#   Out: %xmm2-%xmm3 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# initial key whitening
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0			# round key 2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
	addq	$16,%rax			# negative index that counts up to zero

.Lenc_loop2:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Lenc_loop2

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2
# _aesni_decrypt2: decrypt two blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm3 = blocks.
#   Out: %xmm2-%xmm3 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# initial key whitening
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0			# round key 2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
	addq	$16,%rax			# negative index that counts up to zero

.Ldec_loop2:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Ldec_loop2

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,223,208			# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			# aesdeclast %xmm0,%xmm3
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_decrypt2,.-_aesni_decrypt2
# _aesni_encrypt3: encrypt three blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm4 = blocks.
#   Out: %xmm2-%xmm4 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# initial key whitening
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0			# round key 2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
	addq	$16,%rax			# negative index that counts up to zero

.Lenc_loop3:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Lenc_loop3

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			# aesenclast %xmm0,%xmm4
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: decrypt three blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm4 = blocks.
#   Out: %xmm2-%xmm4 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# initial key whitening
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0			# round key 2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
	addq	$16,%rax			# negative index that counts up to zero

.Ldec_loop3:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Ldec_loop3

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,223,208			# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			# aesdeclast %xmm0,%xmm4
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: encrypt four blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm5 = blocks.
#   Out: %xmm2-%xmm5 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# initial key whitening
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0			# round key 2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
.byte	0x0f,0x1f,0x00				# nopl (%rax): three bytes of padding
	addq	$16,%rax			# negative index that counts up to zero

.Lenc_loop4:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Lenc_loop4

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232			# aesenclast %xmm0,%xmm5
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: decrypt four blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm5 = blocks.
#   Out: %xmm2-%xmm5 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# initial key whitening
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0			# round key 2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
.byte	0x0f,0x1f,0x00				# nopl (%rax): three bytes of padding
	addq	$16,%rax			# negative index that counts up to zero

.Ldec_loop4:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Ldec_loop4

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
.byte	102,15,56,223,208			# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232			# aesdeclast %xmm0,%xmm5
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: encrypt six blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm7 = blocks.
#   Out: %xmm2-%xmm7 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# The first round with round key 1 is interleaved with the key-0 whitening,
# so the loop is entered part-way through at .Lenc_loop6_enter.
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# whitening of blocks 0-2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5			# whitening of blocks 3-5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0		# round key 2
	addq	$16,%rax			# negative index that counts up to zero
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.Lenc_loop6_enter:
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			# aesenc %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240			# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248			# aesenc %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Lenc_loop6

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			# aesenc %xmm1,%xmm7
.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232			# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240			# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248			# aesenclast %xmm0,%xmm7
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: decrypt six blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm7 = blocks.
#   Out: %xmm2-%xmm7 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# The first round with round key 1 is interleaved with the key-0 whitening,
# so the loop is entered part-way through at .Ldec_loop6_enter.
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# whitening of blocks 0-2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5			# whitening of blocks 3-5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0		# round key 2
	addq	$16,%rax			# negative index that counts up to zero
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.Ldec_loop6_enter:
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241			# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249			# aesdec %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240			# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248			# aesdec %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Ldec_loop6

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241			# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249			# aesdec %xmm1,%xmm7
.byte	102,15,56,223,208			# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232			# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240			# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248			# aesdeclast %xmm0,%xmm7
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: encrypt eight blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm9 = blocks.
#   Out: %xmm2-%xmm9 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# The first round with round key 1 is interleaved with the key-0 whitening,
# so the loop is entered part-way through at .Lenc_loop8_inner.
# Encodings carrying the extra byte 68 (REX.R) target %xmm8 and %xmm9.
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# whitening of blocks 0-4
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm7			# whitening of blocks 5-7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0		# round key 2
	addq	$16,%rax			# negative index that counts up to zero
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.Lenc_loop8_inner:
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193			# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201			# aesenc %xmm1,%xmm9
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,220,208			# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216			# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224			# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232			# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240			# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248			# aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192			# aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200			# aesenc %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Lenc_loop8

.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217			# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225			# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233			# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241			# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249			# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193			# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201			# aesenc %xmm1,%xmm9
.byte	102,15,56,221,208			# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216			# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224			# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232			# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240			# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248			# aesenclast %xmm0,%xmm7
.byte	102,68,15,56,221,192			# aesenclast %xmm0,%xmm8
.byte	102,68,15,56,221,200			# aesenclast %xmm0,%xmm9
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: decrypt eight blocks in parallel.
# Internal helper with a private register convention (not a C-callable ABI):
#   In:  %rcx = key schedule, %eax = loop count taken from offset 240 of the
#        key schedule by the callers in this file, %xmm2-%xmm9 = blocks.
#   Out: %xmm2-%xmm9 = processed blocks.
#   Clobbers: %rax, %rcx, %xmm0, %xmm1, flags.
# The first round with round key 1 is interleaved with the key-0 whitening,
# so the loop is entered part-way through at .Ldec_loop8_inner.
# Encodings carrying the extra byte 68 (REX.R) target %xmm8 and %xmm9.
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0			# round key 0
	shll	$4,%eax				# eax = 16 * loop count
	movups	16(%rcx),%xmm1			# round key 1
	xorps	%xmm0,%xmm2			# whitening of blocks 0-4
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx		# rcx -> end of the keys used by the loop
	negq	%rax
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm7			# whitening of blocks 5-7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0		# round key 2
	addq	$16,%rax			# negative index that counts up to zero
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.Ldec_loop8_inner:
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241			# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249			# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193			# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201			# aesdec %xmm1,%xmm9
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax			# sets ZF for the jnz below
.byte	102,15,56,222,208			# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216			# aesdec %xmm0,%xmm3
.byte	102,15,56,222,224			# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232			# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240			# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248			# aesdec %xmm0,%xmm7
.byte	102,68,15,56,222,192			# aesdec %xmm0,%xmm8
.byte	102,68,15,56,222,200			# aesdec %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0		# movups leaves the flags untouched
	jnz	.Ldec_loop8

.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217			# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225			# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233			# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241			# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249			# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193			# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201			# aesdec %xmm1,%xmm9
.byte	102,15,56,223,208			# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216			# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224			# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232			# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240			# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248			# aesdeclast %xmm0,%xmm7
.byte	102,68,15,56,223,192			# aesdeclast %xmm0,%xmm8
.byte	102,68,15,56,223,200			# aesdeclast %xmm0,%xmm9
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	_aesni_decrypt8,.-_aesni_decrypt8
# aes_hw_ecb_encrypt: process a buffer block by block (ECB) using AES-NI.
# Register usage on entry (System V AMD64 argument order):
#   %rdi = input, %rsi = output, %rdx = length in bytes (rounded down to a
#   multiple of 16; nothing is done when the result is zero),
#   %rcx = key schedule (loop count at offset 240),
#   %r8d = direction: non-zero encrypts, zero decrypts.
# Strategy: eight blocks per iteration while at least 128 bytes remain, then
# a tail of one to seven blocks dispatched to the 1/2/3/4/6/8-way helpers.
# Internal state: %r11 = saved key pointer, %r10d = saved loop count; both are
# reloaded into %rcx/%eax because the helpers clobber them.
# Clobbers: %rax, %rcx, %rdx, %rdi, %rsi, %r10, %r11, %xmm0-%xmm9, flags.
# The decrypt paths zero each data register after storing it.
.globl	aes_hw_ecb_encrypt
.hidden	aes_hw_ecb_encrypt
.type	aes_hw_ecb_encrypt,@function
.align	16
aes_hw_ecb_encrypt:
.cfi_startproc
	andq	$-16,%rdx			# whole blocks only
	jz	.Lecb_ret

	movl	240(%rcx),%eax			# eax = loop count
	movups	(%rcx),%xmm0
	movq	%rcx,%r11			# keep key pointer
	movl	%eax,%r10d			# keep loop count
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail			# fewer than eight blocks

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	# Store the previous eight results while loading the next eight inputs.
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx			# restore key pointer
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax			# restore loop count
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8			# no borrow: another full group remains

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx			# undo the borrow: rdx = leftover bytes
	jz	.Lecb_ret

.Lecb_enc_tail:
	# One to seven blocks remain. Each cmpq feeds both the jb and the
	# following je; the movups between them does not change the flags.
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9			# seven blocks: eighth lane is a zero dummy
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209			# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209			# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7			# five blocks: sixth lane is a zero dummy
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail			# fewer than eight blocks

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	# Store the previous eight results while loading the next eight inputs.
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx			# restore key pointer
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax			# restore loop count
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0			# reload the first round key
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8			# no borrow: another full group remains

	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2			# zero each register once stored
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx			# undo the borrow: rdx = leftover bytes
	jz	.Lecb_ret

.Lecb_dec_tail:
	# One to seven blocks remain. Each cmpq feeds both the jb and the
	# following je; the movups between them does not change the flags.
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9			# seven blocks: eighth lane is a zero dummy
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209			# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209			# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7			# five blocks: sixth lane is a zero dummy
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0			# scrub the round-key registers
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
Adam Vartanianbfcf3a72018-08-10 14:55:24 +0100882.globl aes_hw_ctr32_encrypt_blocks
883.hidden aes_hw_ctr32_encrypt_blocks
884.type aes_hw_ctr32_encrypt_blocks,@function
Adam Langleyd9e397b2015-01-22 14:27:53 -0800885.align 16
# aes_hw_ctr32_encrypt_blocks: AES counter-mode encryption with AES-NI.
#
# Register roles on entry, as used by the code below:
#   rdi - source data, read 16 bytes per block
#   rsi - destination, written 16 bytes per block
#   rdx - number of 16-byte blocks to process
#   rcx - key schedule: round keys at 16-byte strides from offset 0 and a
#         32-bit count of aesenc iterations at offset 240
#   r8  - 16-byte counter block; only its last 32-bit word is incremented,
#         as a big-endian value. The block in memory is never written back.
# NOTE(review): argument meanings are inferred from register usage here;
# confirm against the C prototype.
#
# Legend for the pre-encoded .byte instructions:
#   102,15,56,220,M        aesenc; M=209,217,225,233,241,249 applies %xmm1
#                          to %xmm2..%xmm7, M=208,216,224,232,240,248
#                          applies %xmm0 to %xmm2..%xmm7
#   102,68,15,56,220,M     aesenc into %xmm8/%xmm9; M=193,201 from %xmm1,
#                          M=192,200 from %xmm0
#   ...,221,M              same operand layout, but aesenclast
#   102,65,15,56,221,M     aesenclast %xmm10..%xmm15 into %xmm2..%xmm7
#                          (M=210,219,228,237,246,255)
#   102,69,15,56,221,202   aesenclast %xmm10,%xmm9
#   102,15,58,34,M,3       pinsrd into dword 3: M=216 %eax to %xmm3,
#                          M=226 %edx to %xmm4, M=232 %eax to %xmm5
#   0x0f,0x38,0xf1,0x44,0x24,N   movbe %eax,N(%rsp)
#   0x66,0x90              two-byte nop
#   0xf3,0xc3              rep ret
Adam Vartanianbfcf3a72018-08-10 14:55:24 +0100886aes_hw_ctr32_encrypt_blocks:
Robert Sloanab8b8882018-03-26 11:39:51 -0700887.cfi_startproc
Pete Bentley470a9302019-10-02 14:44:32 +0100888#ifdef BORINGSSL_DISPATCH_TEST
Robert Sloan4c22c5f2019-03-01 15:53:37 -0800889 movb $1,BORINGSSL_function_hit(%rip)
890#endif
# Exactly one block takes the short path below; anything else goes to the
# bulk code, which sets up a stack frame.
Adam Langleye9ada862015-05-11 17:20:37 -0700891 cmpq $1,%rdx
892 jne .Lctr32_bulk
893
894
895
# Single block: xmm2 = counter block, xmm3 = input block, edx = aesenc
# iteration count. The counter block is encrypted and XORed with the input.
896 movups (%r8),%xmm2
897 movups (%rdi),%xmm3
898 movl 240(%rcx),%edx
899 movups (%rcx),%xmm0
900 movups 16(%rcx),%xmm1
901 leaq 32(%rcx),%rcx
902 xorps %xmm0,%xmm2
Robert Sloan4c22c5f2019-03-01 15:53:37 -0800903.Loop_enc1_5:
Adam Langleye9ada862015-05-11 17:20:37 -0700904.byte 102,15,56,220,209
905 decl %edx
906 movups (%rcx),%xmm1
907 leaq 16(%rcx),%rcx
Robert Sloan4c22c5f2019-03-01 15:53:37 -0800908 jnz .Loop_enc1_5
Adam Langleye9ada862015-05-11 17:20:37 -0700909.byte 102,15,56,221,209
# Clear the round-key and data registers once they are no longer needed.
910 pxor %xmm0,%xmm0
911 pxor %xmm1,%xmm1
912 xorps %xmm3,%xmm2
913 pxor %xmm3,%xmm3
914 movups %xmm2,(%rsi)
915 xorps %xmm2,%xmm2
# No frame was created on this path, so skip the frame teardown.
916 jmp .Lctr32_epilogue
917
918.align 16
919.Lctr32_bulk:
# r11 keeps the entry value of rsp for the epilogue. rbp is saved because
# it is used below to hold the last dword of round key 0. The stack then
# gets a 16-byte aligned scratch area of at least 128 bytes, which holds
# eight pre-whitened counter blocks at 0,16,...,112(%rsp).
Robert Sloana94fe052017-02-21 08:49:28 -0800920 leaq (%rsp),%r11
Robert Sloanab8b8882018-03-26 11:39:51 -0700921.cfi_def_cfa_register %r11
Adam Langleyd9e397b2015-01-22 14:27:53 -0800922 pushq %rbp
Robert Sloanab8b8882018-03-26 11:39:51 -0700923.cfi_offset %rbp,-16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800924 subq $128,%rsp
925 andq $-16,%rsp
Adam Langleyd9e397b2015-01-22 14:27:53 -0800926
Adam Langleye9ada862015-05-11 17:20:37 -0700927
928
Adam Langleyd9e397b2015-01-22 14:27:53 -0800929
# xmm2 = counter block XOR round key 0. r8d becomes the byte-swapped last
# counter word (r8 stops being a pointer here); ebp = last dword of round
# key 0, so that bswap(counter+k) XOR ebp yields the whitened last dword
# of block k.
930 movdqu (%r8),%xmm2
931 movdqu (%rcx),%xmm0
932 movl 12(%r8),%r8d
933 pxor %xmm0,%xmm2
Robert Sloana94fe052017-02-21 08:49:28 -0800934 movl 12(%rcx),%ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -0800935 movdqa %xmm2,0(%rsp)
936 bswapl %r8d
937 movdqa %xmm2,%xmm3
938 movdqa %xmm2,%xmm4
939 movdqa %xmm2,%xmm5
940 movdqa %xmm2,64(%rsp)
941 movdqa %xmm2,80(%rsp)
942 movdqa %xmm2,96(%rsp)
# rdx is borrowed as scratch for counter+2; the block count is parked in
# r10 and moved back a few instructions later.
943 movq %rdx,%r10
944 movdqa %xmm2,112(%rsp)
945
946 leaq 1(%r8),%rax
947 leaq 2(%r8),%rdx
948 bswapl %eax
949 bswapl %edx
Robert Sloana94fe052017-02-21 08:49:28 -0800950 xorl %ebp,%eax
951 xorl %ebp,%edx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800952.byte 102,15,58,34,216,3
953 leaq 3(%r8),%rax
954 movdqa %xmm3,16(%rsp)
955.byte 102,15,58,34,226,3
956 bswapl %eax
957 movq %r10,%rdx
958 leaq 4(%r8),%r10
959 movdqa %xmm4,32(%rsp)
Robert Sloana94fe052017-02-21 08:49:28 -0800960 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -0800961 bswapl %r10d
962.byte 102,15,58,34,232,3
Robert Sloana94fe052017-02-21 08:49:28 -0800963 xorl %ebp,%r10d
Adam Langleyd9e397b2015-01-22 14:27:53 -0800964 movdqa %xmm5,48(%rsp)
965 leaq 5(%r8),%r9
966 movl %r10d,64+12(%rsp)
967 bswapl %r9d
968 leaq 6(%r8),%r10
# eax = aesenc iteration count, kept for the whole bulk path.
969 movl 240(%rcx),%eax
Robert Sloana94fe052017-02-21 08:49:28 -0800970 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -0800971 bswapl %r10d
972 movl %r9d,80+12(%rsp)
Robert Sloana94fe052017-02-21 08:49:28 -0800973 xorl %ebp,%r10d
Adam Langleyd9e397b2015-01-22 14:27:53 -0800974 leaq 7(%r8),%r9
975 movl %r10d,96+12(%rsp)
976 bswapl %r9d
# r10d = second capability word masked with 0x4400000 (bits 22 and 26).
# NOTE(review): presumably these are the CPUID.1:ECX MOVBE and XSAVE bits;
# the movbe-based six-block loop is chosen only when bit 22 alone is set.
Robert Sloan2424d842017-05-01 07:46:28 -0700977 leaq OPENSSL_ia32cap_P(%rip),%r10
Robert Sloan572a4e22017-04-17 10:52:19 -0700978 movl 4(%r10),%r10d
Robert Sloana94fe052017-02-21 08:49:28 -0800979 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -0800980 andl $71303168,%r10d
981 movl %r9d,112+12(%rsp)
982
# xmm1 = round key 1; xmm6/xmm7 = counter blocks 4 and 5 from the scratch.
983 movups 16(%rcx),%xmm1
984
985 movdqa 64(%rsp),%xmm6
986 movdqa 80(%rsp),%xmm7
987
# Fewer than eight blocks: go straight to the tail code.
988 cmpq $8,%rdx
989 jb .Lctr32_tail
990
991 subq $6,%rdx
992 cmpl $4194304,%r10d
993 je .Lctr32_6x
994
# Eight-block loop: rcx is biased by 128 so round keys are addressed as
# N-128(%rcx); rdx has now been reduced by eight in total.
995 leaq 128(%rcx),%rcx
996 subq $2,%rdx
997 jmp .Lctr32_loop8
998
999.align 16
1000.Lctr32_6x:
# Six-block loop setup: eax = 16 * iteration count, rcx = key + 32 + eax,
# r10 = 48 - eax, so that N(%rcx,%r10,1) addresses key + 80 + N.
# ebp is byte-swapped because movbe swaps the value again on store.
1001 shll $4,%eax
1002 movl $48,%r10d
Robert Sloana94fe052017-02-21 08:49:28 -08001003 bswapl %ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001004 leaq 32(%rcx,%rax,1),%rcx
1005 subq %rax,%r10
1006 jmp .Lctr32_loop6
1007
1008.align 16
1009.Lctr32_loop6:
# The first two rounds on xmm2..xmm7 are interleaved with writing the six
# counters of the next pass into the last dword of scratch blocks 0..5
# (offsets 12,28,44,60,76,92) with movbe.
1010 addl $6,%r8d
1011 movups -48(%rcx,%r10,1),%xmm0
1012.byte 102,15,56,220,209
1013 movl %r8d,%eax
Robert Sloana94fe052017-02-21 08:49:28 -08001014 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001015.byte 102,15,56,220,217
1016.byte 0x0f,0x38,0xf1,0x44,0x24,12
1017 leal 1(%r8),%eax
1018.byte 102,15,56,220,225
Robert Sloana94fe052017-02-21 08:49:28 -08001019 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001020.byte 0x0f,0x38,0xf1,0x44,0x24,28
1021.byte 102,15,56,220,233
1022 leal 2(%r8),%eax
Robert Sloana94fe052017-02-21 08:49:28 -08001023 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001024.byte 102,15,56,220,241
1025.byte 0x0f,0x38,0xf1,0x44,0x24,44
1026 leal 3(%r8),%eax
1027.byte 102,15,56,220,249
1028 movups -32(%rcx,%r10,1),%xmm1
Robert Sloana94fe052017-02-21 08:49:28 -08001029 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001030
1031.byte 102,15,56,220,208
1032.byte 0x0f,0x38,0xf1,0x44,0x24,60
1033 leal 4(%r8),%eax
1034.byte 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001035 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001036.byte 0x0f,0x38,0xf1,0x44,0x24,76
1037.byte 102,15,56,220,224
1038 leal 5(%r8),%eax
Robert Sloana94fe052017-02-21 08:49:28 -08001039 xorl %ebp,%eax
Adam Langleyd9e397b2015-01-22 14:27:53 -08001040.byte 102,15,56,220,232
1041.byte 0x0f,0x38,0xf1,0x44,0x24,92
1042 movq %r10,%rax
1043.byte 102,15,56,220,240
1044.byte 102,15,56,220,248
1045 movups -16(%rcx,%r10,1),%xmm0
1046
# .Lenc_loop6 is defined outside this block; presumably it runs the
# remaining rounds on xmm2..xmm7 using rax as the key offset - TODO confirm.
1047 call .Lenc_loop6
1048
# XOR the six results with six input blocks, reload xmm1 with round key 1
# and xmm2..xmm7 with the next counter blocks, then store the output.
1049 movdqu (%rdi),%xmm8
1050 movdqu 16(%rdi),%xmm9
1051 movdqu 32(%rdi),%xmm10
1052 movdqu 48(%rdi),%xmm11
1053 movdqu 64(%rdi),%xmm12
1054 movdqu 80(%rdi),%xmm13
1055 leaq 96(%rdi),%rdi
1056 movups -64(%rcx,%r10,1),%xmm1
1057 pxor %xmm2,%xmm8
1058 movaps 0(%rsp),%xmm2
1059 pxor %xmm3,%xmm9
1060 movaps 16(%rsp),%xmm3
1061 pxor %xmm4,%xmm10
1062 movaps 32(%rsp),%xmm4
1063 pxor %xmm5,%xmm11
1064 movaps 48(%rsp),%xmm5
1065 pxor %xmm6,%xmm12
1066 movaps 64(%rsp),%xmm6
1067 pxor %xmm7,%xmm13
1068 movaps 80(%rsp),%xmm7
1069 movdqu %xmm8,(%rsi)
1070 movdqu %xmm9,16(%rsi)
1071 movdqu %xmm10,32(%rsi)
1072 movdqu %xmm11,48(%rsi)
1073 movdqu %xmm12,64(%rsi)
1074 movdqu %xmm13,80(%rsi)
1075 leaq 96(%rsi),%rsi
1076
# Loop while at least six more blocks remain (no borrow from the subtract).
1077 subq $6,%rdx
1078 jnc .Lctr32_loop6
1079
1080 addq $6,%rdx
1081 jz .Lctr32_done
1082
# Undo the six-block loop addressing for the tail: eax goes back to the
# plain iteration count and rcx back to the start of the key schedule.
1083 leal -48(%r10),%eax
1084 leaq -80(%rcx,%r10,1),%rcx
1085 negl %eax
1086 shrl $4,%eax
1087 jmp .Lctr32_tail
1088
1089.align 32
1090.Lctr32_loop8:
# Each pass encrypts the eight counter blocks in xmm2..xmm9 (xmm8/xmm9 are
# loaded from the scratch here). Between rounds, r9d = bswap(r8d + k) XOR
# ebp is written to the last dword of scratch block k, preparing the
# counters for the next pass. Round keys alternate between xmm0 and xmm1.
1091 addl $8,%r8d
1092 movdqa 96(%rsp),%xmm8
1093.byte 102,15,56,220,209
1094 movl %r8d,%r9d
1095 movdqa 112(%rsp),%xmm9
1096.byte 102,15,56,220,217
1097 bswapl %r9d
1098 movups 32-128(%rcx),%xmm0
1099.byte 102,15,56,220,225
Robert Sloana94fe052017-02-21 08:49:28 -08001100 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001101 nop
1102.byte 102,15,56,220,233
1103 movl %r9d,0+12(%rsp)
1104 leaq 1(%r8),%r9
1105.byte 102,15,56,220,241
1106.byte 102,15,56,220,249
1107.byte 102,68,15,56,220,193
1108.byte 102,68,15,56,220,201
1109 movups 48-128(%rcx),%xmm1
1110 bswapl %r9d
1111.byte 102,15,56,220,208
1112.byte 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001113 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001114.byte 0x66,0x90
1115.byte 102,15,56,220,224
1116.byte 102,15,56,220,232
1117 movl %r9d,16+12(%rsp)
1118 leaq 2(%r8),%r9
1119.byte 102,15,56,220,240
1120.byte 102,15,56,220,248
1121.byte 102,68,15,56,220,192
1122.byte 102,68,15,56,220,200
1123 movups 64-128(%rcx),%xmm0
1124 bswapl %r9d
1125.byte 102,15,56,220,209
1126.byte 102,15,56,220,217
Robert Sloana94fe052017-02-21 08:49:28 -08001127 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001128.byte 0x66,0x90
1129.byte 102,15,56,220,225
1130.byte 102,15,56,220,233
1131 movl %r9d,32+12(%rsp)
1132 leaq 3(%r8),%r9
1133.byte 102,15,56,220,241
1134.byte 102,15,56,220,249
1135.byte 102,68,15,56,220,193
1136.byte 102,68,15,56,220,201
1137 movups 80-128(%rcx),%xmm1
1138 bswapl %r9d
1139.byte 102,15,56,220,208
1140.byte 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001141 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001142.byte 0x66,0x90
1143.byte 102,15,56,220,224
1144.byte 102,15,56,220,232
1145 movl %r9d,48+12(%rsp)
1146 leaq 4(%r8),%r9
1147.byte 102,15,56,220,240
1148.byte 102,15,56,220,248
1149.byte 102,68,15,56,220,192
1150.byte 102,68,15,56,220,200
1151 movups 96-128(%rcx),%xmm0
1152 bswapl %r9d
1153.byte 102,15,56,220,209
1154.byte 102,15,56,220,217
Robert Sloana94fe052017-02-21 08:49:28 -08001155 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001156.byte 0x66,0x90
1157.byte 102,15,56,220,225
1158.byte 102,15,56,220,233
1159 movl %r9d,64+12(%rsp)
1160 leaq 5(%r8),%r9
1161.byte 102,15,56,220,241
1162.byte 102,15,56,220,249
1163.byte 102,68,15,56,220,193
1164.byte 102,68,15,56,220,201
1165 movups 112-128(%rcx),%xmm1
1166 bswapl %r9d
1167.byte 102,15,56,220,208
1168.byte 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001169 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001170.byte 0x66,0x90
1171.byte 102,15,56,220,224
1172.byte 102,15,56,220,232
1173 movl %r9d,80+12(%rsp)
1174 leaq 6(%r8),%r9
1175.byte 102,15,56,220,240
1176.byte 102,15,56,220,248
1177.byte 102,68,15,56,220,192
1178.byte 102,68,15,56,220,200
1179 movups 128-128(%rcx),%xmm0
1180 bswapl %r9d
1181.byte 102,15,56,220,209
1182.byte 102,15,56,220,217
Robert Sloana94fe052017-02-21 08:49:28 -08001183 xorl %ebp,%r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001184.byte 0x66,0x90
1185.byte 102,15,56,220,225
1186.byte 102,15,56,220,233
1187 movl %r9d,96+12(%rsp)
1188 leaq 7(%r8),%r9
1189.byte 102,15,56,220,241
1190.byte 102,15,56,220,249
1191.byte 102,68,15,56,220,193
1192.byte 102,68,15,56,220,201
1193 movups 144-128(%rcx),%xmm1
1194 bswapl %r9d
1195.byte 102,15,56,220,208
1196.byte 102,15,56,220,216
1197.byte 102,15,56,220,224
Robert Sloana94fe052017-02-21 08:49:28 -08001198 xorl %ebp,%r9d
# xmm10 = first input block of this pass.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001199 movdqu 0(%rdi),%xmm10
1200.byte 102,15,56,220,232
1201 movl %r9d,112+12(%rsp)
# The flags from this compare feed both the jb and the later je; the
# instructions in between (aesenc, movups) leave the flags untouched.
# Below 11 iterations the rounds are complete after the next step; exactly
# 11 needs two more rounds; anything larger needs four more.
1202 cmpl $11,%eax
1203.byte 102,15,56,220,240
1204.byte 102,15,56,220,248
1205.byte 102,68,15,56,220,192
1206.byte 102,68,15,56,220,200
1207 movups 160-128(%rcx),%xmm0
1208
1209 jb .Lctr32_enc_done
1210
1211.byte 102,15,56,220,209
1212.byte 102,15,56,220,217
1213.byte 102,15,56,220,225
1214.byte 102,15,56,220,233
1215.byte 102,15,56,220,241
1216.byte 102,15,56,220,249
1217.byte 102,68,15,56,220,193
1218.byte 102,68,15,56,220,201
1219 movups 176-128(%rcx),%xmm1
1220
1221.byte 102,15,56,220,208
1222.byte 102,15,56,220,216
1223.byte 102,15,56,220,224
1224.byte 102,15,56,220,232
1225.byte 102,15,56,220,240
1226.byte 102,15,56,220,248
1227.byte 102,68,15,56,220,192
1228.byte 102,68,15,56,220,200
1229 movups 192-128(%rcx),%xmm0
1230 je .Lctr32_enc_done
1231
1232.byte 102,15,56,220,209
1233.byte 102,15,56,220,217
1234.byte 102,15,56,220,225
1235.byte 102,15,56,220,233
1236.byte 102,15,56,220,241
1237.byte 102,15,56,220,249
1238.byte 102,68,15,56,220,193
1239.byte 102,68,15,56,220,201
1240 movups 208-128(%rcx),%xmm1
1241
1242.byte 102,15,56,220,208
1243.byte 102,15,56,220,216
1244.byte 102,15,56,220,224
1245.byte 102,15,56,220,232
1246.byte 102,15,56,220,240
1247.byte 102,15,56,220,248
1248.byte 102,68,15,56,220,192
1249.byte 102,68,15,56,220,200
1250 movups 224-128(%rcx),%xmm0
1251 jmp .Lctr32_enc_done
1252
1253.align 16
1254.Lctr32_enc_done:
# Here xmm1 holds the second-to-last round key and xmm0 the last one.
# Each input block is XORed with xmm0 and then used as the aesenclast
# operand, so the final round and the XOR with the input happen in one
# instruction per block.
1255 movdqu 16(%rdi),%xmm11
1256 pxor %xmm0,%xmm10
1257 movdqu 32(%rdi),%xmm12
1258 pxor %xmm0,%xmm11
1259 movdqu 48(%rdi),%xmm13
1260 pxor %xmm0,%xmm12
1261 movdqu 64(%rdi),%xmm14
1262 pxor %xmm0,%xmm13
1263 movdqu 80(%rdi),%xmm15
1264 pxor %xmm0,%xmm14
1265 pxor %xmm0,%xmm15
1266.byte 102,15,56,220,209
1267.byte 102,15,56,220,217
1268.byte 102,15,56,220,225
1269.byte 102,15,56,220,233
1270.byte 102,15,56,220,241
1271.byte 102,15,56,220,249
1272.byte 102,68,15,56,220,193
1273.byte 102,68,15,56,220,201
1274 movdqu 96(%rdi),%xmm1
1275 leaq 128(%rdi),%rdi
1276
# As each xmm10..xmm15 is consumed by aesenclast it is reloaded: first with
# the remaining input blocks, then with the next counter blocks from the
# scratch area.
1277.byte 102,65,15,56,221,210
1278 pxor %xmm0,%xmm1
1279 movdqu 112-128(%rdi),%xmm10
1280.byte 102,65,15,56,221,219
1281 pxor %xmm0,%xmm10
1282 movdqa 0(%rsp),%xmm11
1283.byte 102,65,15,56,221,228
1284.byte 102,65,15,56,221,237
1285 movdqa 16(%rsp),%xmm12
1286 movdqa 32(%rsp),%xmm13
1287.byte 102,65,15,56,221,246
1288.byte 102,65,15,56,221,255
1289 movdqa 48(%rsp),%xmm14
1290 movdqa 64(%rsp),%xmm15
1291.byte 102,68,15,56,221,193
1292 movdqa 80(%rsp),%xmm0
# xmm1 = round key 1 again, ready for the next pass or for the tail.
1293 movups 16-128(%rcx),%xmm1
1294.byte 102,69,15,56,221,202
1295
# Store eight output blocks and move the next counter blocks 0..5 into
# xmm2..xmm7.
1296 movups %xmm2,(%rsi)
1297 movdqa %xmm11,%xmm2
1298 movups %xmm3,16(%rsi)
1299 movdqa %xmm12,%xmm3
1300 movups %xmm4,32(%rsi)
1301 movdqa %xmm13,%xmm4
1302 movups %xmm5,48(%rsi)
1303 movdqa %xmm14,%xmm5
1304 movups %xmm6,64(%rsi)
1305 movdqa %xmm15,%xmm6
1306 movups %xmm7,80(%rsi)
1307 movdqa %xmm0,%xmm7
1308 movups %xmm8,96(%rsi)
1309 movups %xmm9,112(%rsi)
1310 leaq 128(%rsi),%rsi
1311
# Loop while at least eight more blocks remain (no borrow from the subtract).
1312 subq $8,%rdx
1313 jnc .Lctr32_loop8
1314
1315 addq $8,%rdx
1316 jz .Lctr32_done
# Remove the 128-byte bias so rcx points at the key schedule start again.
1317 leaq -128(%rcx),%rcx
1318
1319.Lctr32_tail:
Adam Langleye9ada862015-05-11 17:20:37 -07001320
1321
# Tail: one to seven blocks remain. On every path into here rcx points at
# the start of the key schedule, eax holds the iteration count, xmm1 holds
# round key 1 and xmm2..xmm7 hold whitened counter blocks.
# Fewer than four blocks use .Lctr32_loop3, exactly four use
# .Lctr32_loop4, and five to seven fall through.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001322 leaq 16(%rcx),%rcx
1323 cmpq $4,%rdx
1324 jb .Lctr32_loop3
1325 je .Lctr32_loop4
1326
Adam Langleye9ada862015-05-11 17:20:37 -07001327
# Five to seven blocks: a seventh counter block goes into xmm8 and xmm9 is
# zeroed. The first round is applied here, and rcx/rax are converted to
# the end-relative addressing form before calling the shared helper.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001328 shll $4,%eax
1329 movdqa 96(%rsp),%xmm8
1330 pxor %xmm9,%xmm9
1331
1332 movups 16(%rcx),%xmm0
1333.byte 102,15,56,220,209
1334.byte 102,15,56,220,217
1335 leaq 32-16(%rcx,%rax,1),%rcx
1336 negq %rax
1337.byte 102,15,56,220,225
1338 addq $16,%rax
1339 movups (%rdi),%xmm10
1340.byte 102,15,56,220,233
1341.byte 102,15,56,220,241
1342 movups 16(%rdi),%xmm11
1343 movups 32(%rdi),%xmm12
1344.byte 102,15,56,220,249
1345.byte 102,68,15,56,220,193
1346
# .Lenc_loop8_enter is defined outside this block; presumably it finishes
# the remaining rounds on xmm2..xmm9 - TODO confirm.
1347 call .Lenc_loop8_enter
1348
# Five blocks are always stored; the sixth and seventh depend on rdx.
1349 movdqu 48(%rdi),%xmm13
1350 pxor %xmm10,%xmm2
1351 movdqu 64(%rdi),%xmm10
1352 pxor %xmm11,%xmm3
1353 movdqu %xmm2,(%rsi)
1354 pxor %xmm12,%xmm4
1355 movdqu %xmm3,16(%rsi)
1356 pxor %xmm13,%xmm5
1357 movdqu %xmm4,32(%rsi)
1358 pxor %xmm10,%xmm6
1359 movdqu %xmm5,48(%rsi)
1360 movdqu %xmm6,64(%rsi)
1361 cmpq $6,%rdx
1362 jb .Lctr32_done
1363
# The je below still sees the flags of the cmpq above; movups and xorps do
# not modify flags.
1364 movups 80(%rdi),%xmm11
1365 xorps %xmm11,%xmm7
1366 movups %xmm7,80(%rsi)
1367 je .Lctr32_done
1368
1369 movups 96(%rdi),%xmm12
1370 xorps %xmm12,%xmm8
1371 movups %xmm8,96(%rsi)
1372 jmp .Lctr32_done
1373
1374.align 32
1375.Lctr32_loop4:
# Exactly four blocks: run eax rounds on xmm2..xmm5, stepping rcx through
# the round keys, then aesenclast, XOR with the input and store.
1376.byte 102,15,56,220,209
1377 leaq 16(%rcx),%rcx
1378 decl %eax
1379.byte 102,15,56,220,217
1380.byte 102,15,56,220,225
1381.byte 102,15,56,220,233
1382 movups (%rcx),%xmm1
1383 jnz .Lctr32_loop4
1384.byte 102,15,56,221,209
1385.byte 102,15,56,221,217
1386 movups (%rdi),%xmm10
1387 movups 16(%rdi),%xmm11
1388.byte 102,15,56,221,225
1389.byte 102,15,56,221,233
1390 movups 32(%rdi),%xmm12
1391 movups 48(%rdi),%xmm13
1392
1393 xorps %xmm10,%xmm2
1394 movups %xmm2,(%rsi)
1395 xorps %xmm11,%xmm3
1396 movups %xmm3,16(%rsi)
1397 pxor %xmm12,%xmm4
1398 movdqu %xmm4,32(%rsi)
1399 pxor %xmm13,%xmm5
1400 movdqu %xmm5,48(%rsi)
1401 jmp .Lctr32_done
1402
1403.align 32
1404.Lctr32_loop3:
# One to three blocks: three counter blocks are always encrypted, but only
# rdx of them are XORed with input and stored.
1405.byte 102,15,56,220,209
1406 leaq 16(%rcx),%rcx
1407 decl %eax
1408.byte 102,15,56,220,217
1409.byte 102,15,56,220,225
1410 movups (%rcx),%xmm1
1411 jnz .Lctr32_loop3
1412.byte 102,15,56,221,209
1413.byte 102,15,56,221,217
1414.byte 102,15,56,221,225
1415
1416 movups (%rdi),%xmm10
1417 xorps %xmm10,%xmm2
1418 movups %xmm2,(%rsi)
1419 cmpq $2,%rdx
1420 jb .Lctr32_done
1421
# The je below reuses the flags of the cmpq above.
1422 movups 16(%rdi),%xmm11
1423 xorps %xmm11,%xmm3
1424 movups %xmm3,16(%rsi)
1425 je .Lctr32_done
1426
1427 movups 32(%rdi),%xmm12
1428 xorps %xmm12,%xmm4
1429 movups %xmm4,32(%rsi)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001430
Adam Langleyd9e397b2015-01-22 14:27:53 -08001431.Lctr32_done:
# Common exit of the bulk path: zero ebp, xmm0..xmm15 and the 128-byte
# scratch area so that, presumably, no key or keystream material is left
# behind. Then restore rbp from just below the entry stack pointer held in
# r11 and restore rsp.
Adam Langleye9ada862015-05-11 17:20:37 -07001432 xorps %xmm0,%xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08001433 xorl %ebp,%ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001434 pxor %xmm1,%xmm1
1435 pxor %xmm2,%xmm2
1436 pxor %xmm3,%xmm3
1437 pxor %xmm4,%xmm4
1438 pxor %xmm5,%xmm5
1439 pxor %xmm6,%xmm6
1440 pxor %xmm7,%xmm7
1441 movaps %xmm0,0(%rsp)
1442 pxor %xmm8,%xmm8
1443 movaps %xmm0,16(%rsp)
1444 pxor %xmm9,%xmm9
1445 movaps %xmm0,32(%rsp)
1446 pxor %xmm10,%xmm10
1447 movaps %xmm0,48(%rsp)
1448 pxor %xmm11,%xmm11
1449 movaps %xmm0,64(%rsp)
1450 pxor %xmm12,%xmm12
1451 movaps %xmm0,80(%rsp)
1452 pxor %xmm13,%xmm13
1453 movaps %xmm0,96(%rsp)
1454 pxor %xmm14,%xmm14
1455 movaps %xmm0,112(%rsp)
1456 pxor %xmm15,%xmm15
Robert Sloana94fe052017-02-21 08:49:28 -08001457 movq -8(%r11),%rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07001458.cfi_restore %rbp
Robert Sloana94fe052017-02-21 08:49:28 -08001459 leaq (%r11),%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07001460.cfi_def_cfa_register %rsp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001461.Lctr32_epilogue:
# Shared return point for the single-block and bulk paths.
1462 .byte 0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07001463.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001464.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001465.globl aes_hw_cbc_encrypt
1466.hidden aes_hw_cbc_encrypt
1467.type aes_hw_cbc_encrypt,@function
Adam Langleyd9e397b2015-01-22 14:27:53 -08001468.align 16
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01001469aes_hw_cbc_encrypt:
Robert Sloanab8b8882018-03-26 11:39:51 -07001470.cfi_startproc
Adam Langleyd9e397b2015-01-22 14:27:53 -08001471 testq %rdx,%rdx
1472 jz .Lcbc_ret
1473
1474 movl 240(%rcx),%r10d
1475 movq %rcx,%r11
1476 testl %r9d,%r9d
1477 jz .Lcbc_decrypt
1478
1479 movups (%r8),%xmm2
1480 movl %r10d,%eax
1481 cmpq $16,%rdx
1482 jb .Lcbc_enc_tail
1483 subq $16,%rdx
1484 jmp .Lcbc_enc_loop
1485.align 16
1486.Lcbc_enc_loop:
1487 movups (%rdi),%xmm3
1488 leaq 16(%rdi),%rdi
1489
1490 movups (%rcx),%xmm0
1491 movups 16(%rcx),%xmm1
1492 xorps %xmm0,%xmm3
1493 leaq 32(%rcx),%rcx
1494 xorps %xmm3,%xmm2
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001495.Loop_enc1_6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001496.byte 102,15,56,220,209
1497 decl %eax
1498 movups (%rcx),%xmm1
1499 leaq 16(%rcx),%rcx
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001500 jnz .Loop_enc1_6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001501.byte 102,15,56,221,209
1502 movl %r10d,%eax
1503 movq %r11,%rcx
1504 movups %xmm2,0(%rsi)
1505 leaq 16(%rsi),%rsi
1506 subq $16,%rdx
1507 jnc .Lcbc_enc_loop
1508 addq $16,%rdx
1509 jnz .Lcbc_enc_tail
Adam Langleye9ada862015-05-11 17:20:37 -07001510 pxor %xmm0,%xmm0
1511 pxor %xmm1,%xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08001512 movups %xmm2,(%r8)
Adam Langleye9ada862015-05-11 17:20:37 -07001513 pxor %xmm2,%xmm2
1514 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001515 jmp .Lcbc_ret
1516
1517.Lcbc_enc_tail:
1518 movq %rdx,%rcx
1519 xchgq %rdi,%rsi
1520.long 0x9066A4F3
1521 movl $16,%ecx
1522 subq %rdx,%rcx
1523 xorl %eax,%eax
1524.long 0x9066AAF3
1525 leaq -16(%rdi),%rdi
1526 movl %r10d,%eax
1527 movq %rdi,%rsi
1528 movq %r11,%rcx
1529 xorq %rdx,%rdx
1530 jmp .Lcbc_enc_loop
1531
1532.align 16
1533.Lcbc_decrypt:
Adam Langleye9ada862015-05-11 17:20:37 -07001534 cmpq $16,%rdx
1535 jne .Lcbc_decrypt_bulk
1536
1537
1538
1539 movdqu (%rdi),%xmm2
1540 movdqu (%r8),%xmm3
1541 movdqa %xmm2,%xmm4
1542 movups (%rcx),%xmm0
1543 movups 16(%rcx),%xmm1
1544 leaq 32(%rcx),%rcx
1545 xorps %xmm0,%xmm2
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001546.Loop_dec1_7:
Adam Langleye9ada862015-05-11 17:20:37 -07001547.byte 102,15,56,222,209
1548 decl %r10d
1549 movups (%rcx),%xmm1
1550 leaq 16(%rcx),%rcx
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001551 jnz .Loop_dec1_7
Adam Langleye9ada862015-05-11 17:20:37 -07001552.byte 102,15,56,223,209
1553 pxor %xmm0,%xmm0
1554 pxor %xmm1,%xmm1
1555 movdqu %xmm4,(%r8)
1556 xorps %xmm3,%xmm2
1557 pxor %xmm3,%xmm3
1558 movups %xmm2,(%rsi)
1559 pxor %xmm2,%xmm2
1560 jmp .Lcbc_ret
1561.align 16
1562.Lcbc_decrypt_bulk:
Robert Sloana94fe052017-02-21 08:49:28 -08001563 leaq (%rsp),%r11
Robert Sloanab8b8882018-03-26 11:39:51 -07001564.cfi_def_cfa_register %r11
Adam Langleyd9e397b2015-01-22 14:27:53 -08001565 pushq %rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07001566.cfi_offset %rbp,-16
Adam Langleyd9e397b2015-01-22 14:27:53 -08001567 subq $16,%rsp
1568 andq $-16,%rsp
Robert Sloana94fe052017-02-21 08:49:28 -08001569 movq %rcx,%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001570 movups (%r8),%xmm10
1571 movl %r10d,%eax
David Benjamin4969cc92016-04-22 15:02:23 -04001572 cmpq $0x50,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001573 jbe .Lcbc_dec_tail
1574
1575 movups (%rcx),%xmm0
1576 movdqu 0(%rdi),%xmm2
1577 movdqu 16(%rdi),%xmm3
1578 movdqa %xmm2,%xmm11
1579 movdqu 32(%rdi),%xmm4
1580 movdqa %xmm3,%xmm12
1581 movdqu 48(%rdi),%xmm5
1582 movdqa %xmm4,%xmm13
1583 movdqu 64(%rdi),%xmm6
1584 movdqa %xmm5,%xmm14
1585 movdqu 80(%rdi),%xmm7
1586 movdqa %xmm6,%xmm15
Robert Sloan2424d842017-05-01 07:46:28 -07001587 leaq OPENSSL_ia32cap_P(%rip),%r9
Robert Sloan572a4e22017-04-17 10:52:19 -07001588 movl 4(%r9),%r9d
David Benjamin4969cc92016-04-22 15:02:23 -04001589 cmpq $0x70,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001590 jbe .Lcbc_dec_six_or_seven
1591
1592 andl $71303168,%r9d
David Benjamin4969cc92016-04-22 15:02:23 -04001593 subq $0x50,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001594 cmpl $4194304,%r9d
1595 je .Lcbc_dec_loop6_enter
David Benjamin4969cc92016-04-22 15:02:23 -04001596 subq $0x20,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001597 leaq 112(%rcx),%rcx
1598 jmp .Lcbc_dec_loop8_enter
1599.align 16
1600.Lcbc_dec_loop8:
1601 movups %xmm9,(%rsi)
1602 leaq 16(%rsi),%rsi
1603.Lcbc_dec_loop8_enter:
1604 movdqu 96(%rdi),%xmm8
1605 pxor %xmm0,%xmm2
1606 movdqu 112(%rdi),%xmm9
1607 pxor %xmm0,%xmm3
1608 movups 16-112(%rcx),%xmm1
1609 pxor %xmm0,%xmm4
Robert Sloana94fe052017-02-21 08:49:28 -08001610 movq $-1,%rbp
David Benjamin4969cc92016-04-22 15:02:23 -04001611 cmpq $0x70,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001612 pxor %xmm0,%xmm5
1613 pxor %xmm0,%xmm6
1614 pxor %xmm0,%xmm7
1615 pxor %xmm0,%xmm8
1616
1617.byte 102,15,56,222,209
1618 pxor %xmm0,%xmm9
1619 movups 32-112(%rcx),%xmm0
1620.byte 102,15,56,222,217
1621.byte 102,15,56,222,225
1622.byte 102,15,56,222,233
1623.byte 102,15,56,222,241
1624.byte 102,15,56,222,249
1625.byte 102,68,15,56,222,193
Robert Sloana94fe052017-02-21 08:49:28 -08001626 adcq $0,%rbp
1627 andq $128,%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001628.byte 102,68,15,56,222,201
Robert Sloana94fe052017-02-21 08:49:28 -08001629 addq %rdi,%rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001630 movups 48-112(%rcx),%xmm1
1631.byte 102,15,56,222,208
1632.byte 102,15,56,222,216
1633.byte 102,15,56,222,224
1634.byte 102,15,56,222,232
1635.byte 102,15,56,222,240
1636.byte 102,15,56,222,248
1637.byte 102,68,15,56,222,192
1638.byte 102,68,15,56,222,200
1639 movups 64-112(%rcx),%xmm0
1640 nop
1641.byte 102,15,56,222,209
1642.byte 102,15,56,222,217
1643.byte 102,15,56,222,225
1644.byte 102,15,56,222,233
1645.byte 102,15,56,222,241
1646.byte 102,15,56,222,249
1647.byte 102,68,15,56,222,193
1648.byte 102,68,15,56,222,201
1649 movups 80-112(%rcx),%xmm1
1650 nop
1651.byte 102,15,56,222,208
1652.byte 102,15,56,222,216
1653.byte 102,15,56,222,224
1654.byte 102,15,56,222,232
1655.byte 102,15,56,222,240
1656.byte 102,15,56,222,248
1657.byte 102,68,15,56,222,192
1658.byte 102,68,15,56,222,200
1659 movups 96-112(%rcx),%xmm0
1660 nop
1661.byte 102,15,56,222,209
1662.byte 102,15,56,222,217
1663.byte 102,15,56,222,225
1664.byte 102,15,56,222,233
1665.byte 102,15,56,222,241
1666.byte 102,15,56,222,249
1667.byte 102,68,15,56,222,193
1668.byte 102,68,15,56,222,201
1669 movups 112-112(%rcx),%xmm1
1670 nop
1671.byte 102,15,56,222,208
1672.byte 102,15,56,222,216
1673.byte 102,15,56,222,224
1674.byte 102,15,56,222,232
1675.byte 102,15,56,222,240
1676.byte 102,15,56,222,248
1677.byte 102,68,15,56,222,192
1678.byte 102,68,15,56,222,200
1679 movups 128-112(%rcx),%xmm0
1680 nop
1681.byte 102,15,56,222,209
1682.byte 102,15,56,222,217
1683.byte 102,15,56,222,225
1684.byte 102,15,56,222,233
1685.byte 102,15,56,222,241
1686.byte 102,15,56,222,249
1687.byte 102,68,15,56,222,193
1688.byte 102,68,15,56,222,201
1689 movups 144-112(%rcx),%xmm1
1690 cmpl $11,%eax
1691.byte 102,15,56,222,208
1692.byte 102,15,56,222,216
1693.byte 102,15,56,222,224
1694.byte 102,15,56,222,232
1695.byte 102,15,56,222,240
1696.byte 102,15,56,222,248
1697.byte 102,68,15,56,222,192
1698.byte 102,68,15,56,222,200
1699 movups 160-112(%rcx),%xmm0
1700 jb .Lcbc_dec_done
1701.byte 102,15,56,222,209
1702.byte 102,15,56,222,217
1703.byte 102,15,56,222,225
1704.byte 102,15,56,222,233
1705.byte 102,15,56,222,241
1706.byte 102,15,56,222,249
1707.byte 102,68,15,56,222,193
1708.byte 102,68,15,56,222,201
1709 movups 176-112(%rcx),%xmm1
1710 nop
1711.byte 102,15,56,222,208
1712.byte 102,15,56,222,216
1713.byte 102,15,56,222,224
1714.byte 102,15,56,222,232
1715.byte 102,15,56,222,240
1716.byte 102,15,56,222,248
1717.byte 102,68,15,56,222,192
1718.byte 102,68,15,56,222,200
1719 movups 192-112(%rcx),%xmm0
1720 je .Lcbc_dec_done
1721.byte 102,15,56,222,209
1722.byte 102,15,56,222,217
1723.byte 102,15,56,222,225
1724.byte 102,15,56,222,233
1725.byte 102,15,56,222,241
1726.byte 102,15,56,222,249
1727.byte 102,68,15,56,222,193
1728.byte 102,68,15,56,222,201
1729 movups 208-112(%rcx),%xmm1
1730 nop
1731.byte 102,15,56,222,208
1732.byte 102,15,56,222,216
1733.byte 102,15,56,222,224
1734.byte 102,15,56,222,232
1735.byte 102,15,56,222,240
1736.byte 102,15,56,222,248
1737.byte 102,68,15,56,222,192
1738.byte 102,68,15,56,222,200
1739 movups 224-112(%rcx),%xmm0
1740 jmp .Lcbc_dec_done
1741.align 16
1742.Lcbc_dec_done:
1743.byte 102,15,56,222,209
1744.byte 102,15,56,222,217
1745 pxor %xmm0,%xmm10
1746 pxor %xmm0,%xmm11
1747.byte 102,15,56,222,225
1748.byte 102,15,56,222,233
1749 pxor %xmm0,%xmm12
1750 pxor %xmm0,%xmm13
1751.byte 102,15,56,222,241
1752.byte 102,15,56,222,249
1753 pxor %xmm0,%xmm14
1754 pxor %xmm0,%xmm15
1755.byte 102,68,15,56,222,193
1756.byte 102,68,15,56,222,201
1757 movdqu 80(%rdi),%xmm1
1758
1759.byte 102,65,15,56,223,210
1760 movdqu 96(%rdi),%xmm10
1761 pxor %xmm0,%xmm1
1762.byte 102,65,15,56,223,219
1763 pxor %xmm0,%xmm10
1764 movdqu 112(%rdi),%xmm0
1765.byte 102,65,15,56,223,228
1766 leaq 128(%rdi),%rdi
Robert Sloana94fe052017-02-21 08:49:28 -08001767 movdqu 0(%rbp),%xmm11
Adam Langleyd9e397b2015-01-22 14:27:53 -08001768.byte 102,65,15,56,223,237
1769.byte 102,65,15,56,223,246
Robert Sloana94fe052017-02-21 08:49:28 -08001770 movdqu 16(%rbp),%xmm12
1771 movdqu 32(%rbp),%xmm13
Adam Langleyd9e397b2015-01-22 14:27:53 -08001772.byte 102,65,15,56,223,255
1773.byte 102,68,15,56,223,193
Robert Sloana94fe052017-02-21 08:49:28 -08001774 movdqu 48(%rbp),%xmm14
1775 movdqu 64(%rbp),%xmm15
Adam Langleyd9e397b2015-01-22 14:27:53 -08001776.byte 102,69,15,56,223,202
1777 movdqa %xmm0,%xmm10
Robert Sloana94fe052017-02-21 08:49:28 -08001778 movdqu 80(%rbp),%xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08001779 movups -112(%rcx),%xmm0
1780
1781 movups %xmm2,(%rsi)
1782 movdqa %xmm11,%xmm2
1783 movups %xmm3,16(%rsi)
1784 movdqa %xmm12,%xmm3
1785 movups %xmm4,32(%rsi)
1786 movdqa %xmm13,%xmm4
1787 movups %xmm5,48(%rsi)
1788 movdqa %xmm14,%xmm5
1789 movups %xmm6,64(%rsi)
1790 movdqa %xmm15,%xmm6
1791 movups %xmm7,80(%rsi)
1792 movdqa %xmm1,%xmm7
1793 movups %xmm8,96(%rsi)
1794 leaq 112(%rsi),%rsi
1795
David Benjamin4969cc92016-04-22 15:02:23 -04001796 subq $0x80,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001797 ja .Lcbc_dec_loop8
1798
1799 movaps %xmm9,%xmm2
1800 leaq -112(%rcx),%rcx
David Benjamin4969cc92016-04-22 15:02:23 -04001801 addq $0x70,%rdx
Adam Langleye9ada862015-05-11 17:20:37 -07001802 jle .Lcbc_dec_clear_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001803 movups %xmm9,(%rsi)
1804 leaq 16(%rsi),%rsi
David Benjamin4969cc92016-04-22 15:02:23 -04001805 cmpq $0x50,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001806 jbe .Lcbc_dec_tail
1807
1808 movaps %xmm11,%xmm2
1809.Lcbc_dec_six_or_seven:
David Benjamin4969cc92016-04-22 15:02:23 -04001810 cmpq $0x60,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001811 ja .Lcbc_dec_seven
1812
1813 movaps %xmm7,%xmm8
1814 call _aesni_decrypt6
1815 pxor %xmm10,%xmm2
1816 movaps %xmm8,%xmm10
1817 pxor %xmm11,%xmm3
1818 movdqu %xmm2,(%rsi)
1819 pxor %xmm12,%xmm4
1820 movdqu %xmm3,16(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001821 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001822 pxor %xmm13,%xmm5
1823 movdqu %xmm4,32(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001824 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001825 pxor %xmm14,%xmm6
1826 movdqu %xmm5,48(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001827 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08001828 pxor %xmm15,%xmm7
1829 movdqu %xmm6,64(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001830 pxor %xmm6,%xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001831 leaq 80(%rsi),%rsi
1832 movdqa %xmm7,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001833 pxor %xmm7,%xmm7
Adam Langleyd9e397b2015-01-22 14:27:53 -08001834 jmp .Lcbc_dec_tail_collected
1835
1836.align 16
1837.Lcbc_dec_seven:
1838 movups 96(%rdi),%xmm8
1839 xorps %xmm9,%xmm9
1840 call _aesni_decrypt8
1841 movups 80(%rdi),%xmm9
1842 pxor %xmm10,%xmm2
1843 movups 96(%rdi),%xmm10
1844 pxor %xmm11,%xmm3
1845 movdqu %xmm2,(%rsi)
1846 pxor %xmm12,%xmm4
1847 movdqu %xmm3,16(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001848 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001849 pxor %xmm13,%xmm5
1850 movdqu %xmm4,32(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001851 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001852 pxor %xmm14,%xmm6
1853 movdqu %xmm5,48(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001854 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08001855 pxor %xmm15,%xmm7
1856 movdqu %xmm6,64(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001857 pxor %xmm6,%xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001858 pxor %xmm9,%xmm8
1859 movdqu %xmm7,80(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001860 pxor %xmm7,%xmm7
Adam Langleyd9e397b2015-01-22 14:27:53 -08001861 leaq 96(%rsi),%rsi
1862 movdqa %xmm8,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001863 pxor %xmm8,%xmm8
1864 pxor %xmm9,%xmm9
Adam Langleyd9e397b2015-01-22 14:27:53 -08001865 jmp .Lcbc_dec_tail_collected
1866
1867.align 16
1868.Lcbc_dec_loop6:
1869 movups %xmm7,(%rsi)
1870 leaq 16(%rsi),%rsi
1871 movdqu 0(%rdi),%xmm2
1872 movdqu 16(%rdi),%xmm3
1873 movdqa %xmm2,%xmm11
1874 movdqu 32(%rdi),%xmm4
1875 movdqa %xmm3,%xmm12
1876 movdqu 48(%rdi),%xmm5
1877 movdqa %xmm4,%xmm13
1878 movdqu 64(%rdi),%xmm6
1879 movdqa %xmm5,%xmm14
1880 movdqu 80(%rdi),%xmm7
1881 movdqa %xmm6,%xmm15
1882.Lcbc_dec_loop6_enter:
1883 leaq 96(%rdi),%rdi
1884 movdqa %xmm7,%xmm8
1885
1886 call _aesni_decrypt6
1887
1888 pxor %xmm10,%xmm2
1889 movdqa %xmm8,%xmm10
1890 pxor %xmm11,%xmm3
1891 movdqu %xmm2,(%rsi)
1892 pxor %xmm12,%xmm4
1893 movdqu %xmm3,16(%rsi)
1894 pxor %xmm13,%xmm5
1895 movdqu %xmm4,32(%rsi)
1896 pxor %xmm14,%xmm6
Robert Sloana94fe052017-02-21 08:49:28 -08001897 movq %rbp,%rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001898 movdqu %xmm5,48(%rsi)
1899 pxor %xmm15,%xmm7
1900 movl %r10d,%eax
1901 movdqu %xmm6,64(%rsi)
1902 leaq 80(%rsi),%rsi
David Benjamin4969cc92016-04-22 15:02:23 -04001903 subq $0x60,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001904 ja .Lcbc_dec_loop6
1905
1906 movdqa %xmm7,%xmm2
David Benjamin4969cc92016-04-22 15:02:23 -04001907 addq $0x50,%rdx
Adam Langleye9ada862015-05-11 17:20:37 -07001908 jle .Lcbc_dec_clear_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08001909 movups %xmm7,(%rsi)
1910 leaq 16(%rsi),%rsi
1911
1912.Lcbc_dec_tail:
1913 movups (%rdi),%xmm2
David Benjamin4969cc92016-04-22 15:02:23 -04001914 subq $0x10,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001915 jbe .Lcbc_dec_one
1916
1917 movups 16(%rdi),%xmm3
1918 movaps %xmm2,%xmm11
David Benjamin4969cc92016-04-22 15:02:23 -04001919 subq $0x10,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001920 jbe .Lcbc_dec_two
1921
1922 movups 32(%rdi),%xmm4
1923 movaps %xmm3,%xmm12
David Benjamin4969cc92016-04-22 15:02:23 -04001924 subq $0x10,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001925 jbe .Lcbc_dec_three
1926
1927 movups 48(%rdi),%xmm5
1928 movaps %xmm4,%xmm13
David Benjamin4969cc92016-04-22 15:02:23 -04001929 subq $0x10,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001930 jbe .Lcbc_dec_four
1931
1932 movups 64(%rdi),%xmm6
1933 movaps %xmm5,%xmm14
1934 movaps %xmm6,%xmm15
1935 xorps %xmm7,%xmm7
1936 call _aesni_decrypt6
1937 pxor %xmm10,%xmm2
1938 movaps %xmm15,%xmm10
1939 pxor %xmm11,%xmm3
1940 movdqu %xmm2,(%rsi)
1941 pxor %xmm12,%xmm4
1942 movdqu %xmm3,16(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001943 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001944 pxor %xmm13,%xmm5
1945 movdqu %xmm4,32(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001946 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001947 pxor %xmm14,%xmm6
1948 movdqu %xmm5,48(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001949 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08001950 leaq 64(%rsi),%rsi
1951 movdqa %xmm6,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001952 pxor %xmm6,%xmm6
1953 pxor %xmm7,%xmm7
David Benjamin4969cc92016-04-22 15:02:23 -04001954 subq $0x10,%rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001955 jmp .Lcbc_dec_tail_collected
1956
1957.align 16
1958.Lcbc_dec_one:
1959 movaps %xmm2,%xmm11
1960 movups (%rcx),%xmm0
1961 movups 16(%rcx),%xmm1
1962 leaq 32(%rcx),%rcx
1963 xorps %xmm0,%xmm2
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001964.Loop_dec1_8:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001965.byte 102,15,56,222,209
1966 decl %eax
1967 movups (%rcx),%xmm1
1968 leaq 16(%rcx),%rcx
Robert Sloan4c22c5f2019-03-01 15:53:37 -08001969 jnz .Loop_dec1_8
Adam Langleyd9e397b2015-01-22 14:27:53 -08001970.byte 102,15,56,223,209
1971 xorps %xmm10,%xmm2
1972 movaps %xmm11,%xmm10
1973 jmp .Lcbc_dec_tail_collected
1974.align 16
1975.Lcbc_dec_two:
1976 movaps %xmm3,%xmm12
1977 call _aesni_decrypt2
1978 pxor %xmm10,%xmm2
1979 movaps %xmm12,%xmm10
1980 pxor %xmm11,%xmm3
1981 movdqu %xmm2,(%rsi)
1982 movdqa %xmm3,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001983 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001984 leaq 16(%rsi),%rsi
1985 jmp .Lcbc_dec_tail_collected
1986.align 16
1987.Lcbc_dec_three:
1988 movaps %xmm4,%xmm13
1989 call _aesni_decrypt3
1990 pxor %xmm10,%xmm2
1991 movaps %xmm13,%xmm10
1992 pxor %xmm11,%xmm3
1993 movdqu %xmm2,(%rsi)
1994 pxor %xmm12,%xmm4
1995 movdqu %xmm3,16(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07001996 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001997 movdqa %xmm4,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001998 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001999 leaq 32(%rsi),%rsi
2000 jmp .Lcbc_dec_tail_collected
2001.align 16
2002.Lcbc_dec_four:
2003 movaps %xmm5,%xmm14
2004 call _aesni_decrypt4
2005 pxor %xmm10,%xmm2
2006 movaps %xmm14,%xmm10
2007 pxor %xmm11,%xmm3
2008 movdqu %xmm2,(%rsi)
2009 pxor %xmm12,%xmm4
2010 movdqu %xmm3,16(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07002011 pxor %xmm3,%xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08002012 pxor %xmm13,%xmm5
2013 movdqu %xmm4,32(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07002014 pxor %xmm4,%xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08002015 movdqa %xmm5,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07002016 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08002017 leaq 48(%rsi),%rsi
2018 jmp .Lcbc_dec_tail_collected
2019
2020.align 16
Adam Langleye9ada862015-05-11 17:20:37 -07002021.Lcbc_dec_clear_tail_collected:
2022 pxor %xmm3,%xmm3
2023 pxor %xmm4,%xmm4
2024 pxor %xmm5,%xmm5
2025 pxor %xmm6,%xmm6
2026 pxor %xmm7,%xmm7
2027 pxor %xmm8,%xmm8
2028 pxor %xmm9,%xmm9
Adam Langleyd9e397b2015-01-22 14:27:53 -08002029.Lcbc_dec_tail_collected:
2030 movups %xmm10,(%r8)
2031 andq $15,%rdx
2032 jnz .Lcbc_dec_tail_partial
2033 movups %xmm2,(%rsi)
Adam Langleye9ada862015-05-11 17:20:37 -07002034 pxor %xmm2,%xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002035 jmp .Lcbc_dec_ret
2036.align 16
2037.Lcbc_dec_tail_partial:
2038 movaps %xmm2,(%rsp)
Adam Langleye9ada862015-05-11 17:20:37 -07002039 pxor %xmm2,%xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002040 movq $16,%rcx
2041 movq %rsi,%rdi
2042 subq %rdx,%rcx
2043 leaq (%rsp),%rsi
2044.long 0x9066A4F3
Adam Langleye9ada862015-05-11 17:20:37 -07002045 movdqa %xmm2,(%rsp)
Adam Langleyd9e397b2015-01-22 14:27:53 -08002046
2047.Lcbc_dec_ret:
Adam Langleye9ada862015-05-11 17:20:37 -07002048 xorps %xmm0,%xmm0
2049 pxor %xmm1,%xmm1
Robert Sloana94fe052017-02-21 08:49:28 -08002050 movq -8(%r11),%rbp
Robert Sloanab8b8882018-03-26 11:39:51 -07002051.cfi_restore %rbp
Robert Sloana94fe052017-02-21 08:49:28 -08002052 leaq (%r11),%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07002053.cfi_def_cfa_register %rsp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002054.Lcbc_ret:
2055 .byte 0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07002056.cfi_endproc
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002057.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
# aes_hw_set_decrypt_key
#
# Builds an AES decryption key schedule. The encryption schedule is first
# produced by __aesni_set_encrypt_key, which receives this routine's
# %rdi, %esi and %rdx unchanged. The round keys are then reversed in place
# and every key except the two outermost ones is passed through aesimc.
#
# Result: %eax holds whatever __aesni_set_encrypt_key returned. On a
# non-zero result the schedule is left as the callee produced it.
# Clobbers %rsi, %rdi, %rdx and flags. %xmm0 and %xmm1 are zero on return.
2058.globl aes_hw_set_decrypt_key
2059.hidden aes_hw_set_decrypt_key
2060.type aes_hw_set_decrypt_key,@function
Adam Langleyd9e397b2015-01-22 14:27:53 -08002061.align 16
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002062aes_hw_set_decrypt_key:
Robert Sloanab8b8882018-03-26 11:39:51 -07002063.cfi_startproc
# Hand-encoded subq $8,%rsp, undone by the addq at .Ldec_key_ret.
# NOTE(review): presumably keeps %rsp 16-byte aligned across the call - confirm.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002064.byte 0x48,0x83,0xEC,0x08
Robert Sloanab8b8882018-03-26 11:39:51 -07002065.cfi_adjust_cfa_offset 8
Adam Langleyd9e397b2015-01-22 14:27:53 -08002066 call __aesni_set_encrypt_key
# On success the callee leaves 9, 11 or 13 in %esi and never writes %rdx.
# Scaling by 16 turns that count into a byte offset into the schedule.
2067 shll $4,%esi
2068 testl %eax,%eax
2069 jnz .Ldec_key_ret
# %rdx = first round key, %rdi = last round key (%rdx + 16 + 16*count).
2070 leaq 16(%rdx,%rsi,1),%rdi
2071
# Swap the first and last round keys as they are (no aesimc on these two).
2072 movups (%rdx),%xmm0
2073 movups (%rdi),%xmm1
2074 movups %xmm0,(%rdi)
2075 movups %xmm1,(%rdx)
2076 leaq 16(%rdx),%rdx
2077 leaq -16(%rdi),%rdi
2078
# Walk inward from both ends: transform each pair and store it swapped.
# The two .byte sequences encode aesimc %xmm0,%xmm0 and aesimc %xmm1,%xmm1.
# The loop runs while %rdi is still above %rdx.
2079.Ldec_key_inverse:
2080 movups (%rdx),%xmm0
2081 movups (%rdi),%xmm1
2082.byte 102,15,56,219,192
2083.byte 102,15,56,219,201
2084 leaq 16(%rdx),%rdx
2085 leaq -16(%rdi),%rdi
2086 movups %xmm0,16(%rdi)
2087 movups %xmm1,-16(%rdx)
2088 cmpq %rdx,%rdi
2089 ja .Ldec_key_inverse
2090
# The pointers have met on the middle round key: transform it in place
# (aesimc %xmm0,%xmm0), then wipe the registers that held key material.
2091 movups (%rdx),%xmm0
2092.byte 102,15,56,219,192
Adam Langleye9ada862015-05-11 17:20:37 -07002093 pxor %xmm1,%xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002094 movups %xmm0,(%rdi)
Adam Langleye9ada862015-05-11 17:20:37 -07002095 pxor %xmm0,%xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08002096.Ldec_key_ret:
2097 addq $8,%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07002098.cfi_adjust_cfa_offset -8
# Hand-encoded rep ret.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002099 .byte 0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07002100.cfi_endproc
Adam Langleyd9e397b2015-01-22 14:27:53 -08002101.LSEH_end_set_decrypt_key:
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002102.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
# aes_hw_set_encrypt_key / __aesni_set_encrypt_key
#
# Expands a user-supplied AES key into a round-key schedule. The second
# label is a file-local alias used by aes_hw_set_decrypt_key.
#
# Inputs, as used by the code below:
#   %rdi  pointer to the user key bytes
#   %esi  key length in bits (128, 192 or 256)
#   %rdx  pointer to the output key schedule
# Outputs:
#   %eax  0 on success, -1 when %rdi or %rdx is NULL, -2 for any other
#         bit count
#   %esi  9, 11 or 13 on success. The same value is stored at byte offset
#         240 of the schedule.
# Clobbers %rax, %r10 and flags. %xmm0-%xmm5 are zeroed before returning.
#
# Each key size has two code paths: one built on aeskeygenassist plus the
# .Lkey_expansion_* subroutines, and an _alt path built on pshufb and
# aesenclast. The choice depends on bits read from OPENSSL_ia32cap_P.
2103.globl aes_hw_set_encrypt_key
2104.hidden aes_hw_set_encrypt_key
2105.type aes_hw_set_encrypt_key,@function
Adam Langleyd9e397b2015-01-22 14:27:53 -08002106.align 16
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002107aes_hw_set_encrypt_key:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002108__aesni_set_encrypt_key:
Robert Sloanab8b8882018-03-26 11:39:51 -07002109.cfi_startproc
# Test-only hook: sets byte 3 of BORINGSSL_function_hit to 1.
Pete Bentley470a9302019-10-02 14:44:32 +01002110#ifdef BORINGSSL_DISPATCH_TEST
Robert Sloan4c22c5f2019-03-01 15:53:37 -08002111 movb $1,BORINGSSL_function_hit+3(%rip)
2112#endif
# Hand-encoded subq $8,%rsp, undone by the addq at .Lenc_key_ret.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002113.byte 0x48,0x83,0xEC,0x08
Robert Sloanab8b8882018-03-26 11:39:51 -07002114.cfi_adjust_cfa_offset 8
# Preload the NULL-argument result (-1) before validating both pointers.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002115 movq $-1,%rax
2116 testq %rdi,%rdi
2117 jz .Lenc_key_ret
2118 testq %rdx,%rdx
2119 jz .Lenc_key_ret
2120
# %xmm0 = first 16 key bytes. %xmm4 starts as zero; the .Lkey_expansion_*
# subroutines rely on its low dword staying zero.
2121 movups (%rdi),%xmm0
2122 xorps %xmm4,%xmm4
# %r10d = second dword of OPENSSL_ia32cap_P masked with 0x10000800. The
# _alt paths are taken when the result is exactly 0x10000000 (268435456).
# NOTE(review): these look like the AVX and XOP capability bits - confirm
# against the OPENSSL_ia32cap_P documentation.
Robert Sloan2424d842017-05-01 07:46:28 -07002123 leaq OPENSSL_ia32cap_P(%rip),%r10
Robert Sloan572a4e22017-04-17 10:52:19 -07002124 movl 4(%r10),%r10d
2125 andl $268437504,%r10d
# %rax = write cursor for the next round key, starting at schedule + 16.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002126 leaq 16(%rdx),%rax
2127 cmpl $256,%esi
2128 je .L14rounds
2129 cmpl $192,%esi
2130 je .L12rounds
2131 cmpl $128,%esi
2132 jne .Lbad_keybits
2133
# ---- 128-bit key -------------------------------------------------------
# Round count value 9 goes in %esi and is later stored in the schedule.
2134.L10rounds:
2135 movl $9,%esi
Adam Langleye9ada862015-05-11 17:20:37 -07002136 cmpl $268435456,%r10d
2137 je .L10rounds_alt
2138
# Each 6-byte sequence encodes aeskeygenassist $rcon,%xmm0,%xmm1 with
# rcon = 1,2,4,8,16,32,64,128,27,54. The first call enters at the _cold
# label, which skips the store. The other nine store the previous round key
# and advance %rax by 16 before deriving the next key in %xmm0.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002139 movups %xmm0,(%rdx)
2140.byte 102,15,58,223,200,1
2141 call .Lkey_expansion_128_cold
2142.byte 102,15,58,223,200,2
2143 call .Lkey_expansion_128
2144.byte 102,15,58,223,200,4
2145 call .Lkey_expansion_128
2146.byte 102,15,58,223,200,8
2147 call .Lkey_expansion_128
2148.byte 102,15,58,223,200,16
2149 call .Lkey_expansion_128
2150.byte 102,15,58,223,200,32
2151 call .Lkey_expansion_128
2152.byte 102,15,58,223,200,64
2153 call .Lkey_expansion_128
2154.byte 102,15,58,223,200,128
2155 call .Lkey_expansion_128
2156.byte 102,15,58,223,200,27
2157 call .Lkey_expansion_128
2158.byte 102,15,58,223,200,54
2159 call .Lkey_expansion_128
# %rax is now schedule + 160: store the last round key there and the round
# count at schedule + 240.
2160 movups %xmm0,(%rax)
2161 movl %esi,80(%rax)
2162 xorl %eax,%eax
2163 jmp .Lenc_key_ret
2164
# 128-bit key, alternative path. %xmm5 = byte-shuffle mask .Lkey_rotate,
# %xmm4 = round constant (starts at 1), %xmm2 = copy of the previous round
# key, %r10d = loop counter for the first eight derived keys.
2165.align 16
Adam Langleye9ada862015-05-11 17:20:37 -07002166.L10rounds_alt:
2167 movdqa .Lkey_rotate(%rip),%xmm5
2168 movl $8,%r10d
2169 movdqa .Lkey_rcon1(%rip),%xmm4
2170 movdqa %xmm0,%xmm2
2171 movdqu %xmm0,(%rdx)
2172 jmp .Loop_key128
2173
# Per iteration: pshufb %xmm5,%xmm0 then aesenclast %xmm4,%xmm0 (the two
# .byte lines), double the round constant, XOR the running prefix of the
# previous key dwords (built in %xmm2 via three byte shifts) into %xmm0,
# and store the new round key.
2174.align 16
2175.Loop_key128:
2176.byte 102,15,56,0,197
2177.byte 102,15,56,221,196
2178 pslld $1,%xmm4
2179 leaq 16(%rax),%rax
2180
2181 movdqa %xmm2,%xmm3
2182 pslldq $4,%xmm2
2183 pxor %xmm2,%xmm3
2184 pslldq $4,%xmm2
2185 pxor %xmm2,%xmm3
2186 pslldq $4,%xmm2
2187 pxor %xmm3,%xmm2
2188
2189 pxor %xmm2,%xmm0
2190 movdqu %xmm0,-16(%rax)
2191 movdqa %xmm0,%xmm2
2192
2193 decl %r10d
2194 jnz .Loop_key128
2195
# Last two round keys use constants 0x1b and, after one more doubling,
# 0x36. They are reloaded from .Lkey_rcon1b rather than continuing the
# doubling sequence from the loop.
2196 movdqa .Lkey_rcon1b(%rip),%xmm4
2197
2198.byte 102,15,56,0,197
2199.byte 102,15,56,221,196
2200 pslld $1,%xmm4
2201
2202 movdqa %xmm2,%xmm3
2203 pslldq $4,%xmm2
2204 pxor %xmm2,%xmm3
2205 pslldq $4,%xmm2
2206 pxor %xmm2,%xmm3
2207 pslldq $4,%xmm2
2208 pxor %xmm3,%xmm2
2209
2210 pxor %xmm2,%xmm0
2211 movdqu %xmm0,(%rax)
2212
2213 movdqa %xmm0,%xmm2
2214.byte 102,15,56,0,197
2215.byte 102,15,56,221,196
2216
2217 movdqa %xmm2,%xmm3
2218 pslldq $4,%xmm2
2219 pxor %xmm2,%xmm3
2220 pslldq $4,%xmm2
2221 pxor %xmm2,%xmm3
2222 pslldq $4,%xmm2
2223 pxor %xmm3,%xmm2
2224
2225 pxor %xmm2,%xmm0
2226 movdqu %xmm0,16(%rax)
2227
# %rax is schedule + 144 here, so the round count lands at schedule + 240.
2228 movl %esi,96(%rax)
2229 xorl %eax,%eax
2230 jmp .Lenc_key_ret
2231
# ---- 192-bit key -------------------------------------------------------
# %xmm2 receives key bytes 16..23 (movq loads 8 bytes). Round count value
# is 11.
2232.align 16
Adam Langleyd9e397b2015-01-22 14:27:53 -08002233.L12rounds:
2234 movq 16(%rdi),%xmm2
2235 movl $11,%esi
Adam Langleye9ada862015-05-11 17:20:37 -07002236 cmpl $268435456,%r10d
2237 je .L12rounds_alt
2238
# Each 6-byte sequence encodes aeskeygenassist $rcon,%xmm2,%xmm1 with
# rcon = 1,2,4,...,128. The 192a and 192b helpers alternate because the
# 24-byte key state does not line up with 16-byte round keys: 192a emits
# 16 bytes per call, 192b emits 32.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002239 movups %xmm0,(%rdx)
2240.byte 102,15,58,223,202,1
2241 call .Lkey_expansion_192a_cold
2242.byte 102,15,58,223,202,2
2243 call .Lkey_expansion_192b
2244.byte 102,15,58,223,202,4
2245 call .Lkey_expansion_192a
2246.byte 102,15,58,223,202,8
2247 call .Lkey_expansion_192b
2248.byte 102,15,58,223,202,16
2249 call .Lkey_expansion_192a
2250.byte 102,15,58,223,202,32
2251 call .Lkey_expansion_192b
2252.byte 102,15,58,223,202,64
2253 call .Lkey_expansion_192a
2254.byte 102,15,58,223,202,128
2255 call .Lkey_expansion_192b
# %rax is now schedule + 192: store the last round key there and the round
# count at schedule + 240.
2256 movups %xmm0,(%rax)
2257 movl %esi,48(%rax)
2258 xorq %rax,%rax
2259 jmp .Lenc_key_ret
2260
# 192-bit key, alternative path. %xmm5 = byte-shuffle mask .Lkey_rotate192,
# %xmm4 = round constant (starts at 1), %r10d = eight iterations, each
# emitting 24 bytes of schedule.
2261.align 16
Adam Langleye9ada862015-05-11 17:20:37 -07002262.L12rounds_alt:
2263 movdqa .Lkey_rotate192(%rip),%xmm5
2264 movdqa .Lkey_rcon1(%rip),%xmm4
2265 movl $8,%r10d
2266 movdqu %xmm0,(%rdx)
2267 jmp .Loop_key192
2268
# Per iteration: write the low 8 bytes of %xmm2, keep a copy in %xmm1,
# apply pshufb %xmm5,%xmm2 and aesenclast %xmm4,%xmm2 (the two .byte
# lines), double the round constant, then fold the prefix XOR of %xmm0
# into the next 24 bytes of key state held in %xmm0 and %xmm2.
2269.align 16
2270.Loop_key192:
2271 movq %xmm2,0(%rax)
2272 movdqa %xmm2,%xmm1
2273.byte 102,15,56,0,213
2274.byte 102,15,56,221,212
2275 pslld $1,%xmm4
2276 leaq 24(%rax),%rax
2277
2278 movdqa %xmm0,%xmm3
2279 pslldq $4,%xmm0
2280 pxor %xmm0,%xmm3
2281 pslldq $4,%xmm0
2282 pxor %xmm0,%xmm3
2283 pslldq $4,%xmm0
2284 pxor %xmm3,%xmm0
2285
David Benjamin4969cc92016-04-22 15:02:23 -04002286 pshufd $0xff,%xmm0,%xmm3
Adam Langleye9ada862015-05-11 17:20:37 -07002287 pxor %xmm1,%xmm3
2288 pslldq $4,%xmm1
2289 pxor %xmm1,%xmm3
2290
2291 pxor %xmm2,%xmm0
2292 pxor %xmm3,%xmm2
2293 movdqu %xmm0,-16(%rax)
2294
2295 decl %r10d
2296 jnz .Loop_key192
2297
# %rax is schedule + 208 here, so the round count lands at schedule + 240.
2298 movl %esi,32(%rax)
2299 xorl %eax,%eax
2300 jmp .Lenc_key_ret
2301
# ---- 256-bit key -------------------------------------------------------
# %xmm2 = key bytes 16..31. Round count value is 13. The write cursor
# moves to schedule + 32 because both key halves are stored verbatim.
2302.align 16
Adam Langleyd9e397b2015-01-22 14:27:53 -08002303.L14rounds:
2304 movups 16(%rdi),%xmm2
2305 movl $13,%esi
2306 leaq 16(%rax),%rax
Adam Langleye9ada862015-05-11 17:20:37 -07002307 cmpl $268435456,%r10d
2308 je .L14rounds_alt
2309
# The 6-byte sequences alternate between aeskeygenassist $rcon,%xmm2,%xmm1
# (followed by 256a, which updates %xmm0) and aeskeygenassist
# $rcon,%xmm0,%xmm1 (followed by 256b, which updates %xmm2).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002310 movups %xmm0,(%rdx)
2311 movups %xmm2,16(%rdx)
2312.byte 102,15,58,223,202,1
2313 call .Lkey_expansion_256a_cold
2314.byte 102,15,58,223,200,1
2315 call .Lkey_expansion_256b
2316.byte 102,15,58,223,202,2
2317 call .Lkey_expansion_256a
2318.byte 102,15,58,223,200,2
2319 call .Lkey_expansion_256b
2320.byte 102,15,58,223,202,4
2321 call .Lkey_expansion_256a
2322.byte 102,15,58,223,200,4
2323 call .Lkey_expansion_256b
2324.byte 102,15,58,223,202,8
2325 call .Lkey_expansion_256a
2326.byte 102,15,58,223,200,8
2327 call .Lkey_expansion_256b
2328.byte 102,15,58,223,202,16
2329 call .Lkey_expansion_256a
2330.byte 102,15,58,223,200,16
2331 call .Lkey_expansion_256b
2332.byte 102,15,58,223,202,32
2333 call .Lkey_expansion_256a
2334.byte 102,15,58,223,200,32
2335 call .Lkey_expansion_256b
2336.byte 102,15,58,223,202,64
2337 call .Lkey_expansion_256a
# %rax is now schedule + 224: store the last round key there and the round
# count at schedule + 240.
2338 movups %xmm0,(%rax)
2339 movl %esi,16(%rax)
2340 xorq %rax,%rax
2341 jmp .Lenc_key_ret
2342
# 256-bit key, alternative path. %xmm5 = byte-shuffle mask .Lkey_rotate,
# %xmm4 = round constant (starts at 1), %xmm1 = copy of the upper key
# half, %r10d = seven iterations.
2343.align 16
Adam Langleye9ada862015-05-11 17:20:37 -07002344.L14rounds_alt:
2345 movdqa .Lkey_rotate(%rip),%xmm5
2346 movdqa .Lkey_rcon1(%rip),%xmm4
2347 movl $7,%r10d
2348 movdqu %xmm0,0(%rdx)
2349 movdqa %xmm2,%xmm1
2350 movdqu %xmm2,16(%rdx)
2351 jmp .Loop_key256
2352
# First half of an iteration: pshufb %xmm5,%xmm2 and aesenclast
# %xmm4,%xmm2 (the two .byte lines), prefix XOR of %xmm0, double the round
# constant, then store the new even-numbered round key at (%rax).
2353.align 16
2354.Loop_key256:
2355.byte 102,15,56,0,213
2356.byte 102,15,56,221,212
2357
2358 movdqa %xmm0,%xmm3
2359 pslldq $4,%xmm0
2360 pxor %xmm0,%xmm3
2361 pslldq $4,%xmm0
2362 pxor %xmm0,%xmm3
2363 pslldq $4,%xmm0
2364 pxor %xmm3,%xmm0
2365 pslld $1,%xmm4
2366
2367 pxor %xmm2,%xmm0
2368 movdqu %xmm0,(%rax)
2369
# The seventh pass only needs the even-numbered key, so the loop exits here.
2370 decl %r10d
2371 jz .Ldone_key256
2372
# Second half: broadcast the top dword of the new key, run it through
# aesenclast %xmm3,%xmm2 with %xmm3 zeroed (the .byte line), XOR with the
# prefix XOR of %xmm1, and store the odd-numbered round key at 16(%rax).
David Benjamin4969cc92016-04-22 15:02:23 -04002373 pshufd $0xff,%xmm0,%xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07002374 pxor %xmm3,%xmm3
2375.byte 102,15,56,221,211
2376
2377 movdqa %xmm1,%xmm3
2378 pslldq $4,%xmm1
2379 pxor %xmm1,%xmm3
2380 pslldq $4,%xmm1
2381 pxor %xmm1,%xmm3
2382 pslldq $4,%xmm1
2383 pxor %xmm3,%xmm1
2384
2385 pxor %xmm1,%xmm2
2386 movdqu %xmm2,16(%rax)
2387 leaq 32(%rax),%rax
2388 movdqa %xmm2,%xmm1
2389
2390 jmp .Loop_key256
2391
# %rax is schedule + 224 here, so the round count lands at schedule + 240.
2392.Ldone_key256:
2393 movl %esi,16(%rax)
2394 xorl %eax,%eax
2395 jmp .Lenc_key_ret
2396
# Unsupported bit count: return -2.
2397.align 16
Adam Langleyd9e397b2015-01-22 14:27:53 -08002398.Lbad_keybits:
2399 movq $-2,%rax
# Common exit: wipe every vector register that may hold key material,
# release the 8 bytes of stack and return (hand-encoded rep ret).
2400.Lenc_key_ret:
Adam Langleye9ada862015-05-11 17:20:37 -07002401 pxor %xmm0,%xmm0
2402 pxor %xmm1,%xmm1
2403 pxor %xmm2,%xmm2
2404 pxor %xmm3,%xmm3
2405 pxor %xmm4,%xmm4
2406 pxor %xmm5,%xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08002407 addq $8,%rsp
Robert Sloanab8b8882018-03-26 11:39:51 -07002408.cfi_adjust_cfa_offset -8
Adam Langleyd9e397b2015-01-22 14:27:53 -08002409 .byte 0xf3,0xc3
Robert Sloanab8b8882018-03-26 11:39:51 -07002410.cfi_endproc
Adam Langleyd9e397b2015-01-22 14:27:53 -08002411.LSEH_end_set_encrypt_key:
2412
# .Lkey_expansion_128 - one step of the 128-bit key schedule.
# In:  %xmm0 = previous round key, %xmm1 = aeskeygenassist result,
#      %xmm4 with a zero low dword, %rax = write cursor.
# Out: %xmm0 = next round key. The previous key is stored at (%rax) and
#      %rax advances by 16, except when entered at the _cold label, which
#      skips the store.
# The two shufps/xorps pairs turn %xmm0 into the running XOR of its own
# dwords. The last pair XORs in dword 3 of %xmm1 broadcast to all lanes.
2413.align 16
2414.Lkey_expansion_128:
2415 movups %xmm0,(%rax)
2416 leaq 16(%rax),%rax
2417.Lkey_expansion_128_cold:
2418 shufps $16,%xmm0,%xmm4
2419 xorps %xmm4,%xmm0
2420 shufps $140,%xmm0,%xmm4
2421 xorps %xmm4,%xmm0
2422 shufps $255,%xmm1,%xmm1
2423 xorps %xmm1,%xmm0
# Hand-encoded rep ret.
2424 .byte 0xf3,0xc3
2425
# .Lkey_expansion_192a - 192-bit key schedule step that emits 16 bytes.
# In:  %xmm0 = low 16 bytes of key state, %xmm2 = next 8 bytes (low
#      qword), %xmm1 = aeskeygenassist result, %xmm4 with a zero low
#      dword, %rax = write cursor.
# Out: %xmm0 and %xmm2 updated to the next 24 bytes of key state, %xmm5 =
#      copy of the incoming %xmm2 for a following .Lkey_expansion_192b
#      call. %xmm3 is scratch.
# Entry points: the main label stores %xmm0 at (%rax) and advances %rax by
# 16; _cold skips that store; .Lkey_expansion_192b_warm is the shared tail
# jumped to by .Lkey_expansion_192b and does not refresh %xmm5.
2426.align 16
2427.Lkey_expansion_192a:
2428 movups %xmm0,(%rax)
2429 leaq 16(%rax),%rax
2430.Lkey_expansion_192a_cold:
2431 movaps %xmm2,%xmm5
# Shared tail: running XOR over the dwords of %xmm0 and %xmm2, mixed with
# dword 1 of %xmm1 (pshufd $85) and then with dword 3 of the new %xmm0
# (pshufd $255).
2432.Lkey_expansion_192b_warm:
2433 shufps $16,%xmm0,%xmm4
2434 movdqa %xmm2,%xmm3
2435 xorps %xmm4,%xmm0
2436 shufps $140,%xmm0,%xmm4
2437 pslldq $4,%xmm3
2438 xorps %xmm4,%xmm0
2439 pshufd $85,%xmm1,%xmm1
2440 pxor %xmm3,%xmm2
2441 pxor %xmm1,%xmm0
2442 pshufd $255,%xmm0,%xmm3
2443 pxor %xmm3,%xmm2
# Hand-encoded rep ret.
2444 .byte 0xf3,0xc3
2445
# .Lkey_expansion_192b - 192-bit key schedule step that emits 32 bytes.
# In:  %xmm5 = 8 bytes saved by the preceding .Lkey_expansion_192a call,
#      %xmm0 and %xmm2 = current key state, %rax = write cursor.
# Stores two round keys assembled from those pieces:
#   (%rax)   = low qword of %xmm5 followed by low qword of %xmm0
#   16(%rax) = high qword of %xmm0 followed by low qword of %xmm2
# then advances %rax by 32 and continues in the shared tail
# .Lkey_expansion_192b_warm, which derives the next key state and returns.
2446.align 16
2447.Lkey_expansion_192b:
2448 movaps %xmm0,%xmm3
2449 shufps $68,%xmm0,%xmm5
2450 movups %xmm5,(%rax)
2451 shufps $78,%xmm2,%xmm3
2452 movups %xmm3,16(%rax)
2453 leaq 32(%rax),%rax
2454 jmp .Lkey_expansion_192b_warm
2455
# .Lkey_expansion_256a - 256-bit key schedule step that updates %xmm0.
# In:  %xmm0 = round key to replace, %xmm2 = most recent round key,
#      %xmm1 = aeskeygenassist result computed from %xmm2, %xmm4 with a
#      zero low dword, %rax = write cursor.
# Out: %xmm0 = next round key. %xmm2 is stored at (%rax) and %rax advances
#      by 16, except when entered at the _cold label, which skips the
#      store.
# The arithmetic matches .Lkey_expansion_128: running XOR of the dwords of
# %xmm0, then XOR with dword 3 of %xmm1 broadcast to all lanes.
2456.align 16
2457.Lkey_expansion_256a:
2458 movups %xmm2,(%rax)
2459 leaq 16(%rax),%rax
2460.Lkey_expansion_256a_cold:
2461 shufps $16,%xmm0,%xmm4
2462 xorps %xmm4,%xmm0
2463 shufps $140,%xmm0,%xmm4
2464 xorps %xmm4,%xmm0
2465 shufps $255,%xmm1,%xmm1
2466 xorps %xmm1,%xmm0
# Hand-encoded rep ret.
2467 .byte 0xf3,0xc3
2468
# .Lkey_expansion_256b - 256-bit key schedule step that updates %xmm2.
# In:  %xmm2 = round key to replace, %xmm0 = most recent round key,
#      %xmm1 = aeskeygenassist result computed from %xmm0, %xmm4 with a
#      zero low dword, %rax = write cursor.
# Out: %xmm2 = next round key. %xmm0 is stored at (%rax) and %rax advances
#      by 16.
# Same running-XOR pattern as .Lkey_expansion_256a, but applied to %xmm2
# and mixed with dword 2 of %xmm1 (shufps $170) instead of dword 3.
2469.align 16
2470.Lkey_expansion_256b:
2471 movups %xmm0,(%rax)
2472 leaq 16(%rax),%rax
2473
2474 shufps $16,%xmm2,%xmm4
2475 xorps %xmm4,%xmm2
2476 shufps $140,%xmm2,%xmm4
2477 xorps %xmm4,%xmm2
2478 shufps $170,%xmm1,%xmm1
2479 xorps %xmm1,%xmm2
# Hand-encoded rep ret.
2480 .byte 0xf3,0xc3
Adam Vartanianbfcf3a72018-08-10 14:55:24 +01002481.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
Adam Langleyd9e397b2015-01-22 14:27:53 -08002482.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant pool, 64-byte aligned. Every table here is 16 bytes long.
# The first five tables are not referenced by the key-setup code in this
# part of the file. NOTE(review): presumably they serve other routines in
# the file - confirm.
2483.align 64
# Byte indices 15 down to 0. Looks like a byte-reversal shuffle mask.
2484.Lbswap_mask:
2485.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2486.Lincrement32:
2487.long 6,6,6,0
2488.Lincrement64:
2489.long 1,0,0,0
2490.Lxts_magic:
2491.long 0x87,0,1,0
2492.Lincrement1:
2493.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# Shuffle mask loaded into %xmm5 by .L10rounds_alt and .L14rounds_alt.
# As a pshufb mask it selects source bytes 13,14,15,12 for every dword,
# which is the top dword rotated by one byte and broadcast.
Adam Langleye9ada862015-05-11 17:20:37 -07002494.Lkey_rotate:
2495.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
# Shuffle mask loaded into %xmm5 by .L12rounds_alt. Selects source bytes
# 5,6,7,4 for every dword, which is dword 1 rotated by one byte and
# broadcast.
2496.Lkey_rotate192:
2497.long 0x04070605,0x04070605,0x04070605,0x04070605
# Initial round constant (1 in every dword) loaded into %xmm4 by the _alt
# key-setup paths, which double it with pslld after each use.
2498.Lkey_rcon1:
2499.long 1,1,1,1
# Round constant 0x1b, loaded by .L10rounds_alt for its last two round keys.
2500.Lkey_rcon1b:
2501.long 0x1b,0x1b,0x1b,0x1b
Adam Langleyd9e397b2015-01-22 14:27:53 -08002502
# NUL-terminated ASCII string:
# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
2503.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2504.align 64
Tobias Thierer01e077a2019-10-09 20:48:13 +01002505.section .note.GNU-stack,"",@progbits
Adam Langleyd9e397b2015-01-22 14:27:53 -08002506#endif