; blob: 13e9c5e5b6b16090decb2a85a213f1029e1e057e [file] [log] [blame]
; NASM source, x86-64, Intel operand order.
default rel
; The size keywords written in front of memory operands below are defined
; as empty, so they expand to nothing when assembled.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64

; Symbol imported from another object file.
EXTERN	OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; aesni_encrypt -- encrypt a single 16-byte block.
; In:    rcx = source block
;        rdx = destination block
;        r8  = key schedule (16-byte round keys; loop count at offset 240)
; Out:   16 bytes written to [rdx]
; Clobb: eax, r8, xmm0, xmm1, xmm2, flags
; xmm0-xmm2 are zeroed before returning so neither round keys nor the
; result stay in registers.
;-----------------------------------------------------------------------
global	aesni_encrypt

ALIGN	16
aesni_encrypt:
	movups	xmm2,XMMWORD[rcx]		; input block
	mov	eax,DWORD[240+r8]		; number of AESENC iterations
	movups	xmm0,XMMWORD[r8]		; round key 0
	movups	xmm1,XMMWORD[16+r8]		; round key 1
	lea	r8,[32+r8]			; r8 -> round key 2
	xorps	xmm2,xmm0			; block ^= round key 0
$L$oop_enc1_1:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
	dec	eax
	movups	xmm1,XMMWORD[r8]		; next round key (ZF from dec is untouched)
	lea	r8,[16+r8]
	jnz	NEAR $L$oop_enc1_1
DB	102,15,56,221,209			; aesenclast xmm2,xmm1
	pxor	xmm0,xmm0			; clear round-key registers
	pxor	xmm1,xmm1
	movups	XMMWORD[rdx],xmm2		; store result
	pxor	xmm2,xmm2			; clear the result register
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; aesni_decrypt -- decrypt a single 16-byte block.
; In:    rcx = source block
;        rdx = destination block
;        r8  = key schedule (16-byte round keys; loop count at offset 240)
; Out:   16 bytes written to [rdx]
; Clobb: eax, r8, xmm0, xmm1, xmm2, flags
; xmm0-xmm2 are zeroed before returning so neither round keys nor the
; result stay in registers.
;-----------------------------------------------------------------------
global	aesni_decrypt

ALIGN	16
aesni_decrypt:
	movups	xmm2,XMMWORD[rcx]		; input block
	mov	eax,DWORD[240+r8]		; number of AESDEC iterations
	movups	xmm0,XMMWORD[r8]		; round key 0
	movups	xmm1,XMMWORD[16+r8]		; round key 1
	lea	r8,[32+r8]			; r8 -> round key 2
	xorps	xmm2,xmm0			; block ^= round key 0
$L$oop_dec1_2:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
	dec	eax
	movups	xmm1,XMMWORD[r8]		; next round key (ZF from dec is untouched)
	lea	r8,[16+r8]
	jnz	NEAR $L$oop_dec1_2
DB	102,15,56,223,209			; aesdeclast xmm2,xmm1
	pxor	xmm0,xmm0			; clear round-key registers
	pxor	xmm1,xmm1
	movups	XMMWORD[rdx],xmm2		; store result
	pxor	xmm2,xmm2			; clear the result register
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_encrypt2 -- encrypt two blocks in parallel (register interface).
; In:    xmm2, xmm3 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2, xmm3 = encrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Each loop pass applies two round keys and the loop exits when the
; negative offset in rax reaches exactly zero, so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_encrypt2:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0
	xorps	xmm3,xmm0
	movups	xmm0,XMMWORD[32+rcx]		; round key 2
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
	add	rax,16				; rax = negative offset from rcx, steps up to 0

$L$enc_loop2:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,220,208			; aesenc xmm2,xmm0
DB	102,15,56,220,216			; aesenc xmm3,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$enc_loop2

DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,221,208			; aesenclast xmm2,xmm0
DB	102,15,56,221,216			; aesenclast xmm3,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_decrypt2 -- decrypt two blocks in parallel (register interface).
; In:    xmm2, xmm3 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2, xmm3 = decrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Each loop pass applies two round keys and the loop exits when the
; negative offset in rax reaches exactly zero, so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_decrypt2:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0
	xorps	xmm3,xmm0
	movups	xmm0,XMMWORD[32+rcx]		; round key 2
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
	add	rax,16				; rax = negative offset from rcx, steps up to 0

$L$dec_loop2:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,222,208			; aesdec xmm2,xmm0
DB	102,15,56,222,216			; aesdec xmm3,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$dec_loop2

DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,223,208			; aesdeclast xmm2,xmm0
DB	102,15,56,223,216			; aesdeclast xmm3,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_encrypt3 -- encrypt three blocks in parallel (register interface).
; In:    xmm2-xmm4 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm4 = encrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Each loop pass applies two round keys and the loop exits when the
; negative offset in rax reaches exactly zero, so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_encrypt3:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0
	xorps	xmm3,xmm0
	xorps	xmm4,xmm0
	movups	xmm0,XMMWORD[32+rcx]		; round key 2
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
	add	rax,16				; rax = negative offset from rcx, steps up to 0

$L$enc_loop3:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,220,208			; aesenc xmm2,xmm0
DB	102,15,56,220,216			; aesenc xmm3,xmm0
DB	102,15,56,220,224			; aesenc xmm4,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$enc_loop3

DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
DB	102,15,56,221,208			; aesenclast xmm2,xmm0
DB	102,15,56,221,216			; aesenclast xmm3,xmm0
DB	102,15,56,221,224			; aesenclast xmm4,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_decrypt3 -- decrypt three blocks in parallel (register interface).
; In:    xmm2-xmm4 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm4 = decrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Each loop pass applies two round keys and the loop exits when the
; negative offset in rax reaches exactly zero, so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_decrypt3:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0
	xorps	xmm3,xmm0
	xorps	xmm4,xmm0
	movups	xmm0,XMMWORD[32+rcx]		; round key 2
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
	add	rax,16				; rax = negative offset from rcx, steps up to 0

$L$dec_loop3:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,222,208			; aesdec xmm2,xmm0
DB	102,15,56,222,216			; aesdec xmm3,xmm0
DB	102,15,56,222,224			; aesdec xmm4,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$dec_loop3

DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
DB	102,15,56,223,208			; aesdeclast xmm2,xmm0
DB	102,15,56,223,216			; aesdeclast xmm3,xmm0
DB	102,15,56,223,224			; aesdeclast xmm4,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_encrypt4 -- encrypt four blocks in parallel (register interface).
; In:    xmm2-xmm5 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm5 = encrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Each loop pass applies two round keys and the loop exits when the
; negative offset in rax reaches exactly zero, so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_encrypt4:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0
	xorps	xmm3,xmm0
	xorps	xmm4,xmm0
	xorps	xmm5,xmm0
	movups	xmm0,XMMWORD[32+rcx]		; round key 2
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
DB	0x0f,0x1f,0x00				; 3-byte NOP (nop dword [rax]), padding only
	add	rax,16				; rax = negative offset from rcx, steps up to 0

$L$enc_loop4:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
DB	102,15,56,220,233			; aesenc xmm5,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,220,208			; aesenc xmm2,xmm0
DB	102,15,56,220,216			; aesenc xmm3,xmm0
DB	102,15,56,220,224			; aesenc xmm4,xmm0
DB	102,15,56,220,232			; aesenc xmm5,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$enc_loop4

DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
DB	102,15,56,220,233			; aesenc xmm5,xmm1
DB	102,15,56,221,208			; aesenclast xmm2,xmm0
DB	102,15,56,221,216			; aesenclast xmm3,xmm0
DB	102,15,56,221,224			; aesenclast xmm4,xmm0
DB	102,15,56,221,232			; aesenclast xmm5,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_decrypt4 -- decrypt four blocks in parallel (register interface).
; In:    xmm2-xmm5 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm5 = decrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Each loop pass applies two round keys and the loop exits when the
; negative offset in rax reaches exactly zero, so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_decrypt4:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0
	xorps	xmm3,xmm0
	xorps	xmm4,xmm0
	xorps	xmm5,xmm0
	movups	xmm0,XMMWORD[32+rcx]		; round key 2
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
DB	0x0f,0x1f,0x00				; 3-byte NOP (nop dword [rax]), padding only
	add	rax,16				; rax = negative offset from rcx, steps up to 0

$L$dec_loop4:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
DB	102,15,56,222,233			; aesdec xmm5,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,222,208			; aesdec xmm2,xmm0
DB	102,15,56,222,216			; aesdec xmm3,xmm0
DB	102,15,56,222,224			; aesdec xmm4,xmm0
DB	102,15,56,222,232			; aesdec xmm5,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$dec_loop4

DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
DB	102,15,56,222,233			; aesdec xmm5,xmm1
DB	102,15,56,223,208			; aesdeclast xmm2,xmm0
DB	102,15,56,223,216			; aesdeclast xmm3,xmm0
DB	102,15,56,223,224			; aesdeclast xmm4,xmm0
DB	102,15,56,223,232			; aesdeclast xmm5,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_encrypt6 -- encrypt six blocks in parallel (register interface).
; In:    xmm2-xmm7 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm7 = encrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; The first round for xmm2-xmm4 is interleaved with the set-up code, so
; the loop is entered part-way through at $L$enc_loop6_enter.
; The loop exits when the negative offset in rax reaches exactly zero,
; so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_encrypt6:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0 (interleaved below)
	pxor	xmm3,xmm0
	pxor	xmm4,xmm0
DB	102,15,56,220,209			; aesenc xmm2,xmm1
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
DB	102,15,56,220,217			; aesenc xmm3,xmm1
	pxor	xmm5,xmm0
	pxor	xmm6,xmm0
DB	102,15,56,220,225			; aesenc xmm4,xmm1
	pxor	xmm7,xmm0
	movups	xmm0,XMMWORD[rax*1+rcx]		; round key 2 (rcx+rax = key+32)
	add	rax,16
	jmp	NEAR $L$enc_loop6_enter
ALIGN	16
$L$enc_loop6:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
$L$enc_loop6_enter:
DB	102,15,56,220,233			; aesenc xmm5,xmm1
DB	102,15,56,220,241			; aesenc xmm6,xmm1
DB	102,15,56,220,249			; aesenc xmm7,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,220,208			; aesenc xmm2,xmm0
DB	102,15,56,220,216			; aesenc xmm3,xmm0
DB	102,15,56,220,224			; aesenc xmm4,xmm0
DB	102,15,56,220,232			; aesenc xmm5,xmm0
DB	102,15,56,220,240			; aesenc xmm6,xmm0
DB	102,15,56,220,248			; aesenc xmm7,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$enc_loop6

DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
DB	102,15,56,220,233			; aesenc xmm5,xmm1
DB	102,15,56,220,241			; aesenc xmm6,xmm1
DB	102,15,56,220,249			; aesenc xmm7,xmm1
DB	102,15,56,221,208			; aesenclast xmm2,xmm0
DB	102,15,56,221,216			; aesenclast xmm3,xmm0
DB	102,15,56,221,224			; aesenclast xmm4,xmm0
DB	102,15,56,221,232			; aesenclast xmm5,xmm0
DB	102,15,56,221,240			; aesenclast xmm6,xmm0
DB	102,15,56,221,248			; aesenclast xmm7,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_decrypt6 -- decrypt six blocks in parallel (register interface).
; In:    xmm2-xmm7 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm7 = decrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; The first round for xmm2-xmm4 is interleaved with the set-up code, so
; the loop is entered part-way through at $L$dec_loop6_enter.
; The loop exits when the negative offset in rax reaches exactly zero,
; so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_decrypt6:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0 (interleaved below)
	pxor	xmm3,xmm0
	pxor	xmm4,xmm0
DB	102,15,56,222,209			; aesdec xmm2,xmm1
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
DB	102,15,56,222,217			; aesdec xmm3,xmm1
	pxor	xmm5,xmm0
	pxor	xmm6,xmm0
DB	102,15,56,222,225			; aesdec xmm4,xmm1
	pxor	xmm7,xmm0
	movups	xmm0,XMMWORD[rax*1+rcx]		; round key 2 (rcx+rax = key+32)
	add	rax,16
	jmp	NEAR $L$dec_loop6_enter
ALIGN	16
$L$dec_loop6:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
$L$dec_loop6_enter:
DB	102,15,56,222,233			; aesdec xmm5,xmm1
DB	102,15,56,222,241			; aesdec xmm6,xmm1
DB	102,15,56,222,249			; aesdec xmm7,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,222,208			; aesdec xmm2,xmm0
DB	102,15,56,222,216			; aesdec xmm3,xmm0
DB	102,15,56,222,224			; aesdec xmm4,xmm0
DB	102,15,56,222,232			; aesdec xmm5,xmm0
DB	102,15,56,222,240			; aesdec xmm6,xmm0
DB	102,15,56,222,248			; aesdec xmm7,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$dec_loop6

DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
DB	102,15,56,222,233			; aesdec xmm5,xmm1
DB	102,15,56,222,241			; aesdec xmm6,xmm1
DB	102,15,56,222,249			; aesdec xmm7,xmm1
DB	102,15,56,223,208			; aesdeclast xmm2,xmm0
DB	102,15,56,223,216			; aesdeclast xmm3,xmm0
DB	102,15,56,223,224			; aesdeclast xmm4,xmm0
DB	102,15,56,223,232			; aesdeclast xmm5,xmm0
DB	102,15,56,223,240			; aesdeclast xmm6,xmm0
DB	102,15,56,223,248			; aesdeclast xmm7,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_encrypt8 -- encrypt eight blocks in parallel (register interface).
; In:    xmm2-xmm9 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm9 = encrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; The first round for xmm2/xmm3 is interleaved with the set-up code, so
; the loop is entered part-way through at $L$enc_loop8_inner.
; The loop exits when the negative offset in rax reaches exactly zero,
; so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_encrypt8:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0 (interleaved below)
	xorps	xmm3,xmm0
	pxor	xmm4,xmm0
	pxor	xmm5,xmm0
	pxor	xmm6,xmm0
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
DB	102,15,56,220,209			; aesenc xmm2,xmm1
	pxor	xmm7,xmm0
	pxor	xmm8,xmm0
DB	102,15,56,220,217			; aesenc xmm3,xmm1
	pxor	xmm9,xmm0
	movups	xmm0,XMMWORD[rax*1+rcx]		; round key 2 (rcx+rax = key+32)
	add	rax,16
	jmp	NEAR $L$enc_loop8_inner
ALIGN	16
$L$enc_loop8:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
$L$enc_loop8_inner:
DB	102,15,56,220,225			; aesenc xmm4,xmm1
DB	102,15,56,220,233			; aesenc xmm5,xmm1
DB	102,15,56,220,241			; aesenc xmm6,xmm1
DB	102,15,56,220,249			; aesenc xmm7,xmm1
DB	102,68,15,56,220,193			; aesenc xmm8,xmm1
DB	102,68,15,56,220,201			; aesenc xmm9,xmm1
$L$enc_loop8_enter:
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,220,208			; aesenc xmm2,xmm0
DB	102,15,56,220,216			; aesenc xmm3,xmm0
DB	102,15,56,220,224			; aesenc xmm4,xmm0
DB	102,15,56,220,232			; aesenc xmm5,xmm0
DB	102,15,56,220,240			; aesenc xmm6,xmm0
DB	102,15,56,220,248			; aesenc xmm7,xmm0
DB	102,68,15,56,220,192			; aesenc xmm8,xmm0
DB	102,68,15,56,220,200			; aesenc xmm9,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$enc_loop8

DB	102,15,56,220,209			; aesenc xmm2,xmm1
DB	102,15,56,220,217			; aesenc xmm3,xmm1
DB	102,15,56,220,225			; aesenc xmm4,xmm1
DB	102,15,56,220,233			; aesenc xmm5,xmm1
DB	102,15,56,220,241			; aesenc xmm6,xmm1
DB	102,15,56,220,249			; aesenc xmm7,xmm1
DB	102,68,15,56,220,193			; aesenc xmm8,xmm1
DB	102,68,15,56,220,201			; aesenc xmm9,xmm1
DB	102,15,56,221,208			; aesenclast xmm2,xmm0
DB	102,15,56,221,216			; aesenclast xmm3,xmm0
DB	102,15,56,221,224			; aesenclast xmm4,xmm0
DB	102,15,56,221,232			; aesenclast xmm5,xmm0
DB	102,15,56,221,240			; aesenclast xmm6,xmm0
DB	102,15,56,221,248			; aesenclast xmm7,xmm0
DB	102,68,15,56,221,192			; aesenclast xmm8,xmm0
DB	102,68,15,56,221,200			; aesenclast xmm9,xmm0
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; _aesni_decrypt8 -- decrypt eight blocks in parallel (register interface).
; In:    xmm2-xmm9 = input blocks
;        rcx = key schedule, eax = loop count (as stored at key+240)
; Out:   xmm2-xmm9 = decrypted blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; The first round for xmm2/xmm3 is interleaved with the set-up code, so
; the loop is entered part-way through at $L$dec_loop8_inner.
; The loop exits when the negative offset in rax reaches exactly zero,
; so eax must be odd.
;-----------------------------------------------------------------------
ALIGN	16
_aesni_decrypt8:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	shl	eax,4				; rax = 16 * loop count
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	xorps	xmm2,xmm0			; blocks ^= round key 0 (interleaved below)
	xorps	xmm3,xmm0
	pxor	xmm4,xmm0
	pxor	xmm5,xmm0
	pxor	xmm6,xmm0
	lea	rcx,[32+rax*1+rcx]		; rcx = key + 32 + 16*count
	neg	rax
DB	102,15,56,222,209			; aesdec xmm2,xmm1
	pxor	xmm7,xmm0
	pxor	xmm8,xmm0
DB	102,15,56,222,217			; aesdec xmm3,xmm1
	pxor	xmm9,xmm0
	movups	xmm0,XMMWORD[rax*1+rcx]		; round key 2 (rcx+rax = key+32)
	add	rax,16
	jmp	NEAR $L$dec_loop8_inner
ALIGN	16
$L$dec_loop8:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
$L$dec_loop8_inner:
DB	102,15,56,222,225			; aesdec xmm4,xmm1
DB	102,15,56,222,233			; aesdec xmm5,xmm1
DB	102,15,56,222,241			; aesdec xmm6,xmm1
DB	102,15,56,222,249			; aesdec xmm7,xmm1
DB	102,68,15,56,222,193			; aesdec xmm8,xmm1
DB	102,68,15,56,222,201			; aesdec xmm9,xmm1
$L$dec_loop8_enter:
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32				; sets ZF for the jnz below
DB	102,15,56,222,208			; aesdec xmm2,xmm0
DB	102,15,56,222,216			; aesdec xmm3,xmm0
DB	102,15,56,222,224			; aesdec xmm4,xmm0
DB	102,15,56,222,232			; aesdec xmm5,xmm0
DB	102,15,56,222,240			; aesdec xmm6,xmm0
DB	102,15,56,222,248			; aesdec xmm7,xmm0
DB	102,68,15,56,222,192			; aesdec xmm8,xmm0
DB	102,68,15,56,222,200			; aesdec xmm9,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$dec_loop8

DB	102,15,56,222,209			; aesdec xmm2,xmm1
DB	102,15,56,222,217			; aesdec xmm3,xmm1
DB	102,15,56,222,225			; aesdec xmm4,xmm1
DB	102,15,56,222,233			; aesdec xmm5,xmm1
DB	102,15,56,222,241			; aesdec xmm6,xmm1
DB	102,15,56,222,249			; aesdec xmm7,xmm1
DB	102,68,15,56,222,193			; aesdec xmm8,xmm1
DB	102,68,15,56,222,201			; aesdec xmm9,xmm1
DB	102,15,56,223,208			; aesdeclast xmm2,xmm0
DB	102,15,56,223,216			; aesdeclast xmm3,xmm0
DB	102,15,56,223,224			; aesdeclast xmm4,xmm0
DB	102,15,56,223,232			; aesdeclast xmm5,xmm0
DB	102,15,56,223,240			; aesdeclast xmm6,xmm0
DB	102,15,56,223,248			; aesdeclast xmm7,xmm0
DB	102,68,15,56,223,192			; aesdeclast xmm8,xmm0
DB	102,68,15,56,223,200			; aesdeclast xmm9,xmm0
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; aesni_ecb_encrypt -- encrypt or decrypt a run of 16-byte blocks, each
; block processed independently.
; ABI:   Microsoft x64 argument registers; they are copied into
;        rdi/rsi/rdx/rcx/r8 on entry.
; In:    rcx      = input buffer
;        rdx      = output buffer
;        r8       = length in bytes (rounded down to a multiple of 16;
;                   nothing is processed if that leaves zero)
;        r9       = key schedule (loop count at offset 240)
;        [rsp+40] = direction: non-zero encrypts, zero decrypts
; Saved: rdi and rsi are kept at [rsp+8]/[rsp+16] as seen on entry;
;        xmm6-xmm9 are kept in an 88-byte local frame and the frame
;        slots are overwritten with zeros on exit.
; Clobb: rax, rcx, rdx, r8, r10, r11, xmm0-xmm5, flags
; Inside the body: rdi = input cursor, rsi = output cursor,
; rdx = bytes remaining, r11/r10d = saved key pointer / loop count
; (the block helpers modify rcx and eax, so both are reloaded per call).
;-----------------------------------------------------------------------
global	aesni_ecb_encrypt

ALIGN	16
aesni_ecb_encrypt:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_ecb_encrypt:
	mov	rdi,rcx				; rdi = input
	mov	rsi,rdx				; rsi = output
	mov	rdx,r8				; rdx = length
	mov	rcx,r9				; rcx = key schedule
	mov	r8,QWORD[40+rsp]		; r8 = direction flag (fifth argument)


	lea	rsp,[((-88))+rsp]		; local frame for xmm6-xmm9
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
	movaps	XMMWORD[32+rsp],xmm8
	movaps	XMMWORD[48+rsp],xmm9
$L$ecb_enc_body:
	and	rdx,-16				; drop any partial trailing block
	jz	NEAR $L$ecb_ret

	mov	eax,DWORD[240+rcx]		; loop count from the key schedule
	movups	xmm0,XMMWORD[rcx]
	mov	r11,rcx				; keep key pointer across helper calls
	mov	r10d,eax			; keep loop count across helper calls
	test	r8d,r8d
	jz	NEAR $L$ecb_decrypt

	cmp	rdx,0x80			; fewer than 8 blocks?
	jb	NEAR $L$ecb_enc_tail

	movdqu	xmm2,XMMWORD[rdi]		; preload the first 8 blocks
	movdqu	xmm3,XMMWORD[16+rdi]
	movdqu	xmm4,XMMWORD[32+rdi]
	movdqu	xmm5,XMMWORD[48+rdi]
	movdqu	xmm6,XMMWORD[64+rdi]
	movdqu	xmm7,XMMWORD[80+rdi]
	movdqu	xmm8,XMMWORD[96+rdi]
	movdqu	xmm9,XMMWORD[112+rdi]
	lea	rdi,[128+rdi]
	sub	rdx,0x80
	jmp	NEAR $L$ecb_enc_loop8_enter
ALIGN	16
$L$ecb_enc_loop8:
	; Store the previous 8 results while loading the next 8 inputs.
	movups	XMMWORD[rsi],xmm2
	mov	rcx,r11				; restore key pointer
	movdqu	xmm2,XMMWORD[rdi]
	mov	eax,r10d			; restore loop count
	movups	XMMWORD[16+rsi],xmm3
	movdqu	xmm3,XMMWORD[16+rdi]
	movups	XMMWORD[32+rsi],xmm4
	movdqu	xmm4,XMMWORD[32+rdi]
	movups	XMMWORD[48+rsi],xmm5
	movdqu	xmm5,XMMWORD[48+rdi]
	movups	XMMWORD[64+rsi],xmm6
	movdqu	xmm6,XMMWORD[64+rdi]
	movups	XMMWORD[80+rsi],xmm7
	movdqu	xmm7,XMMWORD[80+rdi]
	movups	XMMWORD[96+rsi],xmm8
	movdqu	xmm8,XMMWORD[96+rdi]
	movups	XMMWORD[112+rsi],xmm9
	lea	rsi,[128+rsi]
	movdqu	xmm9,XMMWORD[112+rdi]
	lea	rdi,[128+rdi]
$L$ecb_enc_loop8_enter:

	call	_aesni_encrypt8

	sub	rdx,0x80
	jnc	NEAR $L$ecb_enc_loop8		; another full group of 8 remains

	movups	XMMWORD[rsi],xmm2		; flush the last group of 8
	mov	rcx,r11
	movups	XMMWORD[16+rsi],xmm3
	mov	eax,r10d
	movups	XMMWORD[32+rsi],xmm4
	movups	XMMWORD[48+rsi],xmm5
	movups	XMMWORD[64+rsi],xmm6
	movups	XMMWORD[80+rsi],xmm7
	movups	XMMWORD[96+rsi],xmm8
	movups	XMMWORD[112+rsi],xmm9
	lea	rsi,[128+rsi]
	add	rdx,0x80			; undo the borrow: rdx = leftover bytes
	jz	NEAR $L$ecb_ret

$L$ecb_enc_tail:
	; 1..7 blocks remain. Each cmp serves two branches (jb and je); the
	; movups between them does not modify flags.
	movups	xmm2,XMMWORD[rdi]
	cmp	rdx,0x20
	jb	NEAR $L$ecb_enc_one
	movups	xmm3,XMMWORD[16+rdi]
	je	NEAR $L$ecb_enc_two
	movups	xmm4,XMMWORD[32+rdi]
	cmp	rdx,0x40
	jb	NEAR $L$ecb_enc_three
	movups	xmm5,XMMWORD[48+rdi]
	je	NEAR $L$ecb_enc_four
	movups	xmm6,XMMWORD[64+rdi]
	cmp	rdx,0x60
	jb	NEAR $L$ecb_enc_five
	movups	xmm7,XMMWORD[80+rdi]
	je	NEAR $L$ecb_enc_six
	movdqu	xmm8,XMMWORD[96+rdi]
	xorps	xmm9,xmm9			; 7 blocks: eighth lane is a zero dummy
	call	_aesni_encrypt8
	movups	XMMWORD[rsi],xmm2
	movups	XMMWORD[16+rsi],xmm3
	movups	XMMWORD[32+rsi],xmm4
	movups	XMMWORD[48+rsi],xmm5
	movups	XMMWORD[64+rsi],xmm6
	movups	XMMWORD[80+rsi],xmm7
	movups	XMMWORD[96+rsi],xmm8
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_enc_one:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	lea	rcx,[32+rcx]
	xorps	xmm2,xmm0
$L$oop_enc1_3:
DB	102,15,56,220,209			; aesenc xmm2,xmm1
	dec	eax
	movups	xmm1,XMMWORD[rcx]
	lea	rcx,[16+rcx]
	jnz	NEAR $L$oop_enc1_3
DB	102,15,56,221,209			; aesenclast xmm2,xmm1
	movups	XMMWORD[rsi],xmm2
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_enc_two:
	call	_aesni_encrypt2
	movups	XMMWORD[rsi],xmm2
	movups	XMMWORD[16+rsi],xmm3
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_enc_three:
	call	_aesni_encrypt3
	movups	XMMWORD[rsi],xmm2
	movups	XMMWORD[16+rsi],xmm3
	movups	XMMWORD[32+rsi],xmm4
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_enc_four:
	call	_aesni_encrypt4
	movups	XMMWORD[rsi],xmm2
	movups	XMMWORD[16+rsi],xmm3
	movups	XMMWORD[32+rsi],xmm4
	movups	XMMWORD[48+rsi],xmm5
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_enc_five:
	xorps	xmm7,xmm7			; 5 blocks: sixth lane is a zero dummy
	call	_aesni_encrypt6
	movups	XMMWORD[rsi],xmm2
	movups	XMMWORD[16+rsi],xmm3
	movups	XMMWORD[32+rsi],xmm4
	movups	XMMWORD[48+rsi],xmm5
	movups	XMMWORD[64+rsi],xmm6
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_enc_six:
	call	_aesni_encrypt6
	movups	XMMWORD[rsi],xmm2
	movups	XMMWORD[16+rsi],xmm3
	movups	XMMWORD[32+rsi],xmm4
	movups	XMMWORD[48+rsi],xmm5
	movups	XMMWORD[64+rsi],xmm6
	movups	XMMWORD[80+rsi],xmm7
	jmp	NEAR $L$ecb_ret

ALIGN	16
$L$ecb_decrypt:
	; Same structure as the encrypt path; in addition every data register
	; is zeroed right after its contents have been stored.
	cmp	rdx,0x80			; fewer than 8 blocks?
	jb	NEAR $L$ecb_dec_tail

	movdqu	xmm2,XMMWORD[rdi]		; preload the first 8 blocks
	movdqu	xmm3,XMMWORD[16+rdi]
	movdqu	xmm4,XMMWORD[32+rdi]
	movdqu	xmm5,XMMWORD[48+rdi]
	movdqu	xmm6,XMMWORD[64+rdi]
	movdqu	xmm7,XMMWORD[80+rdi]
	movdqu	xmm8,XMMWORD[96+rdi]
	movdqu	xmm9,XMMWORD[112+rdi]
	lea	rdi,[128+rdi]
	sub	rdx,0x80
	jmp	NEAR $L$ecb_dec_loop8_enter
ALIGN	16
$L$ecb_dec_loop8:
	; Store the previous 8 results while loading the next 8 inputs.
	movups	XMMWORD[rsi],xmm2
	mov	rcx,r11				; restore key pointer
	movdqu	xmm2,XMMWORD[rdi]
	mov	eax,r10d			; restore loop count
	movups	XMMWORD[16+rsi],xmm3
	movdqu	xmm3,XMMWORD[16+rdi]
	movups	XMMWORD[32+rsi],xmm4
	movdqu	xmm4,XMMWORD[32+rdi]
	movups	XMMWORD[48+rsi],xmm5
	movdqu	xmm5,XMMWORD[48+rdi]
	movups	XMMWORD[64+rsi],xmm6
	movdqu	xmm6,XMMWORD[64+rdi]
	movups	XMMWORD[80+rsi],xmm7
	movdqu	xmm7,XMMWORD[80+rdi]
	movups	XMMWORD[96+rsi],xmm8
	movdqu	xmm8,XMMWORD[96+rdi]
	movups	XMMWORD[112+rsi],xmm9
	lea	rsi,[128+rsi]
	movdqu	xmm9,XMMWORD[112+rdi]
	lea	rdi,[128+rdi]
$L$ecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	xmm0,XMMWORD[r11]		; reload round key 0
	sub	rdx,0x80
	jnc	NEAR $L$ecb_dec_loop8		; another full group of 8 remains

	movups	XMMWORD[rsi],xmm2		; flush the last group of 8, clearing as we go
	pxor	xmm2,xmm2
	mov	rcx,r11
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	mov	eax,r10d
	movups	XMMWORD[32+rsi],xmm4
	pxor	xmm4,xmm4
	movups	XMMWORD[48+rsi],xmm5
	pxor	xmm5,xmm5
	movups	XMMWORD[64+rsi],xmm6
	pxor	xmm6,xmm6
	movups	XMMWORD[80+rsi],xmm7
	pxor	xmm7,xmm7
	movups	XMMWORD[96+rsi],xmm8
	pxor	xmm8,xmm8
	movups	XMMWORD[112+rsi],xmm9
	pxor	xmm9,xmm9
	lea	rsi,[128+rsi]
	add	rdx,0x80			; undo the borrow: rdx = leftover bytes
	jz	NEAR $L$ecb_ret

$L$ecb_dec_tail:
	; 1..7 blocks remain. Each cmp serves two branches (jb and je); the
	; movups between them does not modify flags.
	movups	xmm2,XMMWORD[rdi]
	cmp	rdx,0x20
	jb	NEAR $L$ecb_dec_one
	movups	xmm3,XMMWORD[16+rdi]
	je	NEAR $L$ecb_dec_two
	movups	xmm4,XMMWORD[32+rdi]
	cmp	rdx,0x40
	jb	NEAR $L$ecb_dec_three
	movups	xmm5,XMMWORD[48+rdi]
	je	NEAR $L$ecb_dec_four
	movups	xmm6,XMMWORD[64+rdi]
	cmp	rdx,0x60
	jb	NEAR $L$ecb_dec_five
	movups	xmm7,XMMWORD[80+rdi]
	je	NEAR $L$ecb_dec_six
	movups	xmm8,XMMWORD[96+rdi]
	movups	xmm0,XMMWORD[rcx]
	xorps	xmm9,xmm9			; 7 blocks: eighth lane is a zero dummy
	call	_aesni_decrypt8
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	movups	XMMWORD[32+rsi],xmm4
	pxor	xmm4,xmm4
	movups	XMMWORD[48+rsi],xmm5
	pxor	xmm5,xmm5
	movups	XMMWORD[64+rsi],xmm6
	pxor	xmm6,xmm6
	movups	XMMWORD[80+rsi],xmm7
	pxor	xmm7,xmm7
	movups	XMMWORD[96+rsi],xmm8
	pxor	xmm8,xmm8
	pxor	xmm9,xmm9
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_dec_one:
	movups	xmm0,XMMWORD[rcx]		; round key 0
	movups	xmm1,XMMWORD[16+rcx]		; round key 1
	lea	rcx,[32+rcx]
	xorps	xmm2,xmm0
$L$oop_dec1_4:
DB	102,15,56,222,209			; aesdec xmm2,xmm1
	dec	eax
	movups	xmm1,XMMWORD[rcx]
	lea	rcx,[16+rcx]
	jnz	NEAR $L$oop_dec1_4
DB	102,15,56,223,209			; aesdeclast xmm2,xmm1
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_dec_two:
	call	_aesni_decrypt2
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_dec_three:
	call	_aesni_decrypt3
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	movups	XMMWORD[32+rsi],xmm4
	pxor	xmm4,xmm4
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_dec_four:
	call	_aesni_decrypt4
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	movups	XMMWORD[32+rsi],xmm4
	pxor	xmm4,xmm4
	movups	XMMWORD[48+rsi],xmm5
	pxor	xmm5,xmm5
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_dec_five:
	xorps	xmm7,xmm7			; 5 blocks: sixth lane is a zero dummy
	call	_aesni_decrypt6
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	movups	XMMWORD[32+rsi],xmm4
	pxor	xmm4,xmm4
	movups	XMMWORD[48+rsi],xmm5
	pxor	xmm5,xmm5
	movups	XMMWORD[64+rsi],xmm6
	pxor	xmm6,xmm6
	pxor	xmm7,xmm7
	jmp	NEAR $L$ecb_ret
ALIGN	16
$L$ecb_dec_six:
	call	_aesni_decrypt6
	movups	XMMWORD[rsi],xmm2
	pxor	xmm2,xmm2
	movups	XMMWORD[16+rsi],xmm3
	pxor	xmm3,xmm3
	movups	XMMWORD[32+rsi],xmm4
	pxor	xmm4,xmm4
	movups	XMMWORD[48+rsi],xmm5
	pxor	xmm5,xmm5
	movups	XMMWORD[64+rsi],xmm6
	pxor	xmm6,xmm6
	movups	XMMWORD[80+rsi],xmm7
	pxor	xmm7,xmm7
	; falls through to the common exit

$L$ecb_ret:
	xorps	xmm0,xmm0			; clear round-key registers
	pxor	xmm1,xmm1
	; Restore xmm6-xmm9 and overwrite each frame slot with zeros.
	movaps	xmm6,XMMWORD[rsp]
	movaps	XMMWORD[rsp],xmm0
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	XMMWORD[16+rsp],xmm0
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	XMMWORD[32+rsp],xmm0
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	XMMWORD[48+rsp],xmm0
	lea	rsp,[88+rsp]
$L$ecb_enc_ret:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_aesni_ecb_encrypt:
;-----------------------------------------------------------------------
; aesni_ccm64_encrypt_blocks
;
; ABI:   Microsoft x64 on entry. The prologue moves the six arguments
;        into rdi, rsi, rdx, rcx, r8, r9, which is what the body uses.
; In:    arg1 (rcx      -> rdi) input pointer, 16 bytes read per block
;        arg2 (rdx      -> rsi) output pointer, 16 bytes written per block
;        arg3 (r8       -> rdx) number of 16-byte blocks. The count is
;                               only tested after a block has been
;                               processed, so 0 is not handled.
;        arg4 (r9       -> rcx) key schedule; the DWORD at offset 240 is
;                               the round count that sizes the AESENC loop
;        arg5 ([rsp+40] -> r8)  16-byte counter block, read only
;        arg6 ([rsp+48] -> r9)  16-byte running value, read on entry and
;                               written back on exit (presumably the CCM
;                               CBC-MAC state - confirm against caller)
; Per block:
;        out   = in XOR E(counter)
;        value = E(value XOR in)
;        counter (kept shuffled by $L$bswap_mask in xmm6) += $L$increment64
;        Both encryptions run side by side: xmm2 = counter lane,
;        xmm3 = running-value lane.
; Saves: xmm6-xmm9 (callee-saved on Win64) in an 88-byte stack frame,
;        rdi/rsi in the caller-provided home slots at [rsp+8]/[rsp+16].
; Clobb: rax, rcx, rdx, r8-r11, flags; xmm0-xmm3 are zeroed on exit.
;
; AES-NI / SSSE3 instructions are emitted as raw bytes (DB); the decoded
; mnemonic is given in the comment next to each one.
;-----------------------------------------------------------------------
global	aesni_ccm64_encrypt_blocks

ALIGN	16
aesni_ccm64_encrypt_blocks:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_ccm64_encrypt_blocks:
	mov	rdi,rcx			; rdi = input
	mov	rsi,rdx			; rsi = output
	mov	rdx,r8			; rdx = block count
	mov	rcx,r9			; rcx = key schedule
	mov	r8,QWORD[40+rsp]	; r8  = counter block pointer (5th arg)
	mov	r9,QWORD[48+rsp]	; r9  = running-value pointer (6th arg)


	; Frame for xmm6-xmm9. With the usual rsp = 8 (mod 16) at entry,
	; subtracting 88 leaves rsp 16-byte aligned, as movaps requires.
	lea	rsp,[((-88))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
	movaps	XMMWORD[32+rsp],xmm8
	movaps	XMMWORD[48+rsp],xmm9
$L$ccm64_enc_body:
	mov	eax,DWORD[240+rcx]	; eax = round count from key schedule
	movdqu	xmm6,XMMWORD[r8]	; xmm6 = counter block
	movdqa	xmm9,XMMWORD[$L$increment64]
	movdqa	xmm7,XMMWORD[$L$bswap_mask]

	shl	eax,4			; rax = rounds * 16 (bytes of round keys)
	mov	r10d,16
	lea	r11,[rcx]		; r11 = start of key schedule
	movdqu	xmm3,XMMWORD[r9]	; xmm3 = running value
	movdqa	xmm2,xmm6		; xmm2 = counter block to encrypt first
	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + rounds*16
DB	102,15,56,0,247			; pshufb xmm6,xmm7 (counter into paddq order)
	sub	r10,rax			; r10 = 16 - rounds*16: negative index,
					; counts up to 0 in steps of 32
	jmp	NEAR $L$ccm64_enc_outer	; skip the alignment padding
ALIGN	16
$L$ccm64_enc_outer:
	movups	xmm0,XMMWORD[r11]	; round key 0
	mov	rax,r10			; reset round-key index
	movups	xmm8,XMMWORD[rdi]	; xmm8 = input block

	xorps	xmm2,xmm0		; counter lane: whitening with key 0
	movups	xmm1,XMMWORD[16+r11]	; round key 1
	xorps	xmm0,xmm8		; xmm0 = key0 XOR input
	xorps	xmm3,xmm0		; value lane: value XOR input XOR key0
	movups	xmm0,XMMWORD[32+r11]	; round key 2

	; Two rounds per iteration on both lanes; xmm1/xmm0 alternate as
	; the current round key and are refilled through [rax+rcx].
$L$ccm64_enc2_loop:
DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32			; sets ZF when the index reaches 0
DB	102,15,56,220,208		; aesenc xmm2,xmm0
DB	102,15,56,220,216		; aesenc xmm3,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$ccm64_enc2_loop	; ZF still from "add rax,32"
DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
	paddq	xmm6,xmm9		; advance the shuffled counter
	dec	rdx			; one block done; ZF consumed by jnz below
DB	102,15,56,221,208		; aesenclast xmm2,xmm0
DB	102,15,56,221,216		; aesenclast xmm3,xmm0

	lea	rdi,[16+rdi]		; next input block
	xorps	xmm8,xmm2		; output = input XOR E(counter)
	movdqa	xmm2,xmm6		; next counter ...
	movups	XMMWORD[rsi],xmm8
DB	102,15,56,0,215			; pshufb xmm2,xmm7 (... back in block order)
	lea	rsi,[16+rsi]		; next output block
	jnz	NEAR $L$ccm64_enc_outer	; ZF still from "dec rdx"; nothing in
					; between modifies flags

	; Store the running value, zero the working registers, then
	; restore xmm6-xmm9 and overwrite their stack slots with zero.
	pxor	xmm0,xmm0
	pxor	xmm1,xmm1
	pxor	xmm2,xmm2
	movups	XMMWORD[r9],xmm3	; write back the running value
	pxor	xmm3,xmm3
	pxor	xmm8,xmm8
	pxor	xmm6,xmm6		; (xmm6 is reloaded on the next line)
	movaps	xmm6,XMMWORD[rsp]
	movaps	XMMWORD[rsp],xmm0
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	XMMWORD[16+rsp],xmm0
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	XMMWORD[32+rsp],xmm0
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	XMMWORD[48+rsp],xmm0
	lea	rsp,[88+rsp]
$L$ccm64_enc_ret:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_aesni_ccm64_encrypt_blocks:
;-----------------------------------------------------------------------
; aesni_ccm64_decrypt_blocks
;
; ABI:   Microsoft x64 on entry. The prologue moves the six arguments
;        into rdi, rsi, rdx, rcx, r8, r9, which is what the body uses.
; In:    arg1 (rcx      -> rdi) input pointer, 16 bytes read per block
;        arg2 (rdx      -> rsi) output pointer, 16 bytes written per block
;        arg3 (r8       -> rdx) number of 16-byte blocks. The first block
;                               is processed before the count is tested,
;                               so 0 is not handled.
;        arg4 (r9       -> rcx) key schedule; the DWORD at offset 240 is
;                               the round count that sizes the AESENC loops
;        arg5 ([rsp+40] -> r8)  16-byte counter block, read only
;        arg6 ([rsp+48] -> r9)  16-byte running value, read on entry and
;                               written back on exit (presumably the CCM
;                               CBC-MAC state - confirm against caller)
; Per block:
;        out   = in XOR E(counter)
;        value = E(value XOR out)
;        counter (kept shuffled by $L$bswap_mask in xmm6) += $L$increment64
;        The running value depends on the decrypted output, so the work
;        is staggered: E(counter) for the first block is computed alone,
;        then each pass of the main loop encrypts the NEXT counter (xmm2)
;        alongside the running value for the block just output (xmm3).
;        The last running-value update is done alone at
;        $L$ccm64_dec_break.
; Saves: xmm6-xmm9 (callee-saved on Win64) in an 88-byte stack frame,
;        rdi/rsi in the caller-provided home slots at [rsp+8]/[rsp+16].
; Clobb: rax, rcx, rdx, r8-r11, flags; xmm0-xmm3 are zeroed on exit.
;
; AES-NI / SSSE3 instructions are emitted as raw bytes (DB); the decoded
; mnemonic is given in the comment next to each one.
;-----------------------------------------------------------------------
global	aesni_ccm64_decrypt_blocks

ALIGN	16
aesni_ccm64_decrypt_blocks:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aesni_ccm64_decrypt_blocks:
	mov	rdi,rcx			; rdi = input
	mov	rsi,rdx			; rsi = output
	mov	rdx,r8			; rdx = block count
	mov	rcx,r9			; rcx = key schedule
	mov	r8,QWORD[40+rsp]	; r8  = counter block pointer (5th arg)
	mov	r9,QWORD[48+rsp]	; r9  = running-value pointer (6th arg)


	; Frame for xmm6-xmm9. With the usual rsp = 8 (mod 16) at entry,
	; subtracting 88 leaves rsp 16-byte aligned, as movaps requires.
	lea	rsp,[((-88))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
	movaps	XMMWORD[32+rsp],xmm8
	movaps	XMMWORD[48+rsp],xmm9
$L$ccm64_dec_body:
	mov	eax,DWORD[240+rcx]	; eax = round count from key schedule
	movups	xmm6,XMMWORD[r8]	; xmm6 = counter block
	movdqu	xmm3,XMMWORD[r9]	; xmm3 = running value
	movdqa	xmm9,XMMWORD[$L$increment64]
	movdqa	xmm7,XMMWORD[$L$bswap_mask]

	movaps	xmm2,xmm6		; xmm2 = counter block to encrypt first
	mov	r10d,eax		; keep the round count for later
	mov	r11,rcx			; r11 = start of key schedule
DB	102,15,56,0,247			; pshufb xmm6,xmm7 (counter into paddq order)

	; Single-lane encryption of the first counter block:
	; "eax" AESENC rounds followed by one AESENCLAST.
	movups	xmm0,XMMWORD[rcx]	; round key 0
	movups	xmm1,XMMWORD[16+rcx]	; round key 1
	lea	rcx,[32+rcx]
	xorps	xmm2,xmm0		; whitening with key 0
$L$oop_enc1_5:
DB	102,15,56,220,209		; aesenc xmm2,xmm1
	dec	eax
	movups	xmm1,XMMWORD[rcx]	; next round key
	lea	rcx,[16+rcx]
	jnz	NEAR $L$oop_enc1_5	; ZF still from "dec eax"
DB	102,15,56,221,209		; aesenclast xmm2,xmm1

	shl	r10d,4			; r10 = rounds * 16
	mov	eax,16
	movups	xmm8,XMMWORD[rdi]	; xmm8 = first input block
	paddq	xmm6,xmm9		; advance the shuffled counter
	lea	rdi,[16+rdi]
	sub	rax,r10			; rax = 16 - rounds*16
	lea	rcx,[32+r10*1+r11]	; rcx = key + 32 + rounds*16
	mov	r10,rax			; r10 = negative round-key index,
					; counts up to 0 in steps of 32
	jmp	NEAR $L$ccm64_dec_outer	; skip the alignment padding
ALIGN	16
$L$ccm64_dec_outer:
	xorps	xmm8,xmm2		; output = input XOR E(counter)
	movdqa	xmm2,xmm6		; next counter ...
	movups	XMMWORD[rsi],xmm8
	lea	rsi,[16+rsi]		; next output block
DB	102,15,56,0,215			; pshufb xmm2,xmm7 (... back in block order)

	sub	rdx,1			; one block done
	jz	NEAR $L$ccm64_dec_break	; last block: only the value update remains

	movups	xmm0,XMMWORD[r11]	; round key 0
	mov	rax,r10			; reset round-key index
	movups	xmm1,XMMWORD[16+r11]	; round key 1
	xorps	xmm8,xmm0		; xmm8 = output XOR key0
	xorps	xmm2,xmm0		; counter lane: whitening with key 0
	xorps	xmm3,xmm8		; value lane: value XOR output XOR key0
	movups	xmm0,XMMWORD[32+r11]	; round key 2
	jmp	NEAR $L$ccm64_dec2_loop	; skip the alignment padding
ALIGN	16
	; Two rounds per iteration on both lanes; xmm1/xmm0 alternate as
	; the current round key and are refilled through [rax+rcx].
$L$ccm64_dec2_loop:
DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
	movups	xmm1,XMMWORD[rax*1+rcx]
	add	rax,32			; sets ZF when the index reaches 0
DB	102,15,56,220,208		; aesenc xmm2,xmm0
DB	102,15,56,220,216		; aesenc xmm3,xmm0
	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz	NEAR $L$ccm64_dec2_loop	; ZF still from "add rax,32"
	movups	xmm8,XMMWORD[rdi]	; xmm8 = next input block
	paddq	xmm6,xmm9		; advance the shuffled counter
DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,221,208		; aesenclast xmm2,xmm0
DB	102,15,56,221,216		; aesenclast xmm3,xmm0
	lea	rdi,[16+rdi]
	jmp	NEAR $L$ccm64_dec_outer

ALIGN	16
$L$ccm64_dec_break:

	; Final running-value update for the last output block,
	; single lane: "eax" AESENC rounds followed by one AESENCLAST.
	mov	eax,DWORD[240+r11]	; reload the round count
	movups	xmm0,XMMWORD[r11]	; round key 0
	movups	xmm1,XMMWORD[16+r11]	; round key 1
	xorps	xmm8,xmm0		; xmm8 = output XOR key0
	lea	r11,[32+r11]
	xorps	xmm3,xmm8		; value XOR output XOR key0
$L$oop_enc1_6:
DB	102,15,56,220,217		; aesenc xmm3,xmm1
	dec	eax
	movups	xmm1,XMMWORD[r11]	; next round key
	lea	r11,[16+r11]
	jnz	NEAR $L$oop_enc1_6	; ZF still from "dec eax"
DB	102,15,56,221,217		; aesenclast xmm3,xmm1

	; Store the running value, zero the working registers, then
	; restore xmm6-xmm9 and overwrite their stack slots with zero.
	pxor	xmm0,xmm0
	pxor	xmm1,xmm1
	pxor	xmm2,xmm2
	movups	XMMWORD[r9],xmm3	; write back the running value
	pxor	xmm3,xmm3
	pxor	xmm8,xmm8
	pxor	xmm6,xmm6		; (xmm6 is reloaded on the next line)
	movaps	xmm6,XMMWORD[rsp]
	movaps	XMMWORD[rsp],xmm0
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	XMMWORD[16+rsp],xmm0
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	XMMWORD[32+rsp],xmm0
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	XMMWORD[48+rsp],xmm0
	lea	rsp,[88+rsp]
$L$ccm64_dec_ret:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_aesni_ccm64_decrypt_blocks:
1088global aesni_ctr32_encrypt_blocks
Adam Langleyd9e397b2015-01-22 14:27:53 -08001089
1090ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07001091aesni_ctr32_encrypt_blocks:
1092 mov QWORD[8+rsp],rdi ;WIN64 prologue
1093 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -08001094 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -07001095$L$SEH_begin_aesni_ctr32_encrypt_blocks:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001096 mov rdi,rcx
1097 mov rsi,rdx
1098 mov rdx,r8
1099 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -07001100 mov r8,QWORD[40+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001101
1102
Adam Langleye9ada862015-05-11 17:20:37 -07001103 cmp rdx,1
1104 jne NEAR $L$ctr32_bulk
1105
1106
1107
1108 movups xmm2,XMMWORD[r8]
1109 movups xmm3,XMMWORD[rdi]
1110 mov edx,DWORD[240+rcx]
1111 movups xmm0,XMMWORD[rcx]
1112 movups xmm1,XMMWORD[16+rcx]
1113 lea rcx,[32+rcx]
1114 xorps xmm2,xmm0
1115$L$oop_enc1_7:
1116DB 102,15,56,220,209
1117 dec edx
1118 movups xmm1,XMMWORD[rcx]
1119 lea rcx,[16+rcx]
1120 jnz NEAR $L$oop_enc1_7
1121DB 102,15,56,221,209
1122 pxor xmm0,xmm0
1123 pxor xmm1,xmm1
1124 xorps xmm2,xmm3
1125 pxor xmm3,xmm3
1126 movups XMMWORD[rsi],xmm2
1127 xorps xmm2,xmm2
1128 jmp NEAR $L$ctr32_epilogue
1129
1130ALIGN 16
1131$L$ctr32_bulk:
Robert Sloana94fe052017-02-21 08:49:28 -08001132 lea r11,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001133 push rbp
1134 sub rsp,288
1135 and rsp,-16
Robert Sloana94fe052017-02-21 08:49:28 -08001136 movaps XMMWORD[(-168)+r11],xmm6
1137 movaps XMMWORD[(-152)+r11],xmm7
1138 movaps XMMWORD[(-136)+r11],xmm8
1139 movaps XMMWORD[(-120)+r11],xmm9
1140 movaps XMMWORD[(-104)+r11],xmm10
1141 movaps XMMWORD[(-88)+r11],xmm11
1142 movaps XMMWORD[(-72)+r11],xmm12
1143 movaps XMMWORD[(-56)+r11],xmm13
1144 movaps XMMWORD[(-40)+r11],xmm14
1145 movaps XMMWORD[(-24)+r11],xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07001146$L$ctr32_body:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001147
Adam Langleyd9e397b2015-01-22 14:27:53 -08001148
Adam Langleye9ada862015-05-11 17:20:37 -07001149
1150
1151 movdqu xmm2,XMMWORD[r8]
1152 movdqu xmm0,XMMWORD[rcx]
1153 mov r8d,DWORD[12+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001154 pxor xmm2,xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08001155 mov ebp,DWORD[12+rcx]
Adam Langleye9ada862015-05-11 17:20:37 -07001156 movdqa XMMWORD[rsp],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001157 bswap r8d
1158 movdqa xmm3,xmm2
1159 movdqa xmm4,xmm2
1160 movdqa xmm5,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001161 movdqa XMMWORD[64+rsp],xmm2
1162 movdqa XMMWORD[80+rsp],xmm2
1163 movdqa XMMWORD[96+rsp],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001164 mov r10,rdx
Adam Langleye9ada862015-05-11 17:20:37 -07001165 movdqa XMMWORD[112+rsp],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001166
Adam Langleye9ada862015-05-11 17:20:37 -07001167 lea rax,[1+r8]
1168 lea rdx,[2+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001169 bswap eax
1170 bswap edx
Robert Sloana94fe052017-02-21 08:49:28 -08001171 xor eax,ebp
1172 xor edx,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001173DB 102,15,58,34,216,3
Adam Langleye9ada862015-05-11 17:20:37 -07001174 lea rax,[3+r8]
1175 movdqa XMMWORD[16+rsp],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001176DB 102,15,58,34,226,3
1177 bswap eax
1178 mov rdx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07001179 lea r10,[4+r8]
1180 movdqa XMMWORD[32+rsp],xmm4
Robert Sloana94fe052017-02-21 08:49:28 -08001181 xor eax,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001182 bswap r10d
1183DB 102,15,58,34,232,3
Robert Sloana94fe052017-02-21 08:49:28 -08001184 xor r10d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001185 movdqa XMMWORD[48+rsp],xmm5
1186 lea r9,[5+r8]
1187 mov DWORD[((64+12))+rsp],r10d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001188 bswap r9d
Adam Langleye9ada862015-05-11 17:20:37 -07001189 lea r10,[6+r8]
1190 mov eax,DWORD[240+rcx]
Robert Sloana94fe052017-02-21 08:49:28 -08001191 xor r9d,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001192 bswap r10d
Adam Langleye9ada862015-05-11 17:20:37 -07001193 mov DWORD[((80+12))+rsp],r9d
Robert Sloana94fe052017-02-21 08:49:28 -08001194 xor r10d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001195 lea r9,[7+r8]
1196 mov DWORD[((96+12))+rsp],r10d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001197 bswap r9d
Robert Sloan2424d842017-05-01 07:46:28 -07001198 lea r10,[OPENSSL_ia32cap_P]
Robert Sloan572a4e22017-04-17 10:52:19 -07001199 mov r10d,DWORD[4+r10]
Robert Sloana94fe052017-02-21 08:49:28 -08001200 xor r9d,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001201 and r10d,71303168
Adam Langleye9ada862015-05-11 17:20:37 -07001202 mov DWORD[((112+12))+rsp],r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001203
Adam Langleye9ada862015-05-11 17:20:37 -07001204 movups xmm1,XMMWORD[16+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001205
Adam Langleye9ada862015-05-11 17:20:37 -07001206 movdqa xmm6,XMMWORD[64+rsp]
1207 movdqa xmm7,XMMWORD[80+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001208
1209 cmp rdx,8
Adam Langleye9ada862015-05-11 17:20:37 -07001210 jb NEAR $L$ctr32_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001211
1212 sub rdx,6
1213 cmp r10d,4194304
Adam Langleye9ada862015-05-11 17:20:37 -07001214 je NEAR $L$ctr32_6x
Adam Langleyd9e397b2015-01-22 14:27:53 -08001215
Adam Langleye9ada862015-05-11 17:20:37 -07001216 lea rcx,[128+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001217 sub rdx,2
Adam Langleye9ada862015-05-11 17:20:37 -07001218 jmp NEAR $L$ctr32_loop8
Adam Langleyd9e397b2015-01-22 14:27:53 -08001219
1220ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07001221$L$ctr32_6x:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001222 shl eax,4
1223 mov r10d,48
Robert Sloana94fe052017-02-21 08:49:28 -08001224 bswap ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001225 lea rcx,[32+rax*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001226 sub r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -07001227 jmp NEAR $L$ctr32_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001228
1229ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07001230$L$ctr32_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001231 add r8d,6
Adam Langleye9ada862015-05-11 17:20:37 -07001232 movups xmm0,XMMWORD[((-48))+r10*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001233DB 102,15,56,220,209
1234 mov eax,r8d
Robert Sloana94fe052017-02-21 08:49:28 -08001235 xor eax,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001236DB 102,15,56,220,217
Adam Langleye9ada862015-05-11 17:20:37 -07001237DB 0x0f,0x38,0xf1,0x44,0x24,12
1238 lea eax,[1+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001239DB 102,15,56,220,225
Robert Sloana94fe052017-02-21 08:49:28 -08001240 xor eax,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001241DB 0x0f,0x38,0xf1,0x44,0x24,28
Adam Langleyd9e397b2015-01-22 14:27:53 -08001242DB 102,15,56,220,233
Adam Langleye9ada862015-05-11 17:20:37 -07001243 lea eax,[2+r8]
Robert Sloana94fe052017-02-21 08:49:28 -08001244 xor eax,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001245DB 102,15,56,220,241
Adam Langleye9ada862015-05-11 17:20:37 -07001246DB 0x0f,0x38,0xf1,0x44,0x24,44
1247 lea eax,[3+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001248DB 102,15,56,220,249
Adam Langleye9ada862015-05-11 17:20:37 -07001249 movups xmm1,XMMWORD[((-32))+r10*1+rcx]
Robert Sloana94fe052017-02-21 08:49:28 -08001250 xor eax,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001251
1252DB 102,15,56,220,208
Adam Langleye9ada862015-05-11 17:20:37 -07001253DB 0x0f,0x38,0xf1,0x44,0x24,60
1254 lea eax,[4+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001255DB 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001256 xor eax,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001257DB 0x0f,0x38,0xf1,0x44,0x24,76
Adam Langleyd9e397b2015-01-22 14:27:53 -08001258DB 102,15,56,220,224
Adam Langleye9ada862015-05-11 17:20:37 -07001259 lea eax,[5+r8]
Robert Sloana94fe052017-02-21 08:49:28 -08001260 xor eax,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001261DB 102,15,56,220,232
Adam Langleye9ada862015-05-11 17:20:37 -07001262DB 0x0f,0x38,0xf1,0x44,0x24,92
Adam Langleyd9e397b2015-01-22 14:27:53 -08001263 mov rax,r10
1264DB 102,15,56,220,240
1265DB 102,15,56,220,248
Adam Langleye9ada862015-05-11 17:20:37 -07001266 movups xmm0,XMMWORD[((-16))+r10*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001267
1268 call $L$enc_loop6
1269
Adam Langleye9ada862015-05-11 17:20:37 -07001270 movdqu xmm8,XMMWORD[rdi]
1271 movdqu xmm9,XMMWORD[16+rdi]
1272 movdqu xmm10,XMMWORD[32+rdi]
1273 movdqu xmm11,XMMWORD[48+rdi]
1274 movdqu xmm12,XMMWORD[64+rdi]
1275 movdqu xmm13,XMMWORD[80+rdi]
1276 lea rdi,[96+rdi]
1277 movups xmm1,XMMWORD[((-64))+r10*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001278 pxor xmm8,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001279 movaps xmm2,XMMWORD[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001280 pxor xmm9,xmm3
Adam Langleye9ada862015-05-11 17:20:37 -07001281 movaps xmm3,XMMWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001282 pxor xmm10,xmm4
Adam Langleye9ada862015-05-11 17:20:37 -07001283 movaps xmm4,XMMWORD[32+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001284 pxor xmm11,xmm5
Adam Langleye9ada862015-05-11 17:20:37 -07001285 movaps xmm5,XMMWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001286 pxor xmm12,xmm6
Adam Langleye9ada862015-05-11 17:20:37 -07001287 movaps xmm6,XMMWORD[64+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001288 pxor xmm13,xmm7
Adam Langleye9ada862015-05-11 17:20:37 -07001289 movaps xmm7,XMMWORD[80+rsp]
1290 movdqu XMMWORD[rsi],xmm8
1291 movdqu XMMWORD[16+rsi],xmm9
1292 movdqu XMMWORD[32+rsi],xmm10
1293 movdqu XMMWORD[48+rsi],xmm11
1294 movdqu XMMWORD[64+rsi],xmm12
1295 movdqu XMMWORD[80+rsi],xmm13
1296 lea rsi,[96+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001297
1298 sub rdx,6
Adam Langleye9ada862015-05-11 17:20:37 -07001299 jnc NEAR $L$ctr32_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001300
1301 add rdx,6
Adam Langleye9ada862015-05-11 17:20:37 -07001302 jz NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001303
Adam Langleye9ada862015-05-11 17:20:37 -07001304 lea eax,[((-48))+r10]
1305 lea rcx,[((-80))+r10*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001306 neg eax
1307 shr eax,4
Adam Langleye9ada862015-05-11 17:20:37 -07001308 jmp NEAR $L$ctr32_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08001309
1310ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07001311$L$ctr32_loop8:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001312 add r8d,8
Adam Langleye9ada862015-05-11 17:20:37 -07001313 movdqa xmm8,XMMWORD[96+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001314DB 102,15,56,220,209
1315 mov r9d,r8d
Adam Langleye9ada862015-05-11 17:20:37 -07001316 movdqa xmm9,XMMWORD[112+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001317DB 102,15,56,220,217
1318 bswap r9d
Adam Langleye9ada862015-05-11 17:20:37 -07001319 movups xmm0,XMMWORD[((32-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001320DB 102,15,56,220,225
Robert Sloana94fe052017-02-21 08:49:28 -08001321 xor r9d,ebp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001322 nop
1323DB 102,15,56,220,233
Adam Langleye9ada862015-05-11 17:20:37 -07001324 mov DWORD[((0+12))+rsp],r9d
1325 lea r9,[1+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001326DB 102,15,56,220,241
1327DB 102,15,56,220,249
1328DB 102,68,15,56,220,193
1329DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001330 movups xmm1,XMMWORD[((48-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001331 bswap r9d
1332DB 102,15,56,220,208
1333DB 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001334 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001335DB 0x66,0x90
Adam Langleyd9e397b2015-01-22 14:27:53 -08001336DB 102,15,56,220,224
1337DB 102,15,56,220,232
Adam Langleye9ada862015-05-11 17:20:37 -07001338 mov DWORD[((16+12))+rsp],r9d
1339 lea r9,[2+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001340DB 102,15,56,220,240
1341DB 102,15,56,220,248
1342DB 102,68,15,56,220,192
1343DB 102,68,15,56,220,200
Adam Langleye9ada862015-05-11 17:20:37 -07001344 movups xmm0,XMMWORD[((64-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001345 bswap r9d
1346DB 102,15,56,220,209
1347DB 102,15,56,220,217
Robert Sloana94fe052017-02-21 08:49:28 -08001348 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001349DB 0x66,0x90
Adam Langleyd9e397b2015-01-22 14:27:53 -08001350DB 102,15,56,220,225
1351DB 102,15,56,220,233
Adam Langleye9ada862015-05-11 17:20:37 -07001352 mov DWORD[((32+12))+rsp],r9d
1353 lea r9,[3+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001354DB 102,15,56,220,241
1355DB 102,15,56,220,249
1356DB 102,68,15,56,220,193
1357DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001358 movups xmm1,XMMWORD[((80-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001359 bswap r9d
1360DB 102,15,56,220,208
1361DB 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001362 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001363DB 0x66,0x90
Adam Langleyd9e397b2015-01-22 14:27:53 -08001364DB 102,15,56,220,224
1365DB 102,15,56,220,232
Adam Langleye9ada862015-05-11 17:20:37 -07001366 mov DWORD[((48+12))+rsp],r9d
1367 lea r9,[4+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001368DB 102,15,56,220,240
1369DB 102,15,56,220,248
1370DB 102,68,15,56,220,192
1371DB 102,68,15,56,220,200
Adam Langleye9ada862015-05-11 17:20:37 -07001372 movups xmm0,XMMWORD[((96-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001373 bswap r9d
1374DB 102,15,56,220,209
1375DB 102,15,56,220,217
Robert Sloana94fe052017-02-21 08:49:28 -08001376 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001377DB 0x66,0x90
Adam Langleyd9e397b2015-01-22 14:27:53 -08001378DB 102,15,56,220,225
1379DB 102,15,56,220,233
Adam Langleye9ada862015-05-11 17:20:37 -07001380 mov DWORD[((64+12))+rsp],r9d
1381 lea r9,[5+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001382DB 102,15,56,220,241
1383DB 102,15,56,220,249
1384DB 102,68,15,56,220,193
1385DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001386 movups xmm1,XMMWORD[((112-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001387 bswap r9d
1388DB 102,15,56,220,208
1389DB 102,15,56,220,216
Robert Sloana94fe052017-02-21 08:49:28 -08001390 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001391DB 0x66,0x90
Adam Langleyd9e397b2015-01-22 14:27:53 -08001392DB 102,15,56,220,224
1393DB 102,15,56,220,232
Adam Langleye9ada862015-05-11 17:20:37 -07001394 mov DWORD[((80+12))+rsp],r9d
1395 lea r9,[6+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001396DB 102,15,56,220,240
1397DB 102,15,56,220,248
1398DB 102,68,15,56,220,192
1399DB 102,68,15,56,220,200
Adam Langleye9ada862015-05-11 17:20:37 -07001400 movups xmm0,XMMWORD[((128-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001401 bswap r9d
1402DB 102,15,56,220,209
1403DB 102,15,56,220,217
Robert Sloana94fe052017-02-21 08:49:28 -08001404 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001405DB 0x66,0x90
Adam Langleyd9e397b2015-01-22 14:27:53 -08001406DB 102,15,56,220,225
1407DB 102,15,56,220,233
Adam Langleye9ada862015-05-11 17:20:37 -07001408 mov DWORD[((96+12))+rsp],r9d
1409 lea r9,[7+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001410DB 102,15,56,220,241
1411DB 102,15,56,220,249
1412DB 102,68,15,56,220,193
1413DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001414 movups xmm1,XMMWORD[((144-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001415 bswap r9d
1416DB 102,15,56,220,208
1417DB 102,15,56,220,216
1418DB 102,15,56,220,224
Robert Sloana94fe052017-02-21 08:49:28 -08001419 xor r9d,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001420 movdqu xmm10,XMMWORD[rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001421DB 102,15,56,220,232
Adam Langleye9ada862015-05-11 17:20:37 -07001422 mov DWORD[((112+12))+rsp],r9d
Adam Langleyd9e397b2015-01-22 14:27:53 -08001423 cmp eax,11
1424DB 102,15,56,220,240
1425DB 102,15,56,220,248
1426DB 102,68,15,56,220,192
1427DB 102,68,15,56,220,200
Adam Langleye9ada862015-05-11 17:20:37 -07001428 movups xmm0,XMMWORD[((160-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001429
Adam Langleye9ada862015-05-11 17:20:37 -07001430 jb NEAR $L$ctr32_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001431
1432DB 102,15,56,220,209
1433DB 102,15,56,220,217
1434DB 102,15,56,220,225
1435DB 102,15,56,220,233
1436DB 102,15,56,220,241
1437DB 102,15,56,220,249
1438DB 102,68,15,56,220,193
1439DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001440 movups xmm1,XMMWORD[((176-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001441
1442DB 102,15,56,220,208
1443DB 102,15,56,220,216
1444DB 102,15,56,220,224
1445DB 102,15,56,220,232
1446DB 102,15,56,220,240
1447DB 102,15,56,220,248
1448DB 102,68,15,56,220,192
1449DB 102,68,15,56,220,200
Adam Langleye9ada862015-05-11 17:20:37 -07001450 movups xmm0,XMMWORD[((192-128))+rcx]
1451 je NEAR $L$ctr32_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001452
1453DB 102,15,56,220,209
1454DB 102,15,56,220,217
1455DB 102,15,56,220,225
1456DB 102,15,56,220,233
1457DB 102,15,56,220,241
1458DB 102,15,56,220,249
1459DB 102,68,15,56,220,193
1460DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001461 movups xmm1,XMMWORD[((208-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001462
1463DB 102,15,56,220,208
1464DB 102,15,56,220,216
1465DB 102,15,56,220,224
1466DB 102,15,56,220,232
1467DB 102,15,56,220,240
1468DB 102,15,56,220,248
1469DB 102,68,15,56,220,192
1470DB 102,68,15,56,220,200
Adam Langleye9ada862015-05-11 17:20:37 -07001471 movups xmm0,XMMWORD[((224-128))+rcx]
1472 jmp NEAR $L$ctr32_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001473
1474ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07001475$L$ctr32_enc_done:
1476 movdqu xmm11,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001477 pxor xmm10,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001478 movdqu xmm12,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001479 pxor xmm11,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001480 movdqu xmm13,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001481 pxor xmm12,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001482 movdqu xmm14,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001483 pxor xmm13,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001484 movdqu xmm15,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001485 pxor xmm14,xmm0
1486 pxor xmm15,xmm0
1487DB 102,15,56,220,209
1488DB 102,15,56,220,217
1489DB 102,15,56,220,225
1490DB 102,15,56,220,233
1491DB 102,15,56,220,241
1492DB 102,15,56,220,249
1493DB 102,68,15,56,220,193
1494DB 102,68,15,56,220,201
Adam Langleye9ada862015-05-11 17:20:37 -07001495 movdqu xmm1,XMMWORD[96+rdi]
1496 lea rdi,[128+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001497
1498DB 102,65,15,56,221,210
1499 pxor xmm1,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001500 movdqu xmm10,XMMWORD[((112-128))+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001501DB 102,65,15,56,221,219
1502 pxor xmm10,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001503 movdqa xmm11,XMMWORD[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001504DB 102,65,15,56,221,228
1505DB 102,65,15,56,221,237
Adam Langleye9ada862015-05-11 17:20:37 -07001506 movdqa xmm12,XMMWORD[16+rsp]
1507 movdqa xmm13,XMMWORD[32+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001508DB 102,65,15,56,221,246
1509DB 102,65,15,56,221,255
Adam Langleye9ada862015-05-11 17:20:37 -07001510 movdqa xmm14,XMMWORD[48+rsp]
1511 movdqa xmm15,XMMWORD[64+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001512DB 102,68,15,56,221,193
Adam Langleye9ada862015-05-11 17:20:37 -07001513 movdqa xmm0,XMMWORD[80+rsp]
1514 movups xmm1,XMMWORD[((16-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001515DB 102,69,15,56,221,202
1516
Adam Langleye9ada862015-05-11 17:20:37 -07001517 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001518 movdqa xmm2,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07001519 movups XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001520 movdqa xmm3,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07001521 movups XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001522 movdqa xmm4,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07001523 movups XMMWORD[48+rsi],xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08001524 movdqa xmm5,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07001525 movups XMMWORD[64+rsi],xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001526 movdqa xmm6,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07001527 movups XMMWORD[80+rsi],xmm7
Adam Langleyd9e397b2015-01-22 14:27:53 -08001528 movdqa xmm7,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001529 movups XMMWORD[96+rsi],xmm8
1530 movups XMMWORD[112+rsi],xmm9
1531 lea rsi,[128+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001532
1533 sub rdx,8
Adam Langleye9ada862015-05-11 17:20:37 -07001534 jnc NEAR $L$ctr32_loop8
Adam Langleyd9e397b2015-01-22 14:27:53 -08001535
1536 add rdx,8
Adam Langleye9ada862015-05-11 17:20:37 -07001537 jz NEAR $L$ctr32_done
1538 lea rcx,[((-128))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001539
Adam Langleye9ada862015-05-11 17:20:37 -07001540$L$ctr32_tail:
1541
1542
1543 lea rcx,[16+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001544 cmp rdx,4
Adam Langleye9ada862015-05-11 17:20:37 -07001545 jb NEAR $L$ctr32_loop3
1546 je NEAR $L$ctr32_loop4
1547
Adam Langleyd9e397b2015-01-22 14:27:53 -08001548
1549 shl eax,4
Adam Langleye9ada862015-05-11 17:20:37 -07001550 movdqa xmm8,XMMWORD[96+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001551 pxor xmm9,xmm9
1552
Adam Langleye9ada862015-05-11 17:20:37 -07001553 movups xmm0,XMMWORD[16+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001554DB 102,15,56,220,209
1555DB 102,15,56,220,217
Adam Langleye9ada862015-05-11 17:20:37 -07001556 lea rcx,[((32-16))+rax*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001557 neg rax
1558DB 102,15,56,220,225
1559 add rax,16
Adam Langleye9ada862015-05-11 17:20:37 -07001560 movups xmm10,XMMWORD[rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001561DB 102,15,56,220,233
1562DB 102,15,56,220,241
Adam Langleye9ada862015-05-11 17:20:37 -07001563 movups xmm11,XMMWORD[16+rdi]
1564 movups xmm12,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001565DB 102,15,56,220,249
1566DB 102,68,15,56,220,193
1567
1568 call $L$enc_loop8_enter
1569
Adam Langleye9ada862015-05-11 17:20:37 -07001570 movdqu xmm13,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001571 pxor xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07001572 movdqu xmm10,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001573 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07001574 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001575 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07001576 movdqu XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001577 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07001578 movdqu XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001579 pxor xmm6,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07001580 movdqu XMMWORD[48+rsi],xmm5
1581 movdqu XMMWORD[64+rsi],xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001582 cmp rdx,6
Adam Langleye9ada862015-05-11 17:20:37 -07001583 jb NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001584
Adam Langleye9ada862015-05-11 17:20:37 -07001585 movups xmm11,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001586 xorps xmm7,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07001587 movups XMMWORD[80+rsi],xmm7
1588 je NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001589
Adam Langleye9ada862015-05-11 17:20:37 -07001590 movups xmm12,XMMWORD[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001591 xorps xmm8,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07001592 movups XMMWORD[96+rsi],xmm8
1593 jmp NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001594
1595ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07001596$L$ctr32_loop4:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001597DB 102,15,56,220,209
Adam Langleye9ada862015-05-11 17:20:37 -07001598 lea rcx,[16+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001599 dec eax
1600DB 102,15,56,220,217
1601DB 102,15,56,220,225
1602DB 102,15,56,220,233
Adam Langleye9ada862015-05-11 17:20:37 -07001603 movups xmm1,XMMWORD[rcx]
1604 jnz NEAR $L$ctr32_loop4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001605DB 102,15,56,221,209
1606DB 102,15,56,221,217
Adam Langleye9ada862015-05-11 17:20:37 -07001607 movups xmm10,XMMWORD[rdi]
1608 movups xmm11,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001609DB 102,15,56,221,225
1610DB 102,15,56,221,233
Adam Langleye9ada862015-05-11 17:20:37 -07001611 movups xmm12,XMMWORD[32+rdi]
1612 movups xmm13,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001613
1614 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07001615 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001616 xorps xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07001617 movups XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001618 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07001619 movdqu XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001620 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07001621 movdqu XMMWORD[48+rsi],xmm5
1622 jmp NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001623
1624ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07001625$L$ctr32_loop3:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001626DB 102,15,56,220,209
Adam Langleye9ada862015-05-11 17:20:37 -07001627 lea rcx,[16+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001628 dec eax
1629DB 102,15,56,220,217
1630DB 102,15,56,220,225
Adam Langleye9ada862015-05-11 17:20:37 -07001631 movups xmm1,XMMWORD[rcx]
1632 jnz NEAR $L$ctr32_loop3
Adam Langleyd9e397b2015-01-22 14:27:53 -08001633DB 102,15,56,221,209
1634DB 102,15,56,221,217
1635DB 102,15,56,221,225
1636
Adam Langleye9ada862015-05-11 17:20:37 -07001637 movups xmm10,XMMWORD[rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001638 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07001639 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08001640 cmp rdx,2
Adam Langleye9ada862015-05-11 17:20:37 -07001641 jb NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001642
Adam Langleye9ada862015-05-11 17:20:37 -07001643 movups xmm11,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001644 xorps xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07001645 movups XMMWORD[16+rsi],xmm3
1646 je NEAR $L$ctr32_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001647
Adam Langleye9ada862015-05-11 17:20:37 -07001648 movups xmm12,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001649 xorps xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07001650 movups XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08001651
Adam Langleye9ada862015-05-11 17:20:37 -07001652$L$ctr32_done:
1653 xorps xmm0,xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08001654 xor ebp,ebp
Adam Langleye9ada862015-05-11 17:20:37 -07001655 pxor xmm1,xmm1
1656 pxor xmm2,xmm2
1657 pxor xmm3,xmm3
1658 pxor xmm4,xmm4
1659 pxor xmm5,xmm5
Robert Sloana94fe052017-02-21 08:49:28 -08001660 movaps xmm6,XMMWORD[((-168))+r11]
1661 movaps XMMWORD[(-168)+r11],xmm0
1662 movaps xmm7,XMMWORD[((-152))+r11]
1663 movaps XMMWORD[(-152)+r11],xmm0
1664 movaps xmm8,XMMWORD[((-136))+r11]
1665 movaps XMMWORD[(-136)+r11],xmm0
1666 movaps xmm9,XMMWORD[((-120))+r11]
1667 movaps XMMWORD[(-120)+r11],xmm0
1668 movaps xmm10,XMMWORD[((-104))+r11]
1669 movaps XMMWORD[(-104)+r11],xmm0
1670 movaps xmm11,XMMWORD[((-88))+r11]
1671 movaps XMMWORD[(-88)+r11],xmm0
1672 movaps xmm12,XMMWORD[((-72))+r11]
1673 movaps XMMWORD[(-72)+r11],xmm0
1674 movaps xmm13,XMMWORD[((-56))+r11]
1675 movaps XMMWORD[(-56)+r11],xmm0
1676 movaps xmm14,XMMWORD[((-40))+r11]
1677 movaps XMMWORD[(-40)+r11],xmm0
1678 movaps xmm15,XMMWORD[((-24))+r11]
1679 movaps XMMWORD[(-24)+r11],xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001680 movaps XMMWORD[rsp],xmm0
1681 movaps XMMWORD[16+rsp],xmm0
1682 movaps XMMWORD[32+rsp],xmm0
1683 movaps XMMWORD[48+rsp],xmm0
1684 movaps XMMWORD[64+rsp],xmm0
1685 movaps XMMWORD[80+rsp],xmm0
1686 movaps XMMWORD[96+rsp],xmm0
1687 movaps XMMWORD[112+rsp],xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08001688 mov rbp,QWORD[((-8))+r11]
1689 lea rsp,[r11]
Adam Langleye9ada862015-05-11 17:20:37 -07001690$L$ctr32_epilogue:
1691 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1692 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001693 DB 0F3h,0C3h ;repret
Adam Langleye9ada862015-05-11 17:20:37 -07001694$L$SEH_end_aesni_ctr32_encrypt_blocks:
; ---------------------------------------------------------------------------
; aesni_xts_encrypt -- Win64 entry point.
;
; Arguments arrive in Microsoft x64 order (rcx, rdx, r8, r9, then the stack
; slots at [40+rsp] and [48+rsp]) and are shuffled into the registers the
; body works with:
;   rdi = input pointer   (read 16 bytes at a time)
;   rsi = output pointer  (written 16 bytes at a time)
;   rdx = length in bytes
;   rcx = key schedule used for the data blocks (loop count at offset 240)
;   r8  = key schedule used once, to encrypt the initial tweak
;   r9  = pointer to the 16 bytes that are encrypted into the initial tweak
;
; Frame layout (r11 = rsp on entry):
;   [r11-8]             saved rbp (from the push)
;   [r11-168 .. r11-8)  saved xmm6..xmm15 (callee-saved on Win64)
;   [rsp .. rsp+112)    scratch used by the body; lies at or below r11-168
; rdi and rsi (callee-saved on Win64) are parked in the caller-provided
; slots at [8+rsp] and [16+rsp] and reloaded in the epilogue.
; ---------------------------------------------------------------------------
1695global aesni_xts_encrypt
Adam Langleyd9e397b2015-01-22 14:27:53 -08001696
1697ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07001698aesni_xts_encrypt:
1699 mov QWORD[8+rsp],rdi ;WIN64 prologue
1700 mov QWORD[16+rsp],rsi
; NOTE(review): rax is overwritten by the round-count load in the body before
; it is read anywhere in this function; this copy of rsp looks like generator
; boilerplate -- confirm before removing.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001701 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -07001702$L$SEH_begin_aesni_xts_encrypt:
; Remap Win64 argument registers to rdi/rsi/rdx/rcx/r8/r9.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001703 mov rdi,rcx
1704 mov rsi,rdx
1705 mov rdx,r8
1706 mov rcx,r9
; Fifth and sixth arguments live on the stack above the 32-byte home area.
Adam Langleye9ada862015-05-11 17:20:37 -07001707 mov r8,QWORD[40+rsp]
1708 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001709
1710
; r11 remembers the entry rsp; everything saved below is addressed from it
; so the later "and rsp,-16" cannot disturb the offsets.
Robert Sloana94fe052017-02-21 08:49:28 -08001711 lea r11,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001712 push rbp
1713 sub rsp,272
1714 and rsp,-16
; Save the callee-saved vector registers; movaps needs 16-byte-aligned
; addresses, which holds when the entry rsp is 8 mod 16 (the usual state
; right after a call).
Robert Sloana94fe052017-02-21 08:49:28 -08001715 movaps XMMWORD[(-168)+r11],xmm6
1716 movaps XMMWORD[(-152)+r11],xmm7
1717 movaps XMMWORD[(-136)+r11],xmm8
1718 movaps XMMWORD[(-120)+r11],xmm9
1719 movaps XMMWORD[(-104)+r11],xmm10
1720 movaps XMMWORD[(-88)+r11],xmm11
1721 movaps XMMWORD[(-72)+r11],xmm12
1722 movaps XMMWORD[(-56)+r11],xmm13
1723 movaps XMMWORD[(-40)+r11],xmm14
1724 movaps XMMWORD[(-24)+r11],xmm15
; ---------------------------------------------------------------------------
; Body, part 1: encrypt the 16 bytes at [r9] with the key schedule at r8 to
; obtain the initial tweak, then derive the tweaks for the first five blocks
; (xmm10..xmm14) and the running tweak for the sixth (xmm15).
; ---------------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -07001725$L$xts_enc_body:
Adam Langleye9ada862015-05-11 17:20:37 -07001726 movups xmm2,XMMWORD[r9]
; DWORD at offset 240 of each key schedule is used as the aesenc loop count
; (presumably the AES_KEY rounds field -- the struct is not visible here).
1727 mov eax,DWORD[240+r8]
1728 mov r10d,DWORD[240+rcx]
1729 movups xmm0,XMMWORD[r8]
1730 movups xmm1,XMMWORD[16+r8]
1731 lea r8,[32+r8]
; Whitening with round key 0 of the tweak key.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001732 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001733$L$oop_enc1_8:
; DB = aesenc xmm2,xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08001734DB 102,15,56,220,209
1735 dec eax
; movups and lea leave the flags from dec intact for the jnz below.
Adam Langleye9ada862015-05-11 17:20:37 -07001736 movups xmm1,XMMWORD[r8]
1737 lea r8,[16+r8]
1738 jnz NEAR $L$oop_enc1_8
; DB = aesenclast xmm2,xmm1 ; xmm2 now holds the initial tweak.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001739DB 102,15,56,221,209
; From here on xmm0 = round key 0 of the data key, rbp = data key schedule
; base (kept because rcx is repurposed), eax = loop count, r10 = count * 16.
Adam Langleye9ada862015-05-11 17:20:37 -07001740 movups xmm0,XMMWORD[rcx]
Robert Sloana94fe052017-02-21 08:49:28 -08001741 mov rbp,rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -08001742 mov eax,r10d
1743 shl r10d,4
; r9 keeps the full byte length (its low four bits are tested after the
; bulk work); rdx is rounded down to whole 16-byte blocks.
1744 mov r9,rdx
1745 and rdx,-16
1746
; xmm1 = last round key: 16 bytes past (count * 16) from the schedule base,
; matching where the single-block loop above ends up for aesenclast.
Adam Langleye9ada862015-05-11 17:20:37 -07001747 movups xmm1,XMMWORD[16+r10*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001748
; Tweak chain. $L$xts_magic is defined outside this chunk; it supplies the
; constants that are folded in when a bit is shifted out of a 64-bit half.
;   xmm15 = running tweak
;   xmm9  = dwords 3,3,1,1 of the tweak (pshufd 0x5f), i.e. the dwords whose
;           sign bits are bit 127 and bit 63; paddd shifts the next bit into
;           the sign position for the following step
; Each step: copy xmm9, psrad 31 turns the sign bits into all-ones/zero
; masks, pand picks the constants, paddq doubles both 64-bit halves of the
; tweak and pxor folds the carries back in. This looks like the usual XTS
; multiply-by-x update -- confirm against the constant's definition.
; The tweak saved for each block is XORed with round key 0 (xmm0) up front
; so one pxor later applies both the tweak and the initial key whitening.
Adam Langleye9ada862015-05-11 17:20:37 -07001749 movdqa xmm8,XMMWORD[$L$xts_magic]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001750 movdqa xmm15,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07001751 pshufd xmm9,xmm2,0x5f
; xmm1 = last round key XOR round key 0 (stored to [96+rsp] below).
Adam Langleyd9e397b2015-01-22 14:27:53 -08001752 pxor xmm1,xmm0
; xmm10 = tweak for block 0, XOR round key 0
1753 movdqa xmm14,xmm9
1754 paddd xmm9,xmm9
1755 movdqa xmm10,xmm15
1756 psrad xmm14,31
1757 paddq xmm15,xmm15
1758 pand xmm14,xmm8
1759 pxor xmm10,xmm0
1760 pxor xmm15,xmm14
; xmm11 = tweak for block 1, XOR round key 0
1761 movdqa xmm14,xmm9
1762 paddd xmm9,xmm9
1763 movdqa xmm11,xmm15
1764 psrad xmm14,31
1765 paddq xmm15,xmm15
1766 pand xmm14,xmm8
1767 pxor xmm11,xmm0
1768 pxor xmm15,xmm14
; xmm12 = tweak for block 2, XOR round key 0
1769 movdqa xmm14,xmm9
1770 paddd xmm9,xmm9
1771 movdqa xmm12,xmm15
1772 psrad xmm14,31
1773 paddq xmm15,xmm15
1774 pand xmm14,xmm8
1775 pxor xmm12,xmm0
1776 pxor xmm15,xmm14
; xmm13 = tweak for block 3, XOR round key 0
1777 movdqa xmm14,xmm9
1778 paddd xmm9,xmm9
1779 movdqa xmm13,xmm15
1780 psrad xmm14,31
1781 paddq xmm15,xmm15
1782 pand xmm14,xmm8
1783 pxor xmm13,xmm0
1784 pxor xmm15,xmm14
; xmm14 = tweak for block 4, XOR round key 0; xmm15 is left holding the
; tweak for block 5 (not XORed with the key). xmm9 is consumed directly
; here because no further step follows in this chain.
1785 movdqa xmm14,xmm15
1786 psrad xmm9,31
1787 paddq xmm15,xmm15
1788 pand xmm9,xmm8
1789 pxor xmm14,xmm0
1790 pxor xmm15,xmm9
; [96+rsp] = last round key XOR round key 0, reloaded by the six-block loop.
Adam Langleye9ada862015-05-11 17:20:37 -07001791 movaps XMMWORD[96+rsp],xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08001792
; Fewer than six whole blocks: skip the bulk loop.
1793 sub rdx,16*6
Adam Langleye9ada862015-05-11 17:20:37 -07001794 jc NEAR $L$xts_enc_short
Adam Langleyd9e397b2015-01-22 14:27:53 -08001795
; Set up the bulk loop's key indexing:
;   rcx = rbp + 32 + count*16
;   rax = r10 = 112 - count*16   (a negative-or-zero offset that the inner
;         loop steps by 32 until it reaches zero)
;   xmm1 = round key 1, r8 = address of $L$xts_magic
1796 mov eax,16+96
Robert Sloana94fe052017-02-21 08:49:28 -08001797 lea rcx,[32+r10*1+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001798 sub rax,r10
Robert Sloana94fe052017-02-21 08:49:28 -08001799 movups xmm1,XMMWORD[16+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001800 mov r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -07001801 lea r8,[$L$xts_magic]
; The jump skips whatever padding ALIGN 32 inserts before the loop label.
1802 jmp NEAR $L$xts_enc_grandloop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001803
; ---------------------------------------------------------------------------
; Six-blocks-per-iteration main loop, head.
; On entry: xmm0 = round key 0, xmm1 = round key 1, xmm10..xmm14 = tweaks for
; blocks 0..4 already XORed with round key 0, xmm15 = tweak for block 5,
; [96+rsp] = last round key XOR round key 0.
; Loads six input blocks into xmm2..xmm7, applies tweak + key whitening,
; and starts the AES rounds interleaved with the loads. While doing so it
; rewrites each tweak as (tweak XOR last round key) and parks it on the
; stack so the final aesenclast can take it as its memory operand.
; DB byte patterns used here:
;   102,15,56,220,209/217/225/233/241/249 = aesenc xmm2..xmm7,xmm1
;   102,15,56,220,208/216/224/232/240/248 = aesenc xmm2..xmm7,xmm0
; ---------------------------------------------------------------------------
1804ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07001805$L$xts_enc_grandloop:
1806 movdqu xmm2,XMMWORD[rdi]
; xmm8 starts as round key 0 and becomes (key 0 XOR tweak 5) below.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001807 movdqa xmm8,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001808 movdqu xmm3,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001809 pxor xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07001810 movdqu xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001811 pxor xmm3,xmm11
; aesenc xmm2,xmm1
1812DB 102,15,56,220,209
Adam Langleye9ada862015-05-11 17:20:37 -07001813 movdqu xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001814 pxor xmm4,xmm12
; aesenc xmm3,xmm1
1815DB 102,15,56,220,217
Adam Langleye9ada862015-05-11 17:20:37 -07001816 movdqu xmm6,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001817 pxor xmm5,xmm13
; aesenc xmm4,xmm1
1818DB 102,15,56,220,225
Adam Langleye9ada862015-05-11 17:20:37 -07001819 movdqu xmm7,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001820 pxor xmm8,xmm15
; xmm9 = last round key XOR round key 0
Adam Langleye9ada862015-05-11 17:20:37 -07001821 movdqa xmm9,XMMWORD[96+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001822 pxor xmm6,xmm14
; aesenc xmm5,xmm1
1823DB 102,15,56,220,233
; xmm0 = round key 2
Robert Sloana94fe052017-02-21 08:49:28 -08001824 movups xmm0,XMMWORD[32+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -07001825 lea rdi,[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001826 pxor xmm7,xmm8
1827
; (tweak XOR key 0) XOR (last key XOR key 0) = tweak XOR last key
1828 pxor xmm10,xmm9
; aesenc xmm6,xmm1
1829DB 102,15,56,220,241
1830 pxor xmm11,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07001831 movdqa XMMWORD[rsp],xmm10
; aesenc xmm7,xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08001832DB 102,15,56,220,249
; xmm1 = round key 3
Robert Sloana94fe052017-02-21 08:49:28 -08001833 movups xmm1,XMMWORD[48+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001834 pxor xmm12,xmm9
1835
; Round 2 on all six blocks (aesenc xmmN,xmm0), interleaved with the stores.
1836DB 102,15,56,220,208
; xmm13 is converted here, but its slot at [48+rsp] is only written after
; $L$xts_enc_loop6.
1837 pxor xmm13,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07001838 movdqa XMMWORD[16+rsp],xmm11
Adam Langleyd9e397b2015-01-22 14:27:53 -08001839DB 102,15,56,220,216
1840 pxor xmm14,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07001841 movdqa XMMWORD[32+rsp],xmm12
Adam Langleyd9e397b2015-01-22 14:27:53 -08001842DB 102,15,56,220,224
1843DB 102,15,56,220,232
1844 pxor xmm8,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07001845 movdqa XMMWORD[64+rsp],xmm14
Adam Langleyd9e397b2015-01-22 14:27:53 -08001846DB 102,15,56,220,240
1847DB 102,15,56,220,248
; xmm0 = round key 4
Robert Sloana94fe052017-02-21 08:49:28 -08001848 movups xmm0,XMMWORD[64+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -07001849 movdqa XMMWORD[80+rsp],xmm8
; Prime the carry-mask register for the next batch of tweaks from xmm15.
1850 pshufd xmm9,xmm15,0x5f
; The jump skips whatever padding ALIGN 32 inserts before the loop label.
1851 jmp NEAR $L$xts_enc_loop6
; ---------------------------------------------------------------------------
; Inner round loop for six blocks, followed by the last rounds interleaved
; with the computation of the next six tweaks, the output stores and the
; loop-back test.
; rax is an offset that starts at r10 (112 - count*16) and is stepped by 32;
; rcx = rbp + 32 + count*16, so [disp+rax+rcx] walks the key schedule and
; the loop ends when rax reaches zero. Two rounds are done per iteration,
; alternating xmm1 and xmm0 as the round-key register.
; DB byte patterns:
;   102,15,56,220,209..249 (step 8) = aesenc xmm2..xmm7,xmm1
;   102,15,56,220,208..248 (step 8) = aesenc xmm2..xmm7,xmm0
;   102,15,56,221,<modrm>,36,<disp> = aesenclast xmmN,[rsp+disp]
; ---------------------------------------------------------------------------
Adam Langleyd9e397b2015-01-22 14:27:53 -08001852ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07001853$L$xts_enc_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08001854DB 102,15,56,220,209
1855DB 102,15,56,220,217
1856DB 102,15,56,220,225
1857DB 102,15,56,220,233
1858DB 102,15,56,220,241
1859DB 102,15,56,220,249
Adam Langleye9ada862015-05-11 17:20:37 -07001860 movups xmm1,XMMWORD[((-64))+rax*1+rcx]
; Sets ZF for the jnz below; the aesenc and movups in between do not touch
; the flags.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001861 add rax,32
1862
1863DB 102,15,56,220,208
1864DB 102,15,56,220,216
1865DB 102,15,56,220,224
1866DB 102,15,56,220,232
1867DB 102,15,56,220,240
1868DB 102,15,56,220,248
Adam Langleye9ada862015-05-11 17:20:37 -07001869 movups xmm0,XMMWORD[((-80))+rax*1+rcx]
1870 jnz NEAR $L$xts_enc_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08001871
; Remaining rounds, with the next tweaks computed in the gaps.
; xmm8 = constants at [r8] (r8 was pointed at $L$xts_magic before the loop).
; xmm15 is doubled step by step exactly as in the initial tweak chain;
; xmm10..xmm14 are rebuilt as (round key 0 XOR tweak) for the next batch.
Adam Langleye9ada862015-05-11 17:20:37 -07001872 movdqa xmm8,XMMWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001873 movdqa xmm14,xmm9
1874 paddd xmm9,xmm9
1875DB 102,15,56,220,209
1876 paddq xmm15,xmm15
1877 psrad xmm14,31
1878DB 102,15,56,220,217
1879 pand xmm14,xmm8
; xmm10 = round key 0; copied down the chain xmm11..xmm14 below, each copy
; then XORed with the tweak of its block.
Robert Sloana94fe052017-02-21 08:49:28 -08001880 movups xmm10,XMMWORD[rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001881DB 102,15,56,220,225
1882DB 102,15,56,220,233
1883DB 102,15,56,220,241
1884 pxor xmm15,xmm14
1885 movaps xmm11,xmm10
1886DB 102,15,56,220,249
Adam Langleye9ada862015-05-11 17:20:37 -07001887 movups xmm1,XMMWORD[((-64))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001888
1889 movdqa xmm14,xmm9
1890DB 102,15,56,220,208
1891 paddd xmm9,xmm9
; next xmm10 = round key 0 XOR tweak for next block 0
1892 pxor xmm10,xmm15
1893DB 102,15,56,220,216
1894 psrad xmm14,31
1895 paddq xmm15,xmm15
1896DB 102,15,56,220,224
1897DB 102,15,56,220,232
1898 pand xmm14,xmm8
1899 movaps xmm12,xmm11
1900DB 102,15,56,220,240
1901 pxor xmm15,xmm14
1902 movdqa xmm14,xmm9
1903DB 102,15,56,220,248
Adam Langleye9ada862015-05-11 17:20:37 -07001904 movups xmm0,XMMWORD[((-48))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001905
1906 paddd xmm9,xmm9
1907DB 102,15,56,220,209
; next xmm11 = round key 0 XOR tweak for next block 1
1908 pxor xmm11,xmm15
1909 psrad xmm14,31
1910DB 102,15,56,220,217
1911 paddq xmm15,xmm15
1912 pand xmm14,xmm8
1913DB 102,15,56,220,225
1914DB 102,15,56,220,233
; Deferred store of the current batch's (tweak 3 XOR last round key); it must
; land before xmm13 is overwritten two instructions further down.
Adam Langleye9ada862015-05-11 17:20:37 -07001915 movdqa XMMWORD[48+rsp],xmm13
Adam Langleyd9e397b2015-01-22 14:27:53 -08001916 pxor xmm15,xmm14
1917DB 102,15,56,220,241
1918 movaps xmm13,xmm12
1919 movdqa xmm14,xmm9
1920DB 102,15,56,220,249
Adam Langleye9ada862015-05-11 17:20:37 -07001921 movups xmm1,XMMWORD[((-32))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001922
1923 paddd xmm9,xmm9
1924DB 102,15,56,220,208
; next xmm12 = round key 0 XOR tweak for next block 2
1925 pxor xmm12,xmm15
1926 psrad xmm14,31
1927DB 102,15,56,220,216
1928 paddq xmm15,xmm15
1929 pand xmm14,xmm8
1930DB 102,15,56,220,224
1931DB 102,15,56,220,232
1932DB 102,15,56,220,240
1933 pxor xmm15,xmm14
1934 movaps xmm14,xmm13
1935DB 102,15,56,220,248
1936
; xmm0 is free as a temporary here: its round has just been applied to all
; six blocks and it is reloaded with round key 0 a few lines down.
1937 movdqa xmm0,xmm9
1938 paddd xmm9,xmm9
1939DB 102,15,56,220,209
; next xmm13 = round key 0 XOR tweak for next block 3
1940 pxor xmm13,xmm15
1941 psrad xmm0,31
1942DB 102,15,56,220,217
1943 paddq xmm15,xmm15
1944 pand xmm0,xmm8
1945DB 102,15,56,220,225
1946DB 102,15,56,220,233
1947 pxor xmm15,xmm0
; Re-establish the loop-entry invariants: xmm0 = round key 0, xmm1 = key 1.
Robert Sloana94fe052017-02-21 08:49:28 -08001948 movups xmm0,XMMWORD[rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001949DB 102,15,56,220,241
1950DB 102,15,56,220,249
Robert Sloana94fe052017-02-21 08:49:28 -08001951 movups xmm1,XMMWORD[16+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08001952
; next xmm14 = round key 0 XOR tweak for next block 4
1953 pxor xmm14,xmm15
; Final round: aesenclast xmm2..xmm7 with [rsp+0],[rsp+16],...,[rsp+80],
; i.e. with (tweak XOR last round key), which applies the last round key and
; the output tweak in a single instruction.
1954DB 102,15,56,221,84,36,0
1955 psrad xmm9,31
1956 paddq xmm15,xmm15
1957DB 102,15,56,221,92,36,16
1958DB 102,15,56,221,100,36,32
1959 pand xmm9,xmm8
; Reset the round-key offset for the next pass through the inner loop.
1960 mov rax,r10
1961DB 102,15,56,221,108,36,48
1962DB 102,15,56,221,116,36,64
1963DB 102,15,56,221,124,36,80
; xmm15 = tweak for next block 5
1964 pxor xmm15,xmm9
1965
; Store the six output blocks (rsi is advanced first, hence the negative
; displacements).
Adam Langleye9ada862015-05-11 17:20:37 -07001966 lea rsi,[96+rsi]
1967 movups XMMWORD[(-96)+rsi],xmm2
1968 movups XMMWORD[(-80)+rsi],xmm3
1969 movups XMMWORD[(-64)+rsi],xmm4
1970 movups XMMWORD[(-48)+rsi],xmm5
1971 movups XMMWORD[(-32)+rsi],xmm6
1972 movups XMMWORD[(-16)+rsi],xmm7
; Another six whole blocks available? rdx goes negative (carry) when not.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001973 sub rdx,16*6
Adam Langleye9ada862015-05-11 17:20:37 -07001974 jnc NEAR $L$xts_enc_grandloop
Adam Langleyd9e397b2015-01-22 14:27:53 -08001975
; Leaving the bulk loop: undo the indexing setup so the short path sees
; eax = loop count again (r10 = 112 - count*16, so (112 - r10) >> 4 = count)
; and rcx = key schedule base.
1976 mov eax,16+96
1977 sub eax,r10d
Robert Sloana94fe052017-02-21 08:49:28 -08001978 mov rcx,rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001979 shr eax,4
1980
; ---------------------------------------------------------------------------
; Fewer than six whole blocks remain.
; On entry: eax = loop count, rcx = key schedule base, xmm0 = round key 0,
; xmm10..xmm14 = tweaks XORed with round key 0, xmm15 = following tweak,
; rdx = remaining whole-block bytes minus 96.
; The key whitening is stripped from the tweaks again (pxor with xmm0)
; because the helpers called from the tail paths are handed plain
; tweak-masked blocks. Dispatches on the remaining size: 0 -> done,
; 16 -> one, 32 -> two, 48 -> three, 64 -> four, otherwise falls through to
; the five-block case.
; ---------------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -07001981$L$xts_enc_short:
1982
; Keep the loop count in r10d; eax is reused by the tail paths and the
; stealing code reloads it from r10d.
Adam Langleyd9e397b2015-01-22 14:27:53 -08001983 mov r10d,eax
1984 pxor xmm10,xmm0
1985 add rdx,16*6
Adam Langleye9ada862015-05-11 17:20:37 -07001986 jz NEAR $L$xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08001987
; pxor does not modify the flags, so each cmp below feeds both the jb and
; the je that follows it.
1988 pxor xmm11,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001989 cmp rdx,0x20
1990 jb NEAR $L$xts_enc_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08001991 pxor xmm12,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001992 je NEAR $L$xts_enc_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08001993
1994 pxor xmm13,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001995 cmp rdx,0x40
1996 jb NEAR $L$xts_enc_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08001997 pxor xmm14,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07001998 je NEAR $L$xts_enc_four
Adam Langleyd9e397b2015-01-22 14:27:53 -08001999
; Five blocks: load, mask with tweaks, encrypt, unmask, store.
Adam Langleye9ada862015-05-11 17:20:37 -07002000 movdqu xmm2,XMMWORD[rdi]
2001 movdqu xmm3,XMMWORD[16+rdi]
2002 movdqu xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002003 pxor xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002004 movdqu xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002005 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002006 movdqu xmm6,XMMWORD[64+rdi]
2007 lea rdi,[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002008 pxor xmm4,xmm12
2009 pxor xmm5,xmm13
2010 pxor xmm6,xmm14
; The sixth lane is unused; give it a defined (zero) value.
Adam Langleye9ada862015-05-11 17:20:37 -07002011 pxor xmm7,xmm7
Adam Langleyd9e397b2015-01-22 14:27:53 -08002012
; _aesni_encrypt6 is defined outside this chunk; presumably it encrypts
; xmm2..xmm7 with the schedule at rcx and count in eax -- confirm there.
2013 call _aesni_encrypt6
2014
2015 xorps xmm2,xmm10
; xmm10 = next unused tweak, needed if ciphertext stealing follows.
2016 movdqa xmm10,xmm15
2017 xorps xmm3,xmm11
2018 xorps xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002019 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002020 xorps xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07002021 movdqu XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08002022 xorps xmm6,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07002023 movdqu XMMWORD[32+rsi],xmm4
2024 movdqu XMMWORD[48+rsi],xmm5
2025 movdqu XMMWORD[64+rsi],xmm6
2026 lea rsi,[80+rsi]
2027 jmp NEAR $L$xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002028
; ---------------------------------------------------------------------------
; Tail: exactly one whole block left. Encrypts it inline with a single-block
; round loop (rcx = key schedule, eax = loop count; both are consumed).
; xmm10 = tweak for this block, xmm11 = following tweak.
; ---------------------------------------------------------------------------
2029ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002030$L$xts_enc_one:
2031 movups xmm2,XMMWORD[rdi]
2032 lea rdi,[16+rdi]
; Mask the input with the tweak.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002033 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002034 movups xmm0,XMMWORD[rcx]
2035 movups xmm1,XMMWORD[16+rcx]
2036 lea rcx,[32+rcx]
; Round key 0 whitening.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002037 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002038$L$oop_enc1_9:
; DB = aesenc xmm2,xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002039DB 102,15,56,220,209
2040 dec eax
; movups and lea leave the flags from dec intact for the jnz below.
Adam Langleye9ada862015-05-11 17:20:37 -07002041 movups xmm1,XMMWORD[rcx]
2042 lea rcx,[16+rcx]
2043 jnz NEAR $L$oop_enc1_9
; DB = aesenclast xmm2,xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002044DB 102,15,56,221,209
; Unmask the output with the same tweak.
2045 xorps xmm2,xmm10
; xmm10 = next unused tweak, needed if ciphertext stealing follows.
2046 movdqa xmm10,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002047 movups XMMWORD[rsi],xmm2
2048 lea rsi,[16+rsi]
2049 jmp NEAR $L$xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002050
; ---------------------------------------------------------------------------
; Tail: exactly two whole blocks left.
; xmm10, xmm11 = tweaks for these blocks, xmm12 = following tweak.
; ---------------------------------------------------------------------------
2051ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002052$L$xts_enc_two:
2053 movups xmm2,XMMWORD[rdi]
2054 movups xmm3,XMMWORD[16+rdi]
2055 lea rdi,[32+rdi]
; Mask the inputs with their tweaks.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002056 xorps xmm2,xmm10
2057 xorps xmm3,xmm11
2058
; _aesni_encrypt2 is defined outside this chunk; presumably it encrypts
; xmm2..xmm3 with the schedule at rcx and count in eax -- confirm there.
2059 call _aesni_encrypt2
2060
; Unmask the outputs.
2061 xorps xmm2,xmm10
; xmm10 = next unused tweak, needed if ciphertext stealing follows.
2062 movdqa xmm10,xmm12
2063 xorps xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002064 movups XMMWORD[rsi],xmm2
2065 movups XMMWORD[16+rsi],xmm3
2066 lea rsi,[32+rsi]
2067 jmp NEAR $L$xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002068
; ---------------------------------------------------------------------------
; Tail: exactly three whole blocks left.
; xmm10..xmm12 = tweaks for these blocks, xmm13 = following tweak.
; ---------------------------------------------------------------------------
2069ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002070$L$xts_enc_three:
2071 movups xmm2,XMMWORD[rdi]
2072 movups xmm3,XMMWORD[16+rdi]
2073 movups xmm4,XMMWORD[32+rdi]
2074 lea rdi,[48+rdi]
; Mask the inputs with their tweaks.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002075 xorps xmm2,xmm10
2076 xorps xmm3,xmm11
2077 xorps xmm4,xmm12
2078
; _aesni_encrypt3 is defined outside this chunk; presumably it encrypts
; xmm2..xmm4 with the schedule at rcx and count in eax -- confirm there.
2079 call _aesni_encrypt3
2080
; Unmask the outputs.
2081 xorps xmm2,xmm10
; xmm10 = next unused tweak, needed if ciphertext stealing follows.
2082 movdqa xmm10,xmm13
2083 xorps xmm3,xmm11
2084 xorps xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002085 movups XMMWORD[rsi],xmm2
2086 movups XMMWORD[16+rsi],xmm3
2087 movups XMMWORD[32+rsi],xmm4
2088 lea rsi,[48+rsi]
2089 jmp NEAR $L$xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002090
; ---------------------------------------------------------------------------
; Tail: exactly four whole blocks left.
; xmm10..xmm13 = tweaks for these blocks, xmm14 = following tweak.
; ---------------------------------------------------------------------------
2091ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002092$L$xts_enc_four:
2093 movups xmm2,XMMWORD[rdi]
2094 movups xmm3,XMMWORD[16+rdi]
2095 movups xmm4,XMMWORD[32+rdi]
; Mask the inputs with their tweaks (interleaved with the remaining load).
Adam Langleyd9e397b2015-01-22 14:27:53 -08002096 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002097 movups xmm5,XMMWORD[48+rdi]
2098 lea rdi,[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002099 xorps xmm3,xmm11
2100 xorps xmm4,xmm12
2101 xorps xmm5,xmm13
2102
; _aesni_encrypt4 is defined outside this chunk; presumably it encrypts
; xmm2..xmm5 with the schedule at rcx and count in eax -- confirm there.
2103 call _aesni_encrypt4
2104
; Unmask the outputs.
2105 pxor xmm2,xmm10
; xmm10 = next unused tweak, needed if ciphertext stealing follows.
2106 movdqa xmm10,xmm14
2107 pxor xmm3,xmm11
2108 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002109 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002110 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07002111 movdqu XMMWORD[16+rsi],xmm3
2112 movdqu XMMWORD[32+rsi],xmm4
2113 movdqu XMMWORD[48+rsi],xmm5
2114 lea rsi,[64+rsi]
2115 jmp NEAR $L$xts_enc_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002116
; ---------------------------------------------------------------------------
; All whole blocks are written. If the byte length (kept in r9) is not a
; multiple of 16, handle the trailing partial block by ciphertext stealing:
; the leading bytes of the last full ciphertext block become the short final
; output block, the trailing input bytes take their place, and that patched
; block is encrypted once more with the next tweak (xmm10).
; On entry: rdi = next input byte, rsi = next output byte, rbp = key
; schedule base, r10d = loop count, xmm10 = next unused tweak.
; ---------------------------------------------------------------------------
2117ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002118$L$xts_enc_done:
; r9 = number of leftover bytes (0..15); nothing to steal when zero.
Adam Langleyd9e397b2015-01-22 14:27:53 -08002119 and r9,15
Adam Langleye9ada862015-05-11 17:20:37 -07002120 jz NEAR $L$xts_enc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08002121 mov rdx,r9
2122
Adam Langleye9ada862015-05-11 17:20:37 -07002123$L$xts_enc_steal:
; Per byte: al = trailing input byte, cl = byte of the last ciphertext block;
; the input byte replaces the ciphertext byte, and the ciphertext byte is
; emitted as part of the short final block.
2124 movzx eax,BYTE[rdi]
2125 movzx ecx,BYTE[((-16))+rsi]
2126 lea rdi,[1+rdi]
2127 mov BYTE[((-16))+rsi],al
2128 mov BYTE[rsi],cl
2129 lea rsi,[1+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002130 sub rdx,1
Adam Langleye9ada862015-05-11 17:20:37 -07002131 jnz NEAR $L$xts_enc_steal
Adam Langleyd9e397b2015-01-22 14:27:53 -08002132
; Rewind rsi so [-16+rsi] addresses the patched block again, and restore the
; key pointer and loop count that the byte loop and earlier code clobbered.
2133 sub rsi,r9
Robert Sloana94fe052017-02-21 08:49:28 -08002134 mov rcx,rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002135 mov eax,r10d
2136
; Encrypt the patched block in place: tweak mask, key whitening, rounds,
; last round, tweak unmask.
Adam Langleye9ada862015-05-11 17:20:37 -07002137 movups xmm2,XMMWORD[((-16))+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002138 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002139 movups xmm0,XMMWORD[rcx]
2140 movups xmm1,XMMWORD[16+rcx]
2141 lea rcx,[32+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002142 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002143$L$oop_enc1_10:
; DB = aesenc xmm2,xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002144DB 102,15,56,220,209
2145 dec eax
; movups and lea leave the flags from dec intact for the jnz below.
Adam Langleye9ada862015-05-11 17:20:37 -07002146 movups xmm1,XMMWORD[rcx]
2147 lea rcx,[16+rcx]
2148 jnz NEAR $L$oop_enc1_10
; DB = aesenclast xmm2,xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002149DB 102,15,56,221,209
2150 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002151 movups XMMWORD[(-16)+rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002152
; ---------------------------------------------------------------------------
; Common exit. Zeroes the volatile vector registers that were used,
; restores xmm6..xmm15 from the save area addressed via r11 (the entry rsp)
; and overwrites each save slot and the scratch area at [rsp .. rsp+112)
; with zeros -- presumably so no key, tweak or data remnants stay on the
; stack. Then restores rbp and rsp, reloads rdi/rsi from the slots written
; by the prologue, and returns.
; ---------------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -07002153$L$xts_enc_ret:
; xmm0 doubles as the zero source for all the stores below.
2154 xorps xmm0,xmm0
2155 pxor xmm1,xmm1
2156 pxor xmm2,xmm2
2157 pxor xmm3,xmm3
2158 pxor xmm4,xmm4
2159 pxor xmm5,xmm5
; Restore each callee-saved register, then wipe the slot it came from.
Robert Sloana94fe052017-02-21 08:49:28 -08002160 movaps xmm6,XMMWORD[((-168))+r11]
2161 movaps XMMWORD[(-168)+r11],xmm0
2162 movaps xmm7,XMMWORD[((-152))+r11]
2163 movaps XMMWORD[(-152)+r11],xmm0
2164 movaps xmm8,XMMWORD[((-136))+r11]
2165 movaps XMMWORD[(-136)+r11],xmm0
2166 movaps xmm9,XMMWORD[((-120))+r11]
2167 movaps XMMWORD[(-120)+r11],xmm0
2168 movaps xmm10,XMMWORD[((-104))+r11]
2169 movaps XMMWORD[(-104)+r11],xmm0
2170 movaps xmm11,XMMWORD[((-88))+r11]
2171 movaps XMMWORD[(-88)+r11],xmm0
2172 movaps xmm12,XMMWORD[((-72))+r11]
2173 movaps XMMWORD[(-72)+r11],xmm0
2174 movaps xmm13,XMMWORD[((-56))+r11]
2175 movaps XMMWORD[(-56)+r11],xmm0
2176 movaps xmm14,XMMWORD[((-40))+r11]
2177 movaps XMMWORD[(-40)+r11],xmm0
2178 movaps xmm15,XMMWORD[((-24))+r11]
2179 movaps XMMWORD[(-24)+r11],xmm0
; Wipe the scratch area: six tweak slots plus the key XOR at [96+rsp].
Adam Langleye9ada862015-05-11 17:20:37 -07002180 movaps XMMWORD[rsp],xmm0
2181 movaps XMMWORD[16+rsp],xmm0
2182 movaps XMMWORD[32+rsp],xmm0
2183 movaps XMMWORD[48+rsp],xmm0
2184 movaps XMMWORD[64+rsp],xmm0
2185 movaps XMMWORD[80+rsp],xmm0
2186 movaps XMMWORD[96+rsp],xmm0
; rbp was pushed right below the entry rsp; then unwind the whole frame.
Robert Sloana94fe052017-02-21 08:49:28 -08002187 mov rbp,QWORD[((-8))+r11]
2188 lea rsp,[r11]
Adam Langleye9ada862015-05-11 17:20:37 -07002189$L$xts_enc_epilogue:
2190 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2191 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002192 DB 0F3h,0C3h ;repret
Adam Langleye9ada862015-05-11 17:20:37 -07002193$L$SEH_end_aesni_xts_encrypt:
2194global aesni_xts_decrypt
Adam Langleyd9e397b2015-01-22 14:27:53 -08002195
2196ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002197aesni_xts_decrypt:
2198 mov QWORD[8+rsp],rdi ;WIN64 prologue
2199 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -08002200 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -07002201$L$SEH_begin_aesni_xts_decrypt:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002202 mov rdi,rcx
2203 mov rsi,rdx
2204 mov rdx,r8
2205 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -07002206 mov r8,QWORD[40+rsp]
2207 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002208
2209
Robert Sloana94fe052017-02-21 08:49:28 -08002210 lea r11,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002211 push rbp
2212 sub rsp,272
2213 and rsp,-16
Robert Sloana94fe052017-02-21 08:49:28 -08002214 movaps XMMWORD[(-168)+r11],xmm6
2215 movaps XMMWORD[(-152)+r11],xmm7
2216 movaps XMMWORD[(-136)+r11],xmm8
2217 movaps XMMWORD[(-120)+r11],xmm9
2218 movaps XMMWORD[(-104)+r11],xmm10
2219 movaps XMMWORD[(-88)+r11],xmm11
2220 movaps XMMWORD[(-72)+r11],xmm12
2221 movaps XMMWORD[(-56)+r11],xmm13
2222 movaps XMMWORD[(-40)+r11],xmm14
2223 movaps XMMWORD[(-24)+r11],xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07002224$L$xts_dec_body:
Adam Langleye9ada862015-05-11 17:20:37 -07002225 movups xmm2,XMMWORD[r9]
2226 mov eax,DWORD[240+r8]
2227 mov r10d,DWORD[240+rcx]
2228 movups xmm0,XMMWORD[r8]
2229 movups xmm1,XMMWORD[16+r8]
2230 lea r8,[32+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002231 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002232$L$oop_enc1_11:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002233DB 102,15,56,220,209
2234 dec eax
Adam Langleye9ada862015-05-11 17:20:37 -07002235 movups xmm1,XMMWORD[r8]
2236 lea r8,[16+r8]
2237 jnz NEAR $L$oop_enc1_11
Adam Langleyd9e397b2015-01-22 14:27:53 -08002238DB 102,15,56,221,209
2239 xor eax,eax
2240 test rdx,15
2241 setnz al
2242 shl rax,4
2243 sub rdx,rax
2244
Adam Langleye9ada862015-05-11 17:20:37 -07002245 movups xmm0,XMMWORD[rcx]
Robert Sloana94fe052017-02-21 08:49:28 -08002246 mov rbp,rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -08002247 mov eax,r10d
2248 shl r10d,4
2249 mov r9,rdx
2250 and rdx,-16
2251
Adam Langleye9ada862015-05-11 17:20:37 -07002252 movups xmm1,XMMWORD[16+r10*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002253
Adam Langleye9ada862015-05-11 17:20:37 -07002254 movdqa xmm8,XMMWORD[$L$xts_magic]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002255 movdqa xmm15,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07002256 pshufd xmm9,xmm2,0x5f
Adam Langleyd9e397b2015-01-22 14:27:53 -08002257 pxor xmm1,xmm0
2258 movdqa xmm14,xmm9
2259 paddd xmm9,xmm9
2260 movdqa xmm10,xmm15
2261 psrad xmm14,31
2262 paddq xmm15,xmm15
2263 pand xmm14,xmm8
2264 pxor xmm10,xmm0
2265 pxor xmm15,xmm14
2266 movdqa xmm14,xmm9
2267 paddd xmm9,xmm9
2268 movdqa xmm11,xmm15
2269 psrad xmm14,31
2270 paddq xmm15,xmm15
2271 pand xmm14,xmm8
2272 pxor xmm11,xmm0
2273 pxor xmm15,xmm14
2274 movdqa xmm14,xmm9
2275 paddd xmm9,xmm9
2276 movdqa xmm12,xmm15
2277 psrad xmm14,31
2278 paddq xmm15,xmm15
2279 pand xmm14,xmm8
2280 pxor xmm12,xmm0
2281 pxor xmm15,xmm14
2282 movdqa xmm14,xmm9
2283 paddd xmm9,xmm9
2284 movdqa xmm13,xmm15
2285 psrad xmm14,31
2286 paddq xmm15,xmm15
2287 pand xmm14,xmm8
2288 pxor xmm13,xmm0
2289 pxor xmm15,xmm14
2290 movdqa xmm14,xmm15
2291 psrad xmm9,31
2292 paddq xmm15,xmm15
2293 pand xmm9,xmm8
2294 pxor xmm14,xmm0
2295 pxor xmm15,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07002296 movaps XMMWORD[96+rsp],xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08002297
2298 sub rdx,16*6
Adam Langleye9ada862015-05-11 17:20:37 -07002299 jc NEAR $L$xts_dec_short
Adam Langleyd9e397b2015-01-22 14:27:53 -08002300
2301 mov eax,16+96
Robert Sloana94fe052017-02-21 08:49:28 -08002302 lea rcx,[32+r10*1+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002303 sub rax,r10
Robert Sloana94fe052017-02-21 08:49:28 -08002304 movups xmm1,XMMWORD[16+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002305 mov r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -07002306 lea r8,[$L$xts_magic]
2307 jmp NEAR $L$xts_dec_grandloop
Adam Langleyd9e397b2015-01-22 14:27:53 -08002308
2309ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07002310$L$xts_dec_grandloop:
2311 movdqu xmm2,XMMWORD[rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002312 movdqa xmm8,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002313 movdqu xmm3,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002314 pxor xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002315 movdqu xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002316 pxor xmm3,xmm11
2317DB 102,15,56,222,209
Adam Langleye9ada862015-05-11 17:20:37 -07002318 movdqu xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002319 pxor xmm4,xmm12
2320DB 102,15,56,222,217
Adam Langleye9ada862015-05-11 17:20:37 -07002321 movdqu xmm6,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002322 pxor xmm5,xmm13
2323DB 102,15,56,222,225
Adam Langleye9ada862015-05-11 17:20:37 -07002324 movdqu xmm7,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002325 pxor xmm8,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07002326 movdqa xmm9,XMMWORD[96+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002327 pxor xmm6,xmm14
2328DB 102,15,56,222,233
Robert Sloana94fe052017-02-21 08:49:28 -08002329 movups xmm0,XMMWORD[32+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -07002330 lea rdi,[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002331 pxor xmm7,xmm8
2332
2333 pxor xmm10,xmm9
2334DB 102,15,56,222,241
2335 pxor xmm11,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07002336 movdqa XMMWORD[rsp],xmm10
Adam Langleyd9e397b2015-01-22 14:27:53 -08002337DB 102,15,56,222,249
Robert Sloana94fe052017-02-21 08:49:28 -08002338 movups xmm1,XMMWORD[48+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002339 pxor xmm12,xmm9
2340
2341DB 102,15,56,222,208
2342 pxor xmm13,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07002343 movdqa XMMWORD[16+rsp],xmm11
Adam Langleyd9e397b2015-01-22 14:27:53 -08002344DB 102,15,56,222,216
2345 pxor xmm14,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07002346 movdqa XMMWORD[32+rsp],xmm12
Adam Langleyd9e397b2015-01-22 14:27:53 -08002347DB 102,15,56,222,224
2348DB 102,15,56,222,232
2349 pxor xmm8,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07002350 movdqa XMMWORD[64+rsp],xmm14
Adam Langleyd9e397b2015-01-22 14:27:53 -08002351DB 102,15,56,222,240
2352DB 102,15,56,222,248
Robert Sloana94fe052017-02-21 08:49:28 -08002353 movups xmm0,XMMWORD[64+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -07002354 movdqa XMMWORD[80+rsp],xmm8
2355 pshufd xmm9,xmm15,0x5f
2356 jmp NEAR $L$xts_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08002357ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -07002358$L$xts_dec_loop6:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002359DB 102,15,56,222,209
2360DB 102,15,56,222,217
2361DB 102,15,56,222,225
2362DB 102,15,56,222,233
2363DB 102,15,56,222,241
2364DB 102,15,56,222,249
Adam Langleye9ada862015-05-11 17:20:37 -07002365 movups xmm1,XMMWORD[((-64))+rax*1+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002366 add rax,32
2367
2368DB 102,15,56,222,208
2369DB 102,15,56,222,216
2370DB 102,15,56,222,224
2371DB 102,15,56,222,232
2372DB 102,15,56,222,240
2373DB 102,15,56,222,248
Adam Langleye9ada862015-05-11 17:20:37 -07002374 movups xmm0,XMMWORD[((-80))+rax*1+rcx]
2375 jnz NEAR $L$xts_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08002376
Adam Langleye9ada862015-05-11 17:20:37 -07002377 movdqa xmm8,XMMWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002378 movdqa xmm14,xmm9
2379 paddd xmm9,xmm9
2380DB 102,15,56,222,209
2381 paddq xmm15,xmm15
2382 psrad xmm14,31
2383DB 102,15,56,222,217
2384 pand xmm14,xmm8
Robert Sloana94fe052017-02-21 08:49:28 -08002385 movups xmm10,XMMWORD[rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002386DB 102,15,56,222,225
2387DB 102,15,56,222,233
2388DB 102,15,56,222,241
2389 pxor xmm15,xmm14
2390 movaps xmm11,xmm10
2391DB 102,15,56,222,249
Adam Langleye9ada862015-05-11 17:20:37 -07002392 movups xmm1,XMMWORD[((-64))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002393
2394 movdqa xmm14,xmm9
2395DB 102,15,56,222,208
2396 paddd xmm9,xmm9
2397 pxor xmm10,xmm15
2398DB 102,15,56,222,216
2399 psrad xmm14,31
2400 paddq xmm15,xmm15
2401DB 102,15,56,222,224
2402DB 102,15,56,222,232
2403 pand xmm14,xmm8
2404 movaps xmm12,xmm11
2405DB 102,15,56,222,240
2406 pxor xmm15,xmm14
2407 movdqa xmm14,xmm9
2408DB 102,15,56,222,248
Adam Langleye9ada862015-05-11 17:20:37 -07002409 movups xmm0,XMMWORD[((-48))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002410
2411 paddd xmm9,xmm9
2412DB 102,15,56,222,209
2413 pxor xmm11,xmm15
2414 psrad xmm14,31
2415DB 102,15,56,222,217
2416 paddq xmm15,xmm15
2417 pand xmm14,xmm8
2418DB 102,15,56,222,225
2419DB 102,15,56,222,233
Adam Langleye9ada862015-05-11 17:20:37 -07002420 movdqa XMMWORD[48+rsp],xmm13
Adam Langleyd9e397b2015-01-22 14:27:53 -08002421 pxor xmm15,xmm14
2422DB 102,15,56,222,241
2423 movaps xmm13,xmm12
2424 movdqa xmm14,xmm9
2425DB 102,15,56,222,249
Adam Langleye9ada862015-05-11 17:20:37 -07002426 movups xmm1,XMMWORD[((-32))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002427
2428 paddd xmm9,xmm9
2429DB 102,15,56,222,208
2430 pxor xmm12,xmm15
2431 psrad xmm14,31
2432DB 102,15,56,222,216
2433 paddq xmm15,xmm15
2434 pand xmm14,xmm8
2435DB 102,15,56,222,224
2436DB 102,15,56,222,232
2437DB 102,15,56,222,240
2438 pxor xmm15,xmm14
2439 movaps xmm14,xmm13
2440DB 102,15,56,222,248
2441
2442 movdqa xmm0,xmm9
2443 paddd xmm9,xmm9
2444DB 102,15,56,222,209
2445 pxor xmm13,xmm15
2446 psrad xmm0,31
2447DB 102,15,56,222,217
2448 paddq xmm15,xmm15
2449 pand xmm0,xmm8
2450DB 102,15,56,222,225
2451DB 102,15,56,222,233
2452 pxor xmm15,xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08002453 movups xmm0,XMMWORD[rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002454DB 102,15,56,222,241
2455DB 102,15,56,222,249
Robert Sloana94fe052017-02-21 08:49:28 -08002456 movups xmm1,XMMWORD[16+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002457
2458 pxor xmm14,xmm15
2459DB 102,15,56,223,84,36,0
2460 psrad xmm9,31
2461 paddq xmm15,xmm15
2462DB 102,15,56,223,92,36,16
2463DB 102,15,56,223,100,36,32
2464 pand xmm9,xmm8
2465 mov rax,r10
2466DB 102,15,56,223,108,36,48
2467DB 102,15,56,223,116,36,64
2468DB 102,15,56,223,124,36,80
2469 pxor xmm15,xmm9
2470
Adam Langleye9ada862015-05-11 17:20:37 -07002471 lea rsi,[96+rsi]
2472 movups XMMWORD[(-96)+rsi],xmm2
2473 movups XMMWORD[(-80)+rsi],xmm3
2474 movups XMMWORD[(-64)+rsi],xmm4
2475 movups XMMWORD[(-48)+rsi],xmm5
2476 movups XMMWORD[(-32)+rsi],xmm6
2477 movups XMMWORD[(-16)+rsi],xmm7
Adam Langleyd9e397b2015-01-22 14:27:53 -08002478 sub rdx,16*6
Adam Langleye9ada862015-05-11 17:20:37 -07002479 jnc NEAR $L$xts_dec_grandloop
Adam Langleyd9e397b2015-01-22 14:27:53 -08002480
2481 mov eax,16+96
2482 sub eax,r10d
Robert Sloana94fe052017-02-21 08:49:28 -08002483 mov rcx,rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002484 shr eax,4
2485
Adam Langleye9ada862015-05-11 17:20:37 -07002486$L$xts_dec_short:
2487
Adam Langleyd9e397b2015-01-22 14:27:53 -08002488 mov r10d,eax
2489 pxor xmm10,xmm0
2490 pxor xmm11,xmm0
2491 add rdx,16*6
Adam Langleye9ada862015-05-11 17:20:37 -07002492 jz NEAR $L$xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002493
2494 pxor xmm12,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002495 cmp rdx,0x20
2496 jb NEAR $L$xts_dec_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08002497 pxor xmm13,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002498 je NEAR $L$xts_dec_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08002499
2500 pxor xmm14,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002501 cmp rdx,0x40
2502 jb NEAR $L$xts_dec_three
2503 je NEAR $L$xts_dec_four
Adam Langleyd9e397b2015-01-22 14:27:53 -08002504
Adam Langleye9ada862015-05-11 17:20:37 -07002505 movdqu xmm2,XMMWORD[rdi]
2506 movdqu xmm3,XMMWORD[16+rdi]
2507 movdqu xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002508 pxor xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002509 movdqu xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002510 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002511 movdqu xmm6,XMMWORD[64+rdi]
2512 lea rdi,[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002513 pxor xmm4,xmm12
2514 pxor xmm5,xmm13
2515 pxor xmm6,xmm14
2516
2517 call _aesni_decrypt6
2518
2519 xorps xmm2,xmm10
2520 xorps xmm3,xmm11
2521 xorps xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002522 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002523 xorps xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07002524 movdqu XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08002525 xorps xmm6,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07002526 movdqu XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08002527 pxor xmm14,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07002528 movdqu XMMWORD[48+rsi],xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08002529 pcmpgtd xmm14,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07002530 movdqu XMMWORD[64+rsi],xmm6
2531 lea rsi,[80+rsi]
2532 pshufd xmm11,xmm14,0x13
Adam Langleyd9e397b2015-01-22 14:27:53 -08002533 and r9,15
Adam Langleye9ada862015-05-11 17:20:37 -07002534 jz NEAR $L$xts_dec_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08002535
2536 movdqa xmm10,xmm15
2537 paddq xmm15,xmm15
2538 pand xmm11,xmm8
2539 pxor xmm11,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07002540 jmp NEAR $L$xts_dec_done2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002541
2542ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002543$L$xts_dec_one:
2544 movups xmm2,XMMWORD[rdi]
2545 lea rdi,[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002546 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002547 movups xmm0,XMMWORD[rcx]
2548 movups xmm1,XMMWORD[16+rcx]
2549 lea rcx,[32+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002550 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002551$L$oop_dec1_12:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002552DB 102,15,56,222,209
2553 dec eax
Adam Langleye9ada862015-05-11 17:20:37 -07002554 movups xmm1,XMMWORD[rcx]
2555 lea rcx,[16+rcx]
2556 jnz NEAR $L$oop_dec1_12
Adam Langleyd9e397b2015-01-22 14:27:53 -08002557DB 102,15,56,223,209
2558 xorps xmm2,xmm10
2559 movdqa xmm10,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002560 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002561 movdqa xmm11,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002562 lea rsi,[16+rsi]
2563 jmp NEAR $L$xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002564
2565ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002566$L$xts_dec_two:
2567 movups xmm2,XMMWORD[rdi]
2568 movups xmm3,XMMWORD[16+rdi]
2569 lea rdi,[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002570 xorps xmm2,xmm10
2571 xorps xmm3,xmm11
2572
2573 call _aesni_decrypt2
2574
2575 xorps xmm2,xmm10
2576 movdqa xmm10,xmm12
2577 xorps xmm3,xmm11
2578 movdqa xmm11,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07002579 movups XMMWORD[rsi],xmm2
2580 movups XMMWORD[16+rsi],xmm3
2581 lea rsi,[32+rsi]
2582 jmp NEAR $L$xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002583
2584ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002585$L$xts_dec_three:
2586 movups xmm2,XMMWORD[rdi]
2587 movups xmm3,XMMWORD[16+rdi]
2588 movups xmm4,XMMWORD[32+rdi]
2589 lea rdi,[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002590 xorps xmm2,xmm10
2591 xorps xmm3,xmm11
2592 xorps xmm4,xmm12
2593
2594 call _aesni_decrypt3
2595
2596 xorps xmm2,xmm10
2597 movdqa xmm10,xmm13
2598 xorps xmm3,xmm11
2599 movdqa xmm11,xmm14
2600 xorps xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002601 movups XMMWORD[rsi],xmm2
2602 movups XMMWORD[16+rsi],xmm3
2603 movups XMMWORD[32+rsi],xmm4
2604 lea rsi,[48+rsi]
2605 jmp NEAR $L$xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002606
2607ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002608$L$xts_dec_four:
2609 movups xmm2,XMMWORD[rdi]
2610 movups xmm3,XMMWORD[16+rdi]
2611 movups xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002612 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002613 movups xmm5,XMMWORD[48+rdi]
2614 lea rdi,[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002615 xorps xmm3,xmm11
2616 xorps xmm4,xmm12
2617 xorps xmm5,xmm13
2618
2619 call _aesni_decrypt4
2620
2621 pxor xmm2,xmm10
2622 movdqa xmm10,xmm14
2623 pxor xmm3,xmm11
2624 movdqa xmm11,xmm15
2625 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07002626 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002627 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07002628 movdqu XMMWORD[16+rsi],xmm3
2629 movdqu XMMWORD[32+rsi],xmm4
2630 movdqu XMMWORD[48+rsi],xmm5
2631 lea rsi,[64+rsi]
2632 jmp NEAR $L$xts_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08002633
2634ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07002635$L$xts_dec_done:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002636 and r9,15
Adam Langleye9ada862015-05-11 17:20:37 -07002637 jz NEAR $L$xts_dec_ret
2638$L$xts_dec_done2:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002639 mov rdx,r9
Robert Sloana94fe052017-02-21 08:49:28 -08002640 mov rcx,rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002641 mov eax,r10d
2642
Adam Langleye9ada862015-05-11 17:20:37 -07002643 movups xmm2,XMMWORD[rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002644 xorps xmm2,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002645 movups xmm0,XMMWORD[rcx]
2646 movups xmm1,XMMWORD[16+rcx]
2647 lea rcx,[32+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002648 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002649$L$oop_dec1_13:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002650DB 102,15,56,222,209
2651 dec eax
Adam Langleye9ada862015-05-11 17:20:37 -07002652 movups xmm1,XMMWORD[rcx]
2653 lea rcx,[16+rcx]
2654 jnz NEAR $L$oop_dec1_13
Adam Langleyd9e397b2015-01-22 14:27:53 -08002655DB 102,15,56,223,209
2656 xorps xmm2,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07002657 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002658
Adam Langleye9ada862015-05-11 17:20:37 -07002659$L$xts_dec_steal:
2660 movzx eax,BYTE[16+rdi]
2661 movzx ecx,BYTE[rsi]
2662 lea rdi,[1+rdi]
2663 mov BYTE[rsi],al
2664 mov BYTE[16+rsi],cl
2665 lea rsi,[1+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002666 sub rdx,1
Adam Langleye9ada862015-05-11 17:20:37 -07002667 jnz NEAR $L$xts_dec_steal
Adam Langleyd9e397b2015-01-22 14:27:53 -08002668
2669 sub rsi,r9
Robert Sloana94fe052017-02-21 08:49:28 -08002670 mov rcx,rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08002671 mov eax,r10d
2672
Adam Langleye9ada862015-05-11 17:20:37 -07002673 movups xmm2,XMMWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002674 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002675 movups xmm0,XMMWORD[rcx]
2676 movups xmm1,XMMWORD[16+rcx]
2677 lea rcx,[32+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002678 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002679$L$oop_dec1_14:
Adam Langleyd9e397b2015-01-22 14:27:53 -08002680DB 102,15,56,222,209
2681 dec eax
Adam Langleye9ada862015-05-11 17:20:37 -07002682 movups xmm1,XMMWORD[rcx]
2683 lea rcx,[16+rcx]
2684 jnz NEAR $L$oop_dec1_14
Adam Langleyd9e397b2015-01-22 14:27:53 -08002685DB 102,15,56,223,209
2686 xorps xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07002687 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08002688
Adam Langleye9ada862015-05-11 17:20:37 -07002689$L$xts_dec_ret:
2690 xorps xmm0,xmm0
2691 pxor xmm1,xmm1
2692 pxor xmm2,xmm2
2693 pxor xmm3,xmm3
2694 pxor xmm4,xmm4
2695 pxor xmm5,xmm5
Robert Sloana94fe052017-02-21 08:49:28 -08002696 movaps xmm6,XMMWORD[((-168))+r11]
2697 movaps XMMWORD[(-168)+r11],xmm0
2698 movaps xmm7,XMMWORD[((-152))+r11]
2699 movaps XMMWORD[(-152)+r11],xmm0
2700 movaps xmm8,XMMWORD[((-136))+r11]
2701 movaps XMMWORD[(-136)+r11],xmm0
2702 movaps xmm9,XMMWORD[((-120))+r11]
2703 movaps XMMWORD[(-120)+r11],xmm0
2704 movaps xmm10,XMMWORD[((-104))+r11]
2705 movaps XMMWORD[(-104)+r11],xmm0
2706 movaps xmm11,XMMWORD[((-88))+r11]
2707 movaps XMMWORD[(-88)+r11],xmm0
2708 movaps xmm12,XMMWORD[((-72))+r11]
2709 movaps XMMWORD[(-72)+r11],xmm0
2710 movaps xmm13,XMMWORD[((-56))+r11]
2711 movaps XMMWORD[(-56)+r11],xmm0
2712 movaps xmm14,XMMWORD[((-40))+r11]
2713 movaps XMMWORD[(-40)+r11],xmm0
2714 movaps xmm15,XMMWORD[((-24))+r11]
2715 movaps XMMWORD[(-24)+r11],xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07002716 movaps XMMWORD[rsp],xmm0
2717 movaps XMMWORD[16+rsp],xmm0
2718 movaps XMMWORD[32+rsp],xmm0
2719 movaps XMMWORD[48+rsp],xmm0
2720 movaps XMMWORD[64+rsp],xmm0
2721 movaps XMMWORD[80+rsp],xmm0
2722 movaps XMMWORD[96+rsp],xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08002723 mov rbp,QWORD[((-8))+r11]
2724 lea rsp,[r11]
Adam Langleye9ada862015-05-11 17:20:37 -07002725$L$xts_dec_epilogue:
2726 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2727 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08002728 DB 0F3h,0C3h ;repret
Adam Langleye9ada862015-05-11 17:20:37 -07002729$L$SEH_end_aesni_xts_decrypt:
;-----------------------------------------------------------------------
; aesni_ocb_encrypt -- bulk OCB-mode encryption of whole 16-byte blocks.
;
; ABI: Win64 entry point; arguments are immediately shuffled into the
; registers the body uses:
;   rcx       -> rdi  input pointer  (16 bytes read per block)
;   rdx       -> rsi  output pointer (16 bytes written per block)
;   r8        -> rdx  number of 16-byte blocks
;   r9        -> rcx  AES key schedule; DWORD[240+key] is the round count
;   [40+rsp]  -> r8   running block index (drives the bsf table lookups)
;   [48+rsp]  -> r9   16-byte offset value, read on entry, written on exit
;   [56+rsp]  -> rbx  table of 16-byte entries, indexed by bsf(index)*16
;                     (presumably the OCB L_ table -- confirm with caller)
;   [64+rsp]  -> rbp  16-byte checksum, read on entry, written on exit
;
; Register roles inside the body:
;   xmm9  = rk[0] ^ rk[last]  (rk[last] = key+16+16*rounds)
;   xmm15 = current offset ^ rk[last]; the mask is removed at *_done
;   xmm8  = running checksum (helpers XOR the plaintext blocks into it)
;   xmm10 = table entry 0 ([rbx]); r11 = key schedule base
;   rcx   = key+32+16*rounds, rax = r10 = 48-16*rounds (round-key cursor
;           that the helpers count up to zero)
;
; Saves/restores rbx, rbp, r12-r14, rdi, rsi and xmm6-xmm15; scrubs
; xmm0-xmm5 and the xmm spill area before returning.
;
; NOTE(review): there is no guard for a zero block count. With an even
; block index one block is processed before the count is first tested,
; and $L$ocb_enc_done relies on xmm0 having been loaded by a helper.
; Callers are expected to pass at least one block.
;-----------------------------------------------------------------------
global	aesni_ocb_encrypt

ALIGN	32
aesni_ocb_encrypt:
	mov QWORD[8+rsp],rdi	;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_aesni_ocb_encrypt:
	mov rdi,rcx
	mov rsi,rdx
	mov rdx,r8
	mov rcx,r9
	mov r8,QWORD[40+rsp]
	mov r9,QWORD[48+rsp]


	; rax keeps the entry rsp so the stack arguments stay addressable
	; after the pushes below.
	lea rax,[rsp]
	push rbx
	push rbp
	push r12
	push r13
	push r14
	; 160-byte spill area for the callee-saved xmm6-xmm15.
	lea rsp,[((-160))+rsp]
	movaps XMMWORD[rsp],xmm6
	movaps XMMWORD[16+rsp],xmm7
	movaps XMMWORD[32+rsp],xmm8
	movaps XMMWORD[48+rsp],xmm9
	movaps XMMWORD[64+rsp],xmm10
	movaps XMMWORD[80+rsp],xmm11
	movaps XMMWORD[96+rsp],xmm12
	movaps XMMWORD[112+rsp],xmm13
	movaps XMMWORD[128+rsp],xmm14
	movaps XMMWORD[144+rsp],xmm15
$L$ocb_enc_body:
	mov rbx,QWORD[56+rax]		; 7th argument: table pointer
	mov rbp,QWORD[((56+8))+rax]	; 8th argument: checksum pointer

	mov r10d,DWORD[240+rcx]		; round count
	mov r11,rcx			; r11 = key schedule base
	shl r10d,4			; r10 = 16*rounds
	movups xmm9,XMMWORD[rcx]	; rk[0]
	movups xmm1,XMMWORD[16+r10*1+rcx]	; rk[last]

	movdqu xmm15,XMMWORD[r9]	; incoming offset
	pxor xmm9,xmm1			; xmm9  = rk[0] ^ rk[last]
	pxor xmm15,xmm1			; xmm15 = offset ^ rk[last]

	mov eax,16+32
	lea rcx,[32+r10*1+r11]		; rcx = key+32+16*rounds
	movups xmm1,XMMWORD[16+r11]	; rk[1], expected by the helpers
	sub rax,r10			; rax = 48-16*rounds
	mov r10,rax			; r10 = copy used to reset rax

	movdqu xmm10,XMMWORD[rbx]	; table entry 0
	movdqu xmm8,XMMWORD[rbp]	; incoming checksum

	; Even block index: handle one block alone so the six-block path
	; always starts on an odd index.
	test r8,1
	jnz NEAR $L$ocb_enc_odd

	bsf r12,r8
	add r8,1
	shl r12,4
	movdqu xmm7,XMMWORD[r12*1+rbx]	; table entry for this index
	movdqu xmm2,XMMWORD[rdi]
	lea rdi,[16+rdi]

	call __ocb_encrypt1

	movdqa xmm15,xmm7		; helper returns the new offset in xmm7
	movups XMMWORD[rsi],xmm2
	lea rsi,[16+rsi]
	sub rdx,1
	jz NEAR $L$ocb_enc_done

$L$ocb_enc_odd:
	; Precompute table byte offsets for indices r8+1, r8+3, r8+5; the
	; other three blocks of a group use entry 0 (xmm10).
	lea r12,[1+r8]
	lea r13,[3+r8]
	lea r14,[5+r8]
	lea r8,[6+r8]
	bsf r12,r12
	bsf r13,r13
	bsf r14,r14
	shl r12,4
	shl r13,4
	shl r14,4

	sub rdx,6
	jc NEAR $L$ocb_enc_short
	jmp NEAR $L$ocb_enc_grandloop

ALIGN	32
$L$ocb_enc_grandloop:
	; Main loop: six blocks per iteration.
	movdqu xmm2,XMMWORD[rdi]
	movdqu xmm3,XMMWORD[16+rdi]
	movdqu xmm4,XMMWORD[32+rdi]
	movdqu xmm5,XMMWORD[48+rdi]
	movdqu xmm6,XMMWORD[64+rdi]
	movdqu xmm7,XMMWORD[80+rdi]
	lea rdi,[96+rdi]

	call __ocb_encrypt6

	movups XMMWORD[rsi],xmm2
	movups XMMWORD[16+rsi],xmm3
	movups XMMWORD[32+rsi],xmm4
	movups XMMWORD[48+rsi],xmm5
	movups XMMWORD[64+rsi],xmm6
	movups XMMWORD[80+rsi],xmm7
	lea rsi,[96+rsi]
	sub rdx,6
	jnc NEAR $L$ocb_enc_grandloop

$L$ocb_enc_short:
	; 0..5 blocks remain. Each cmp feeds two branches (jb, then je);
	; the movdqu in between does not alter flags.
	add rdx,6
	jz NEAR $L$ocb_enc_done

	movdqu xmm2,XMMWORD[rdi]
	cmp rdx,2
	jb NEAR $L$ocb_enc_one
	movdqu xmm3,XMMWORD[16+rdi]
	je NEAR $L$ocb_enc_two

	movdqu xmm4,XMMWORD[32+rdi]
	cmp rdx,4
	jb NEAR $L$ocb_enc_three
	movdqu xmm5,XMMWORD[48+rdi]
	je NEAR $L$ocb_enc_four

	; Five blocks: run the six-block helper with a zero sixth block;
	; zero does not change the checksum and its output is discarded.
	movdqu xmm6,XMMWORD[64+rdi]
	pxor xmm7,xmm7

	call __ocb_encrypt6

	movdqa xmm15,xmm14		; offset of the fifth block
	movups XMMWORD[rsi],xmm2
	movups XMMWORD[16+rsi],xmm3
	movups XMMWORD[32+rsi],xmm4
	movups XMMWORD[48+rsi],xmm5
	movups XMMWORD[64+rsi],xmm6

	jmp NEAR $L$ocb_enc_done

ALIGN	16
$L$ocb_enc_one:
	movdqa xmm7,xmm10		; single block uses table entry 0

	call __ocb_encrypt1

	movdqa xmm15,xmm7
	movups XMMWORD[rsi],xmm2
	jmp NEAR $L$ocb_enc_done

ALIGN	16
$L$ocb_enc_two:
	; Pad to four blocks with zeros; only two results are stored.
	pxor xmm4,xmm4
	pxor xmm5,xmm5

	call __ocb_encrypt4

	movdqa xmm15,xmm11		; offset of the second block
	movups XMMWORD[rsi],xmm2
	movups XMMWORD[16+rsi],xmm3

	jmp NEAR $L$ocb_enc_done

ALIGN	16
$L$ocb_enc_three:
	pxor xmm5,xmm5			; zero pad block

	call __ocb_encrypt4

	movdqa xmm15,xmm12		; offset of the third block
	movups XMMWORD[rsi],xmm2
	movups XMMWORD[16+rsi],xmm3
	movups XMMWORD[32+rsi],xmm4

	jmp NEAR $L$ocb_enc_done

ALIGN	16
$L$ocb_enc_four:
	call __ocb_encrypt4

	movdqa xmm15,xmm13		; offset of the fourth block
	movups XMMWORD[rsi],xmm2
	movups XMMWORD[16+rsi],xmm3
	movups XMMWORD[32+rsi],xmm4
	movups XMMWORD[48+rsi],xmm5

$L$ocb_enc_done:
	; xmm0 still holds rk[last] from the helper's final key load, so
	; this strips the mask carried in xmm15.
	pxor xmm15,xmm0
	movdqu XMMWORD[rbp],xmm8	; write back checksum
	movdqu XMMWORD[r9],xmm15	; write back offset

	; Scrub volatile xmm registers, then restore xmm6-xmm15 while
	; zeroing each spill slot behind the load.
	xorps xmm0,xmm0
	pxor xmm1,xmm1
	pxor xmm2,xmm2
	pxor xmm3,xmm3
	pxor xmm4,xmm4
	pxor xmm5,xmm5
	movaps xmm6,XMMWORD[rsp]
	movaps XMMWORD[rsp],xmm0
	movaps xmm7,XMMWORD[16+rsp]
	movaps XMMWORD[16+rsp],xmm0
	movaps xmm8,XMMWORD[32+rsp]
	movaps XMMWORD[32+rsp],xmm0
	movaps xmm9,XMMWORD[48+rsp]
	movaps XMMWORD[48+rsp],xmm0
	movaps xmm10,XMMWORD[64+rsp]
	movaps XMMWORD[64+rsp],xmm0
	movaps xmm11,XMMWORD[80+rsp]
	movaps XMMWORD[80+rsp],xmm0
	movaps xmm12,XMMWORD[96+rsp]
	movaps XMMWORD[96+rsp],xmm0
	movaps xmm13,XMMWORD[112+rsp]
	movaps XMMWORD[112+rsp],xmm0
	movaps xmm14,XMMWORD[128+rsp]
	movaps XMMWORD[128+rsp],xmm0
	movaps xmm15,XMMWORD[144+rsp]
	movaps XMMWORD[144+rsp],xmm0
	; rax = rsp as it was at entry (160-byte spill area + 5 pushes).
	lea rax,[((160+40))+rsp]
$L$ocb_enc_pop:
	mov r14,QWORD[((-40))+rax]
	mov r13,QWORD[((-32))+rax]
	mov r12,QWORD[((-24))+rax]
	mov rbp,QWORD[((-16))+rax]
	mov rbx,QWORD[((-8))+rax]
	lea rsp,[rax]
$L$ocb_enc_epilogue:
	mov rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
	DB 0F3h,0C3h		;repret
$L$SEH_end_aesni_ocb_encrypt:
2962
2963
;-----------------------------------------------------------------------
; __ocb_encrypt6 -- internal helper: encrypt six blocks with chained
; per-block offsets. Register-based contract (not an ABI function).
;
; In:   xmm2-xmm7  six plaintext blocks
;       xmm15      current offset ^ rk[last]
;       xmm9       rk[0] ^ rk[last]
;       xmm10      table entry 0 ([rbx])
;       xmm8       running checksum
;       xmm1       rk[1];  r11 = key schedule base
;       rcx/rax    round-key cursor: keys are read at [rax+rcx], rax is
;                  stepped by 32 until it reaches zero
;       r10        value used to reset rax
;       rbx        table base; r12/r13/r14 = byte offsets into it
;       r8         block index
; Out:  xmm2-xmm7  six output blocks
;       xmm10-xmm15  the six successive offsets (each ^ rk[last]);
;                  xmm10 is then reloaded with table entry 0
;       xmm8       checksum with the six plaintexts XORed in
;       r12/r13/r14  byte offsets for indices r8+1, r8+3, r8+5 of the
;                  next group; r8 advanced by 6
;       xmm0       rk[last] (last key loaded by the loop)
;       xmm1       rk[1];  rax = r10
; The AES instructions are emitted as raw DB bytes; each is decoded in
; the trailing comment.
;-----------------------------------------------------------------------
ALIGN	32
__ocb_encrypt6:
	; Build the offset chain: each offset is the previous one XOR a
	; table entry. The first pxor swaps the rk[last] mask for rk[0],
	; so XORing an offset into a block also applies round 0.
	pxor xmm15,xmm9
	movdqu xmm11,XMMWORD[r12*1+rbx]
	movdqa xmm12,xmm10
	movdqu xmm13,XMMWORD[r13*1+rbx]
	movdqa xmm14,xmm10
	pxor xmm10,xmm15
	movdqu xmm15,XMMWORD[r14*1+rbx]
	pxor xmm11,xmm10
	pxor xmm8,xmm2			; checksum ^= plaintext (before masking)
	pxor xmm2,xmm10
	pxor xmm12,xmm11
	pxor xmm8,xmm3
	pxor xmm3,xmm11
	pxor xmm13,xmm12
	pxor xmm8,xmm4
	pxor xmm4,xmm12
	pxor xmm14,xmm13
	pxor xmm8,xmm5
	pxor xmm5,xmm13
	pxor xmm15,xmm14
	pxor xmm8,xmm6
	pxor xmm6,xmm14
	pxor xmm8,xmm7
	pxor xmm7,xmm15
	movups xmm0,XMMWORD[32+r11]	; rk[2]

	; Start computing table indices for the next group of six.
	lea r12,[1+r8]
	lea r13,[3+r8]
	lea r14,[5+r8]
	add r8,6
	; XOR with xmm9 turns each offset's rk[0] mask back into rk[last],
	; ready to serve as the AESENCLAST operand.
	pxor xmm10,xmm9
	bsf r12,r12
	bsf r13,r13
	bsf r14,r14

DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,220,225		; aesenc xmm4,xmm1
DB	102,15,56,220,233		; aesenc xmm5,xmm1
	pxor xmm11,xmm9
	pxor xmm12,xmm9
DB	102,15,56,220,241		; aesenc xmm6,xmm1
	pxor xmm13,xmm9
	pxor xmm14,xmm9
DB	102,15,56,220,249		; aesenc xmm7,xmm1
	movups xmm1,XMMWORD[48+r11]	; rk[3]
	pxor xmm15,xmm9

DB	102,15,56,220,208		; aesenc xmm2,xmm0
DB	102,15,56,220,216		; aesenc xmm3,xmm0
DB	102,15,56,220,224		; aesenc xmm4,xmm0
DB	102,15,56,220,232		; aesenc xmm5,xmm0
DB	102,15,56,220,240		; aesenc xmm6,xmm0
DB	102,15,56,220,248		; aesenc xmm7,xmm0
	movups xmm0,XMMWORD[64+r11]	; rk[4]
	shl r12,4
	shl r13,4
	jmp NEAR $L$ocb_enc_loop6

ALIGN	32
$L$ocb_enc_loop6:
	; Two rounds per iteration, alternating xmm1/xmm0 as round key.
DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,220,225		; aesenc xmm4,xmm1
DB	102,15,56,220,233		; aesenc xmm5,xmm1
DB	102,15,56,220,241		; aesenc xmm6,xmm1
DB	102,15,56,220,249		; aesenc xmm7,xmm1
	movups xmm1,XMMWORD[rax*1+rcx]
	add rax,32			; sets ZF for the jnz below

DB	102,15,56,220,208		; aesenc xmm2,xmm0
DB	102,15,56,220,216		; aesenc xmm3,xmm0
DB	102,15,56,220,224		; aesenc xmm4,xmm0
DB	102,15,56,220,232		; aesenc xmm5,xmm0
DB	102,15,56,220,240		; aesenc xmm6,xmm0
DB	102,15,56,220,248		; aesenc xmm7,xmm0
	movups xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz NEAR $L$ocb_enc_loop6

DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,220,225		; aesenc xmm4,xmm1
DB	102,15,56,220,233		; aesenc xmm5,xmm1
DB	102,15,56,220,241		; aesenc xmm6,xmm1
DB	102,15,56,220,249		; aesenc xmm7,xmm1
	movups xmm1,XMMWORD[16+r11]	; reload rk[1] for the next call
	shl r14,4

	; Final round: the operand is (offset ^ rk[last]), so one
	; instruction applies the last round key and the output offset.
DB	102,65,15,56,221,210		; aesenclast xmm2,xmm10
	movdqu xmm10,XMMWORD[rbx]	; restore table entry 0
	mov rax,r10			; reset round-key cursor
DB	102,65,15,56,221,219		; aesenclast xmm3,xmm11
DB	102,65,15,56,221,228		; aesenclast xmm4,xmm12
DB	102,65,15,56,221,237		; aesenclast xmm5,xmm13
DB	102,65,15,56,221,246		; aesenclast xmm6,xmm14
DB	102,65,15,56,221,255		; aesenclast xmm7,xmm15
	DB 0F3h,0C3h		;repret
3063
3064
3065
;-----------------------------------------------------------------------
; __ocb_encrypt4 -- internal helper: encrypt four blocks with chained
; per-block offsets. Used for the 2-, 3- and 4-block tails.
;
; In:   xmm2-xmm5  four plaintext blocks
;       xmm15      current offset ^ rk[last]
;       xmm9       rk[0] ^ rk[last]
;       xmm10      table entry 0
;       xmm8       running checksum
;       xmm1       rk[1];  r11 = key schedule base
;       rcx/rax    round-key cursor (keys read at [rax+rcx], rax stepped
;                  by 32 until zero);  r10 = value used to reset rax
;       rbx        table base; r12/r13 = byte offsets into it
; Out:  xmm2-xmm5  four output blocks
;       xmm10-xmm13  the four successive offsets (each ^ rk[last])
;       xmm8       checksum with the four plaintexts XORed in
;       xmm0       rk[last];  xmm1 = rk[1];  rax = r10
; Unlike the six-block helper, this routine does not reload xmm10 and
; does not touch r8 or r12-r14.
;-----------------------------------------------------------------------
ALIGN	32
__ocb_encrypt4:
	; Offset chain, masked with rk[0] so the XOR into each block also
	; applies round 0.
	pxor xmm15,xmm9
	movdqu xmm11,XMMWORD[r12*1+rbx]
	movdqa xmm12,xmm10
	movdqu xmm13,XMMWORD[r13*1+rbx]
	pxor xmm10,xmm15
	pxor xmm11,xmm10
	pxor xmm8,xmm2			; checksum ^= plaintext
	pxor xmm2,xmm10
	pxor xmm12,xmm11
	pxor xmm8,xmm3
	pxor xmm3,xmm11
	pxor xmm13,xmm12
	pxor xmm8,xmm4
	pxor xmm4,xmm12
	pxor xmm8,xmm5
	pxor xmm5,xmm13
	movups xmm0,XMMWORD[32+r11]	; rk[2]

	; Re-mask the offsets with rk[last] for use as AESENCLAST operands.
	pxor xmm10,xmm9
	pxor xmm11,xmm9
	pxor xmm12,xmm9
	pxor xmm13,xmm9

DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,220,225		; aesenc xmm4,xmm1
DB	102,15,56,220,233		; aesenc xmm5,xmm1
	movups xmm1,XMMWORD[48+r11]	; rk[3]

DB	102,15,56,220,208		; aesenc xmm2,xmm0
DB	102,15,56,220,216		; aesenc xmm3,xmm0
DB	102,15,56,220,224		; aesenc xmm4,xmm0
DB	102,15,56,220,232		; aesenc xmm5,xmm0
	movups xmm0,XMMWORD[64+r11]	; rk[4]
	jmp NEAR $L$ocb_enc_loop4

ALIGN	32
$L$ocb_enc_loop4:
	; Two rounds per iteration, alternating xmm1/xmm0 as round key.
DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,220,225		; aesenc xmm4,xmm1
DB	102,15,56,220,233		; aesenc xmm5,xmm1
	movups xmm1,XMMWORD[rax*1+rcx]
	add rax,32			; sets ZF for the jnz below

DB	102,15,56,220,208		; aesenc xmm2,xmm0
DB	102,15,56,220,216		; aesenc xmm3,xmm0
DB	102,15,56,220,224		; aesenc xmm4,xmm0
DB	102,15,56,220,232		; aesenc xmm5,xmm0
	movups xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz NEAR $L$ocb_enc_loop4

DB	102,15,56,220,209		; aesenc xmm2,xmm1
DB	102,15,56,220,217		; aesenc xmm3,xmm1
DB	102,15,56,220,225		; aesenc xmm4,xmm1
DB	102,15,56,220,233		; aesenc xmm5,xmm1
	movups xmm1,XMMWORD[16+r11]	; reload rk[1] for the next call
	mov rax,r10			; reset round-key cursor

	; Final round with (offset ^ rk[last]) as the key operand.
DB	102,65,15,56,221,210		; aesenclast xmm2,xmm10
DB	102,65,15,56,221,219		; aesenclast xmm3,xmm11
DB	102,65,15,56,221,228		; aesenclast xmm4,xmm12
DB	102,65,15,56,221,237		; aesenclast xmm5,xmm13
	DB 0F3h,0C3h		;repret
3132
3133
3134
;-----------------------------------------------------------------------
; __ocb_encrypt1 -- internal helper: encrypt a single block.
;
; In:   xmm2   plaintext block
;       xmm7   table entry selected by the caller for this block
;       xmm15  current offset ^ rk[last]
;       xmm9   rk[0] ^ rk[last]
;       xmm8   running checksum
;       xmm1   rk[1];  r11 = key schedule base
;       rcx/rax  round-key cursor (keys read at [rax+rcx], rax stepped
;              by 32 until zero);  r10 = value used to reset rax
; Out:  xmm2   output block
;       xmm7   new offset ^ rk[last] (the caller copies it to xmm15)
;       xmm8   checksum with the plaintext XORed in
;       xmm0   rk[last];  xmm1 = rk[1];  rax = r10
;-----------------------------------------------------------------------
ALIGN	32
__ocb_encrypt1:
	pxor xmm7,xmm15			; new offset, still masked by rk[last]
	pxor xmm7,xmm9			; swap the mask to rk[0]
	pxor xmm8,xmm2			; checksum ^= plaintext
	pxor xmm2,xmm7			; apply offset and round 0 together
	movups xmm0,XMMWORD[32+r11]	; rk[2]

DB	102,15,56,220,209		; aesenc xmm2,xmm1
	movups xmm1,XMMWORD[48+r11]	; rk[3]
	pxor xmm7,xmm9			; mask back to rk[last] for aesenclast

DB	102,15,56,220,208		; aesenc xmm2,xmm0
	movups xmm0,XMMWORD[64+r11]	; rk[4]
	jmp NEAR $L$ocb_enc_loop1

ALIGN	32
$L$ocb_enc_loop1:
	; Two rounds per iteration, alternating xmm1/xmm0 as round key.
DB	102,15,56,220,209		; aesenc xmm2,xmm1
	movups xmm1,XMMWORD[rax*1+rcx]
	add rax,32			; sets ZF for the jnz below

DB	102,15,56,220,208		; aesenc xmm2,xmm0
	movups xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz NEAR $L$ocb_enc_loop1

DB	102,15,56,220,209		; aesenc xmm2,xmm1
	movups xmm1,XMMWORD[16+r11]	; reload rk[1] for the next call
	mov rax,r10			; reset round-key cursor

DB	102,15,56,221,215		; aesenclast xmm2,xmm7
	DB 0F3h,0C3h		;repret
3167
3168
;-----------------------------------------------------------------------
; aesni_ocb_decrypt -- bulk OCB-mode decryption of whole 16-byte blocks.
;
; ABI: Win64 entry point; arguments are immediately shuffled into the
; registers the body uses:
;   rcx       -> rdi  input pointer  (16 bytes read per block)
;   rdx       -> rsi  output pointer (16 bytes written per block)
;   r8        -> rdx  number of 16-byte blocks
;   r9        -> rcx  AES key schedule; DWORD[240+key] is the round count
;   [40+rsp]  -> r8   running block index (drives the bsf table lookups)
;   [48+rsp]  -> r9   16-byte offset value, read on entry, written on exit
;   [56+rsp]  -> rbx  table of 16-byte entries, indexed by bsf(index)*16
;                     (presumably the OCB L_ table -- confirm with caller)
;   [64+rsp]  -> rbp  16-byte checksum, read on entry, written on exit
;
; Register roles inside the body:
;   xmm9  = rk[0] ^ rk[last]  (rk[last] = key+16+16*rounds)
;   xmm15 = current offset ^ rk[last]; the mask is removed at *_done
;   xmm8  = running checksum; here each *decrypted* block is XORed in
;           by this routine right after it is stored
;   xmm10 = table entry 0 ([rbx]); r11 = key schedule base
;   rcx   = key+32+16*rounds, rax = r10 = 48-16*rounds (round-key cursor
;           that the helpers count up to zero)
;
; Saves/restores rbx, rbp, r12-r14, rdi, rsi and xmm6-xmm15; scrubs
; xmm0-xmm5 and the xmm spill area before returning.
;
; NOTE(review): there is no guard for a zero block count. With an even
; block index one block is processed before the count is first tested,
; and $L$ocb_dec_done relies on xmm0 having been loaded by a helper.
; Callers are expected to pass at least one block.
;-----------------------------------------------------------------------
global	aesni_ocb_decrypt

ALIGN	32
aesni_ocb_decrypt:
	mov QWORD[8+rsp],rdi	;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_aesni_ocb_decrypt:
	mov rdi,rcx
	mov rsi,rdx
	mov rdx,r8
	mov rcx,r9
	mov r8,QWORD[40+rsp]
	mov r9,QWORD[48+rsp]


	; rax keeps the entry rsp so the stack arguments stay addressable
	; after the pushes below.
	lea rax,[rsp]
	push rbx
	push rbp
	push r12
	push r13
	push r14
	; 160-byte spill area for the callee-saved xmm6-xmm15.
	lea rsp,[((-160))+rsp]
	movaps XMMWORD[rsp],xmm6
	movaps XMMWORD[16+rsp],xmm7
	movaps XMMWORD[32+rsp],xmm8
	movaps XMMWORD[48+rsp],xmm9
	movaps XMMWORD[64+rsp],xmm10
	movaps XMMWORD[80+rsp],xmm11
	movaps XMMWORD[96+rsp],xmm12
	movaps XMMWORD[112+rsp],xmm13
	movaps XMMWORD[128+rsp],xmm14
	movaps XMMWORD[144+rsp],xmm15
$L$ocb_dec_body:
	mov rbx,QWORD[56+rax]		; 7th argument: table pointer
	mov rbp,QWORD[((56+8))+rax]	; 8th argument: checksum pointer

	mov r10d,DWORD[240+rcx]		; round count
	mov r11,rcx			; r11 = key schedule base
	shl r10d,4			; r10 = 16*rounds
	movups xmm9,XMMWORD[rcx]	; rk[0]
	movups xmm1,XMMWORD[16+r10*1+rcx]	; rk[last]

	movdqu xmm15,XMMWORD[r9]	; incoming offset
	pxor xmm9,xmm1			; xmm9  = rk[0] ^ rk[last]
	pxor xmm15,xmm1			; xmm15 = offset ^ rk[last]

	mov eax,16+32
	lea rcx,[32+r10*1+r11]		; rcx = key+32+16*rounds
	movups xmm1,XMMWORD[16+r11]	; rk[1], expected by the helpers
	sub rax,r10			; rax = 48-16*rounds
	mov r10,rax			; r10 = copy used to reset rax

	movdqu xmm10,XMMWORD[rbx]	; table entry 0
	movdqu xmm8,XMMWORD[rbp]	; incoming checksum

	; Even block index: handle one block alone so the six-block path
	; always starts on an odd index.
	test r8,1
	jnz NEAR $L$ocb_dec_odd

	bsf r12,r8
	add r8,1
	shl r12,4
	movdqu xmm7,XMMWORD[r12*1+rbx]	; table entry for this index
	movdqu xmm2,XMMWORD[rdi]
	lea rdi,[16+rdi]

	call __ocb_decrypt1

	movdqa xmm15,xmm7		; helper returns the new offset in xmm7
	movups XMMWORD[rsi],xmm2
	xorps xmm8,xmm2			; checksum ^= decrypted block
	lea rsi,[16+rsi]
	sub rdx,1
	jz NEAR $L$ocb_dec_done

$L$ocb_dec_odd:
	; Precompute table byte offsets for indices r8+1, r8+3, r8+5; the
	; other three blocks of a group use entry 0 (xmm10).
	lea r12,[1+r8]
	lea r13,[3+r8]
	lea r14,[5+r8]
	lea r8,[6+r8]
	bsf r12,r12
	bsf r13,r13
	bsf r14,r14
	shl r12,4
	shl r13,4
	shl r14,4

	sub rdx,6
	jc NEAR $L$ocb_dec_short
	jmp NEAR $L$ocb_dec_grandloop

ALIGN	32
$L$ocb_dec_grandloop:
	; Main loop: six blocks per iteration.
	movdqu xmm2,XMMWORD[rdi]
	movdqu xmm3,XMMWORD[16+rdi]
	movdqu xmm4,XMMWORD[32+rdi]
	movdqu xmm5,XMMWORD[48+rdi]
	movdqu xmm6,XMMWORD[64+rdi]
	movdqu xmm7,XMMWORD[80+rdi]
	lea rdi,[96+rdi]

	call __ocb_decrypt6

	; Store each result and fold it into the checksum.
	movups XMMWORD[rsi],xmm2
	pxor xmm8,xmm2
	movups XMMWORD[16+rsi],xmm3
	pxor xmm8,xmm3
	movups XMMWORD[32+rsi],xmm4
	pxor xmm8,xmm4
	movups XMMWORD[48+rsi],xmm5
	pxor xmm8,xmm5
	movups XMMWORD[64+rsi],xmm6
	pxor xmm8,xmm6
	movups XMMWORD[80+rsi],xmm7
	pxor xmm8,xmm7
	lea rsi,[96+rsi]
	sub rdx,6
	jnc NEAR $L$ocb_dec_grandloop

$L$ocb_dec_short:
	; 0..5 blocks remain. Each cmp feeds two branches (jb, then je);
	; the movdqu in between does not alter flags.
	add rdx,6
	jz NEAR $L$ocb_dec_done

	movdqu xmm2,XMMWORD[rdi]
	cmp rdx,2
	jb NEAR $L$ocb_dec_one
	movdqu xmm3,XMMWORD[16+rdi]
	je NEAR $L$ocb_dec_two

	movdqu xmm4,XMMWORD[32+rdi]
	cmp rdx,4
	jb NEAR $L$ocb_dec_three
	movdqu xmm5,XMMWORD[48+rdi]
	je NEAR $L$ocb_dec_four

	; Five blocks: run the six-block helper with a dummy sixth block
	; whose result is neither stored nor added to the checksum.
	movdqu xmm6,XMMWORD[64+rdi]
	pxor xmm7,xmm7

	call __ocb_decrypt6

	movdqa xmm15,xmm14		; offset of the fifth block
	movups XMMWORD[rsi],xmm2
	pxor xmm8,xmm2
	movups XMMWORD[16+rsi],xmm3
	pxor xmm8,xmm3
	movups XMMWORD[32+rsi],xmm4
	pxor xmm8,xmm4
	movups XMMWORD[48+rsi],xmm5
	pxor xmm8,xmm5
	movups XMMWORD[64+rsi],xmm6
	pxor xmm8,xmm6

	jmp NEAR $L$ocb_dec_done

ALIGN	16
$L$ocb_dec_one:
	movdqa xmm7,xmm10		; single block uses table entry 0

	call __ocb_decrypt1

	movdqa xmm15,xmm7
	movups XMMWORD[rsi],xmm2
	xorps xmm8,xmm2
	jmp NEAR $L$ocb_dec_done

ALIGN	16
$L$ocb_dec_two:
	; Pad to four blocks with dummies; only two results are used.
	pxor xmm4,xmm4
	pxor xmm5,xmm5

	call __ocb_decrypt4

	movdqa xmm15,xmm11		; offset of the second block
	movups XMMWORD[rsi],xmm2
	xorps xmm8,xmm2
	movups XMMWORD[16+rsi],xmm3
	xorps xmm8,xmm3

	jmp NEAR $L$ocb_dec_done

ALIGN	16
$L$ocb_dec_three:
	pxor xmm5,xmm5			; dummy pad block

	call __ocb_decrypt4

	movdqa xmm15,xmm12		; offset of the third block
	movups XMMWORD[rsi],xmm2
	xorps xmm8,xmm2
	movups XMMWORD[16+rsi],xmm3
	xorps xmm8,xmm3
	movups XMMWORD[32+rsi],xmm4
	xorps xmm8,xmm4

	jmp NEAR $L$ocb_dec_done

ALIGN	16
$L$ocb_dec_four:
	call __ocb_decrypt4

	movdqa xmm15,xmm13		; offset of the fourth block
	movups XMMWORD[rsi],xmm2
	pxor xmm8,xmm2
	movups XMMWORD[16+rsi],xmm3
	pxor xmm8,xmm3
	movups XMMWORD[32+rsi],xmm4
	pxor xmm8,xmm4
	movups XMMWORD[48+rsi],xmm5
	pxor xmm8,xmm5

$L$ocb_dec_done:
	; xmm0 is expected to still hold rk[last] from the helper's final
	; key load, so this strips the mask carried in xmm15.
	pxor xmm15,xmm0
	movdqu XMMWORD[rbp],xmm8	; write back checksum
	movdqu XMMWORD[r9],xmm15	; write back offset

	; Scrub volatile xmm registers, then restore xmm6-xmm15 while
	; zeroing each spill slot behind the load.
	xorps xmm0,xmm0
	pxor xmm1,xmm1
	pxor xmm2,xmm2
	pxor xmm3,xmm3
	pxor xmm4,xmm4
	pxor xmm5,xmm5
	movaps xmm6,XMMWORD[rsp]
	movaps XMMWORD[rsp],xmm0
	movaps xmm7,XMMWORD[16+rsp]
	movaps XMMWORD[16+rsp],xmm0
	movaps xmm8,XMMWORD[32+rsp]
	movaps XMMWORD[32+rsp],xmm0
	movaps xmm9,XMMWORD[48+rsp]
	movaps XMMWORD[48+rsp],xmm0
	movaps xmm10,XMMWORD[64+rsp]
	movaps XMMWORD[64+rsp],xmm0
	movaps xmm11,XMMWORD[80+rsp]
	movaps XMMWORD[80+rsp],xmm0
	movaps xmm12,XMMWORD[96+rsp]
	movaps XMMWORD[96+rsp],xmm0
	movaps xmm13,XMMWORD[112+rsp]
	movaps XMMWORD[112+rsp],xmm0
	movaps xmm14,XMMWORD[128+rsp]
	movaps XMMWORD[128+rsp],xmm0
	movaps xmm15,XMMWORD[144+rsp]
	movaps XMMWORD[144+rsp],xmm0
	; rax = rsp as it was at entry (160-byte spill area + 5 pushes).
	lea rax,[((160+40))+rsp]
$L$ocb_dec_pop:
	mov r14,QWORD[((-40))+rax]
	mov r13,QWORD[((-32))+rax]
	mov r12,QWORD[((-24))+rax]
	mov rbp,QWORD[((-16))+rax]
	mov rbx,QWORD[((-8))+rax]
	lea rsp,[rax]
$L$ocb_dec_epilogue:
	mov rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
	DB 0F3h,0C3h		;repret
$L$SEH_end_aesni_ocb_decrypt:
3423
3424
;-----------------------------------------------------------------------
; __ocb_decrypt6 -- internal helper: decrypt six blocks with chained
; per-block offsets. Register-based contract (not an ABI function).
;
; In:   xmm2-xmm7  six input blocks
;       xmm15      current offset ^ rk[last]
;       xmm9       rk[0] ^ rk[last]
;       xmm10      table entry 0 ([rbx])
;       xmm1       rk[1];  r11 = key schedule base
;       rcx/rax    round-key cursor: keys are read at [rax+rcx], rax is
;                  stepped by 32 until it reaches zero
;       r10        value used to reset rax
;       rbx        table base; r12/r13/r14 = byte offsets into it
;       r8         block index
; Out:  xmm2-xmm7  six output blocks
;       xmm10-xmm15  the six successive offsets (each ^ rk[last]);
;                  xmm10 is then reloaded with table entry 0
;       r12/r13/r14  byte offsets for indices r8+1, r8+3, r8+5 of the
;                  next group; r8 advanced by 6
;       xmm0       rk[last] (last key loaded by the loop)
;       xmm1       rk[1];  rax = r10
; This routine does not touch the checksum in xmm8; the caller folds
; the outputs in itself.
; The AES instructions are emitted as raw DB bytes; each is decoded in
; the trailing comment.
;-----------------------------------------------------------------------
ALIGN	32
__ocb_decrypt6:
	; Build the offset chain: each offset is the previous one XOR a
	; table entry. The first pxor swaps the rk[last] mask for rk[0],
	; so XORing an offset into a block also applies round 0.
	pxor xmm15,xmm9
	movdqu xmm11,XMMWORD[r12*1+rbx]
	movdqa xmm12,xmm10
	movdqu xmm13,XMMWORD[r13*1+rbx]
	movdqa xmm14,xmm10
	pxor xmm10,xmm15
	movdqu xmm15,XMMWORD[r14*1+rbx]
	pxor xmm11,xmm10
	pxor xmm2,xmm10
	pxor xmm12,xmm11
	pxor xmm3,xmm11
	pxor xmm13,xmm12
	pxor xmm4,xmm12
	pxor xmm14,xmm13
	pxor xmm5,xmm13
	pxor xmm15,xmm14
	pxor xmm6,xmm14
	pxor xmm7,xmm15
	movups xmm0,XMMWORD[32+r11]	; rk[2]

	; Start computing table indices for the next group of six.
	lea r12,[1+r8]
	lea r13,[3+r8]
	lea r14,[5+r8]
	add r8,6
	; XOR with xmm9 turns each offset's rk[0] mask back into rk[last],
	; ready to serve as the AESDECLAST operand.
	pxor xmm10,xmm9
	bsf r12,r12
	bsf r13,r13
	bsf r14,r14

DB	102,15,56,222,209		; aesdec xmm2,xmm1
DB	102,15,56,222,217		; aesdec xmm3,xmm1
DB	102,15,56,222,225		; aesdec xmm4,xmm1
DB	102,15,56,222,233		; aesdec xmm5,xmm1
	pxor xmm11,xmm9
	pxor xmm12,xmm9
DB	102,15,56,222,241		; aesdec xmm6,xmm1
	pxor xmm13,xmm9
	pxor xmm14,xmm9
DB	102,15,56,222,249		; aesdec xmm7,xmm1
	movups xmm1,XMMWORD[48+r11]	; rk[3]
	pxor xmm15,xmm9

DB	102,15,56,222,208		; aesdec xmm2,xmm0
DB	102,15,56,222,216		; aesdec xmm3,xmm0
DB	102,15,56,222,224		; aesdec xmm4,xmm0
DB	102,15,56,222,232		; aesdec xmm5,xmm0
DB	102,15,56,222,240		; aesdec xmm6,xmm0
DB	102,15,56,222,248		; aesdec xmm7,xmm0
	movups xmm0,XMMWORD[64+r11]	; rk[4]
	shl r12,4
	shl r13,4
	jmp NEAR $L$ocb_dec_loop6

ALIGN	32
$L$ocb_dec_loop6:
	; Two rounds per iteration, alternating xmm1/xmm0 as round key.
DB	102,15,56,222,209		; aesdec xmm2,xmm1
DB	102,15,56,222,217		; aesdec xmm3,xmm1
DB	102,15,56,222,225		; aesdec xmm4,xmm1
DB	102,15,56,222,233		; aesdec xmm5,xmm1
DB	102,15,56,222,241		; aesdec xmm6,xmm1
DB	102,15,56,222,249		; aesdec xmm7,xmm1
	movups xmm1,XMMWORD[rax*1+rcx]
	add rax,32			; sets ZF for the jnz below

DB	102,15,56,222,208		; aesdec xmm2,xmm0
DB	102,15,56,222,216		; aesdec xmm3,xmm0
DB	102,15,56,222,224		; aesdec xmm4,xmm0
DB	102,15,56,222,232		; aesdec xmm5,xmm0
DB	102,15,56,222,240		; aesdec xmm6,xmm0
DB	102,15,56,222,248		; aesdec xmm7,xmm0
	movups xmm0,XMMWORD[((-16))+rax*1+rcx]
	jnz NEAR $L$ocb_dec_loop6

DB	102,15,56,222,209		; aesdec xmm2,xmm1
DB	102,15,56,222,217		; aesdec xmm3,xmm1
DB	102,15,56,222,225		; aesdec xmm4,xmm1
DB	102,15,56,222,233		; aesdec xmm5,xmm1
DB	102,15,56,222,241		; aesdec xmm6,xmm1
DB	102,15,56,222,249		; aesdec xmm7,xmm1
	movups xmm1,XMMWORD[16+r11]	; reload rk[1] for the next call
	shl r14,4

	; Final round: the operand is (offset ^ rk[last]), so one
	; instruction applies the last round key and the output offset.
DB	102,65,15,56,223,210		; aesdeclast xmm2,xmm10
	movdqu xmm10,XMMWORD[rbx]	; restore table entry 0
	mov rax,r10			; reset round-key cursor
DB	102,65,15,56,223,219		; aesdeclast xmm3,xmm11
DB	102,65,15,56,223,228		; aesdeclast xmm4,xmm12
DB	102,65,15,56,223,237		; aesdeclast xmm5,xmm13
DB	102,65,15,56,223,246		; aesdeclast xmm6,xmm14
DB	102,65,15,56,223,255		; aesdeclast xmm7,xmm15
	DB 0F3h,0C3h		;repret
3518
3519
3520
3521ALIGN 32
; __ocb_decrypt4 -- internal helper: four-block variant of the OCB decrypt
; core. Blocks are in xmm2..xmm5 and are transformed in place.
;
;   xmm15        running chain value on entry (xmm9 is XORed into it first)
;   xmm10        base table entry supplied by the caller
;   rbx          table base; r12 and r13 are byte offsets into it
;   xmm10..xmm13 chained values, used as the aesdeclast operands
;   r11          key schedule base; rax/rcx index the remaining key entries
;   rax          restored from r10 before returning; xmm1 = [16+r11] on return
; Unlike the six-block helper, this routine does not modify r8, r12 or r13 and
; does not reload xmm10 from [rbx].
3522__ocb_decrypt4:
3523 pxor xmm15,xmm9
3524 movdqu xmm11,XMMWORD[r12*1+rbx]
3525 movdqa xmm12,xmm10
3526 movdqu xmm13,XMMWORD[r13*1+rbx]
; Chain: each value is the previous one XORed with a table entry; each block
; is XORed with its value.
3527 pxor xmm10,xmm15
3528 pxor xmm11,xmm10
3529 pxor xmm2,xmm10
3530 pxor xmm12,xmm11
3531 pxor xmm3,xmm11
3532 pxor xmm13,xmm12
3533 pxor xmm4,xmm12
3534 pxor xmm5,xmm13
3535 movups xmm0,XMMWORD[32+r11]
3536
; Fold xmm9 into every chained value before they are used by aesdeclast.
3537 pxor xmm10,xmm9
3538 pxor xmm11,xmm9
3539 pxor xmm12,xmm9
3540 pxor xmm13,xmm9
3541
; aesdec xmm2..xmm5,xmm1
3542DB 102,15,56,222,209
3543DB 102,15,56,222,217
3544DB 102,15,56,222,225
3545DB 102,15,56,222,233
3546 movups xmm1,XMMWORD[48+r11]
3547
; aesdec xmm2..xmm5,xmm0
3548DB 102,15,56,222,208
3549DB 102,15,56,222,216
3550DB 102,15,56,222,224
3551DB 102,15,56,222,232
3552 movups xmm0,XMMWORD[64+r11]
3553 jmp NEAR $L$ocb_dec_loop4
3554
3555ALIGN 32
; Two rounds per iteration; exits when rax reaches zero after add rax,32.
3556$L$ocb_dec_loop4:
3557DB 102,15,56,222,209
3558DB 102,15,56,222,217
3559DB 102,15,56,222,225
3560DB 102,15,56,222,233
3561 movups xmm1,XMMWORD[rax*1+rcx]
3562 add rax,32
3563
3564DB 102,15,56,222,208
3565DB 102,15,56,222,216
3566DB 102,15,56,222,224
3567DB 102,15,56,222,232
3568 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3569 jnz NEAR $L$ocb_dec_loop4
3570
; Last aesdec round, then restore xmm1 and rax for the caller.
3571DB 102,15,56,222,209
3572DB 102,15,56,222,217
3573DB 102,15,56,222,225
3574DB 102,15,56,222,233
3575 movups xmm1,XMMWORD[16+r11]
3576 mov rax,r10
3577
; aesdeclast xmm2..xmm5 with xmm10..xmm13 respectively
3578DB 102,65,15,56,223,210
3579DB 102,65,15,56,223,219
3580DB 102,65,15,56,223,228
3581DB 102,65,15,56,223,237
3582 DB 0F3h,0C3h ;repret
3583
3584
3585
3586ALIGN 32
; __ocb_decrypt1 -- internal helper: single-block variant of the OCB decrypt
; core. The block is in xmm2 and is transformed in place.
;
;   xmm7   table entry on entry; becomes xmm7 ^ xmm15 (after the second
;          pxor with xmm9 cancels the first) and is the aesdeclast operand
;   xmm15  running chain value, read only
;   xmm9   constant applied to the block mask only (set by the caller)
;   r11    key schedule base; rax/rcx index the remaining key entries
;   rax    restored from r10 before returning; xmm1 = [16+r11] on return
3587__ocb_decrypt1:
3588 pxor xmm7,xmm15
3589 pxor xmm7,xmm9
3590 pxor xmm2,xmm7
3591 movups xmm0,XMMWORD[32+r11]
3592
3593DB 102,15,56,222,209 ; aesdec xmm2,xmm1
3594 movups xmm1,XMMWORD[48+r11]
3595 pxor xmm7,xmm9
3596
3597DB 102,15,56,222,208 ; aesdec xmm2,xmm0
3598 movups xmm0,XMMWORD[64+r11]
3599 jmp NEAR $L$ocb_dec_loop1
3600
3601ALIGN 32
; Two rounds per iteration; exits when rax reaches zero after add rax,32.
3602$L$ocb_dec_loop1:
3603DB 102,15,56,222,209
3604 movups xmm1,XMMWORD[rax*1+rcx]
3605 add rax,32
3606
3607DB 102,15,56,222,208
3608 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3609 jnz NEAR $L$ocb_dec_loop1
3610
3611DB 102,15,56,222,209
3612 movups xmm1,XMMWORD[16+r11]
3613 mov rax,r10
3614
3615DB 102,15,56,223,215 ; aesdeclast xmm2,xmm7
3616 DB 0F3h,0C3h ;repret
3617
Adam Langleye9ada862015-05-11 17:20:37 -07003618global aesni_cbc_encrypt
Adam Langleyd9e397b2015-01-22 14:27:53 -08003619
3620ALIGN 16
; aesni_cbc_encrypt -- CBC-mode AES entry point (Win64 calling convention).
;
; Win64 arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) are remapped so the
; body uses:
;   rdi  input pointer            rsi  output pointer
;   rdx  length in bytes          rcx  key schedule pointer
;   r8   pointer to the 16-byte chaining value (read at start, written at end)
;   r9d  direction: zero selects the decrypt path, non-zero the encrypt path
; rdi and rsi are callee-saved on Win64; they are parked in the caller's home
; space at [8+rsp]/[16+rsp] and restored at $L$cbc_ret.
; A zero length returns immediately without touching any buffer.
Adam Langleye9ada862015-05-11 17:20:37 -07003621aesni_cbc_encrypt:
3622 mov QWORD[8+rsp],rdi ;WIN64 prologue
3623 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -08003624 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -07003625$L$SEH_begin_aesni_cbc_encrypt:
Adam Langleyd9e397b2015-01-22 14:27:53 -08003626 mov rdi,rcx
3627 mov rsi,rdx
3628 mov rdx,r8
3629 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -07003630 mov r8,QWORD[40+rsp]
3631 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003632
3633
3634 test rdx,rdx
Adam Langleye9ada862015-05-11 17:20:37 -07003635 jz NEAR $L$cbc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08003636
; r10d keeps the loop count read from [240+key]; r11 keeps the key pointer.
; Both are used to reset eax/rcx after each block.
Adam Langleye9ada862015-05-11 17:20:37 -07003637 mov r10d,DWORD[240+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003638 mov r11,rcx
3639 test r9d,r9d
Adam Langleye9ada862015-05-11 17:20:37 -07003640 jz NEAR $L$cbc_decrypt
Adam Langleyd9e397b2015-01-22 14:27:53 -08003641
; ---- encrypt path: xmm2 carries the chaining value from block to block ----
Adam Langleye9ada862015-05-11 17:20:37 -07003642 movups xmm2,XMMWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003643 mov eax,r10d
3644 cmp rdx,16
Adam Langleye9ada862015-05-11 17:20:37 -07003645 jb NEAR $L$cbc_enc_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08003646 sub rdx,16
Adam Langleye9ada862015-05-11 17:20:37 -07003647 jmp NEAR $L$cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08003648ALIGN 16
; One block per iteration: xmm2 = E(xmm2 ^ input block), written to [rsi].
Adam Langleye9ada862015-05-11 17:20:37 -07003649$L$cbc_enc_loop:
3650 movups xmm3,XMMWORD[rdi]
3651 lea rdi,[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003652
Adam Langleye9ada862015-05-11 17:20:37 -07003653 movups xmm0,XMMWORD[rcx]
3654 movups xmm1,XMMWORD[16+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003655 xorps xmm3,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07003656 lea rcx,[32+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003657 xorps xmm2,xmm3
; Round loop: aesenc xmm2,xmm1, then fetch the next key entry. jnz tests the
; dec because movups and lea leave the flags untouched.
Adam Langleye9ada862015-05-11 17:20:37 -07003658$L$oop_enc1_15:
Adam Langleyd9e397b2015-01-22 14:27:53 -08003659DB 102,15,56,220,209
3660 dec eax
Adam Langleye9ada862015-05-11 17:20:37 -07003661 movups xmm1,XMMWORD[rcx]
3662 lea rcx,[16+rcx]
3663 jnz NEAR $L$oop_enc1_15
Adam Langleyd9e397b2015-01-22 14:27:53 -08003664DB 102,15,56,221,209 ; aesenclast xmm2,xmm1
3665 mov eax,r10d
3666 mov rcx,r11
Adam Langleye9ada862015-05-11 17:20:37 -07003667 movups XMMWORD[rsi],xmm2
3668 lea rsi,[16+rsi]
; rdx was pre-decremented by 16, so a borrow here means fewer than 16 bytes
; remain; adding 16 back gives the residual byte count (zero when finished).
Adam Langleyd9e397b2015-01-22 14:27:53 -08003669 sub rdx,16
Adam Langleye9ada862015-05-11 17:20:37 -07003670 jnc NEAR $L$cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08003671 add rdx,16
Adam Langleye9ada862015-05-11 17:20:37 -07003672 jnz NEAR $L$cbc_enc_tail
; Done: write the final chaining value back through r8 and clear the xmm
; registers that were used.
3673 pxor xmm0,xmm0
3674 pxor xmm1,xmm1
3675 movups XMMWORD[r8],xmm2
3676 pxor xmm2,xmm2
3677 pxor xmm3,xmm3
3678 jmp NEAR $L$cbc_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08003679
; Partial final block (rdx < 16): copy the rdx input bytes into the output
; buffer, zero-fill up to 16 bytes, then run one more loop iteration that reads
; and writes that padded block in place (rdi = rsi = its start, rdx = 0).
Adam Langleye9ada862015-05-11 17:20:37 -07003680$L$cbc_enc_tail:
Adam Langleyd9e397b2015-01-22 14:27:53 -08003681 mov rcx,rdx
3682 xchg rsi,rdi
Adam Langleye9ada862015-05-11 17:20:37 -07003683 DD 0x9066A4F3 ; bytes F3 A4 66 90: rep movsb + 2-byte nop
Adam Langleyd9e397b2015-01-22 14:27:53 -08003684 mov ecx,16
3685 sub rcx,rdx
3686 xor eax,eax
Adam Langleye9ada862015-05-11 17:20:37 -07003687 DD 0x9066AAF3 ; bytes F3 AA 66 90: rep stosb + 2-byte nop
3688 lea rdi,[((-16))+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003689 mov eax,r10d
3690 mov rsi,rdi
3691 mov rcx,r11
3692 xor rdx,rdx
Adam Langleye9ada862015-05-11 17:20:37 -07003693 jmp NEAR $L$cbc_enc_loop
Adam Langleyd9e397b2015-01-22 14:27:53 -08003694
3695ALIGN 16
; Decrypt path of aesni_cbc_encrypt. A length of exactly 16 bytes is handled
; here without setting up a stack frame; every other length goes to
; $L$cbc_decrypt_bulk.
Adam Langleye9ada862015-05-11 17:20:37 -07003696$L$cbc_decrypt:
3697 cmp rdx,16
3698 jne NEAR $L$cbc_decrypt_bulk
3699
3700
3701
; xmm2 = input block, xmm3 = chaining value from [r8], xmm4 = copy of the input
; block that becomes the new chaining value.
3702 movdqu xmm2,XMMWORD[rdi]
3703 movdqu xmm3,XMMWORD[r8]
3704 movdqa xmm4,xmm2
3705 movups xmm0,XMMWORD[rcx]
3706 movups xmm1,XMMWORD[16+rcx]
3707 lea rcx,[32+rcx]
3708 xorps xmm2,xmm0
; Round loop: aesdec xmm2,xmm1 with r10d as the counter (it is not needed again
; on this path).
3709$L$oop_dec1_16:
3710DB 102,15,56,222,209
3711 dec r10d
3712 movups xmm1,XMMWORD[rcx]
3713 lea rcx,[16+rcx]
3714 jnz NEAR $L$oop_dec1_16
3715DB 102,15,56,223,209 ; aesdeclast xmm2,xmm1
; Store the new chaining value, XOR the old one into the decrypted block, write
; the result, and clear every xmm register that held key or data material.
3716 pxor xmm0,xmm0
3717 pxor xmm1,xmm1
3718 movdqu XMMWORD[r8],xmm4
3719 xorps xmm2,xmm3
3720 pxor xmm3,xmm3
3721 movups XMMWORD[rsi],xmm2
3722 pxor xmm2,xmm2
3723 jmp NEAR $L$cbc_ret
3724ALIGN 16
; Bulk CBC decrypt setup. r11 remembers the incoming rsp so the epilogue can
; restore rbp from [-8+r11] and rsp from r11. The frame is 176 bytes, aligned
; down to 16: [rsp] is a scratch slot and [16+rsp]..[160+rsp] hold xmm6..xmm15,
; which are callee-saved on Win64.
3725$L$cbc_decrypt_bulk:
Robert Sloana94fe052017-02-21 08:49:28 -08003726 lea r11,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003727 push rbp
3728 sub rsp,176
3729 and rsp,-16
Adam Langleye9ada862015-05-11 17:20:37 -07003730 movaps XMMWORD[16+rsp],xmm6
3731 movaps XMMWORD[32+rsp],xmm7
3732 movaps XMMWORD[48+rsp],xmm8
3733 movaps XMMWORD[64+rsp],xmm9
3734 movaps XMMWORD[80+rsp],xmm10
3735 movaps XMMWORD[96+rsp],xmm11
3736 movaps XMMWORD[112+rsp],xmm12
3737 movaps XMMWORD[128+rsp],xmm13
3738 movaps XMMWORD[144+rsp],xmm14
3739 movaps XMMWORD[160+rsp],xmm15
; rbp = key pointer copy, xmm10 = chaining value, eax = loop count.
; Five blocks (0x50 bytes) or fewer go straight to the tail code.
3740$L$cbc_decrypt_body:
Robert Sloana94fe052017-02-21 08:49:28 -08003741 mov rbp,rcx
Adam Langleye9ada862015-05-11 17:20:37 -07003742 movups xmm10,XMMWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003743 mov eax,r10d
Adam Langleye9ada862015-05-11 17:20:37 -07003744 cmp rdx,0x50
3745 jbe NEAR $L$cbc_dec_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08003746
; Load six input blocks into xmm2..xmm7 and keep copies of the first five in
; xmm11..xmm15; they are XORed into the following blocks after decryption.
Adam Langleye9ada862015-05-11 17:20:37 -07003747 movups xmm0,XMMWORD[rcx]
3748 movdqu xmm2,XMMWORD[rdi]
3749 movdqu xmm3,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003750 movdqa xmm11,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07003751 movdqu xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003752 movdqa xmm12,xmm3
Adam Langleye9ada862015-05-11 17:20:37 -07003753 movdqu xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003754 movdqa xmm13,xmm4
Adam Langleye9ada862015-05-11 17:20:37 -07003755 movdqu xmm6,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003756 movdqa xmm14,xmm5
Adam Langleye9ada862015-05-11 17:20:37 -07003757 movdqu xmm7,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003758 movdqa xmm15,xmm6
; r9d = second 32-bit word of OPENSSL_ia32cap_P.
Robert Sloan2424d842017-05-01 07:46:28 -07003759 lea r9,[OPENSSL_ia32cap_P]
Robert Sloan572a4e22017-04-17 10:52:19 -07003760 mov r9d,DWORD[4+r9]
Adam Langleye9ada862015-05-11 17:20:37 -07003761 cmp rdx,0x70
3762 jbe NEAR $L$cbc_dec_six_or_seven
Adam Langleyd9e397b2015-01-22 14:27:53 -08003763
; Mask 71303168 = 0x04400000 (bits 22 and 26). When only bit 22 is set
; (4194304 = 0x00400000) the six-block loop is used; otherwise the eight-block
; loop, with rcx biased by +112.
; NOTE(review): presumably these are the CPUID.1:ECX MOVBE and XSAVE bits, used
; to single out a CPU family that prefers the 6x loop -- confirm against the
; OPENSSL_ia32cap_P layout.
3764 and r9d,71303168
Adam Langleye9ada862015-05-11 17:20:37 -07003765 sub rdx,0x50
Adam Langleyd9e397b2015-01-22 14:27:53 -08003766 cmp r9d,4194304
Adam Langleye9ada862015-05-11 17:20:37 -07003767 je NEAR $L$cbc_dec_loop6_enter
3768 sub rdx,0x20
3769 lea rcx,[112+rcx]
3770 jmp NEAR $L$cbc_dec_loop8_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -08003771ALIGN 16
; Eight-block CBC decrypt loop of aesni_cbc_encrypt.
;   xmm2..xmm9    the eight blocks being decrypted
;   xmm10..xmm15  chaining value and input-block copies to XOR in at the end
;   rcx           key pointer biased by +112 (undone after the loop by
;                 lea rcx,[-112+rcx]); xmm0/xmm1 alternate as round-key registers
;   rbp           address from which the next six input blocks are pre-loaded
; The top of the loop stores the eighth output block of the previous iteration.
Adam Langleye9ada862015-05-11 17:20:37 -07003772$L$cbc_dec_loop8:
3773 movups XMMWORD[rsi],xmm9
3774 lea rsi,[16+rsi]
3775$L$cbc_dec_loop8_enter:
3776 movdqu xmm8,XMMWORD[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003777 pxor xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07003778 movdqu xmm9,XMMWORD[112+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003779 pxor xmm3,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07003780 movups xmm1,XMMWORD[((16-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003781 pxor xmm4,xmm0
; Branch-free pointer select: the cmp sets CF when rdx < 0x70, and none of the
; pxor/movups/aesdec instructions before the adc change the flags.
; rbp = -1 + CF, masked with 128, plus rdi  =>  rdi when CF is set, else
; rdi + 128.
Robert Sloana94fe052017-02-21 08:49:28 -08003782 mov rbp,-1
Adam Langleye9ada862015-05-11 17:20:37 -07003783 cmp rdx,0x70
Adam Langleyd9e397b2015-01-22 14:27:53 -08003784 pxor xmm5,xmm0
3785 pxor xmm6,xmm0
3786 pxor xmm7,xmm0
3787 pxor xmm8,xmm0
3788
; From here on each group of eight DB lines is one round:
; aesdec xmm2..xmm9 with xmm1 (bytes ending 209..249/193/201) or with xmm0
; (bytes ending 208..248/192/200), followed by the load of the next round key.
3789DB 102,15,56,222,209
3790 pxor xmm9,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07003791 movups xmm0,XMMWORD[((32-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003792DB 102,15,56,222,217
3793DB 102,15,56,222,225
3794DB 102,15,56,222,233
3795DB 102,15,56,222,241
3796DB 102,15,56,222,249
3797DB 102,68,15,56,222,193
Robert Sloana94fe052017-02-21 08:49:28 -08003798 adc rbp,0
3799 and rbp,128
Adam Langleyd9e397b2015-01-22 14:27:53 -08003800DB 102,68,15,56,222,201
Robert Sloana94fe052017-02-21 08:49:28 -08003801 add rbp,rdi
Adam Langleye9ada862015-05-11 17:20:37 -07003802 movups xmm1,XMMWORD[((48-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003803DB 102,15,56,222,208
3804DB 102,15,56,222,216
3805DB 102,15,56,222,224
3806DB 102,15,56,222,232
3807DB 102,15,56,222,240
3808DB 102,15,56,222,248
3809DB 102,68,15,56,222,192
3810DB 102,68,15,56,222,200
Adam Langleye9ada862015-05-11 17:20:37 -07003811 movups xmm0,XMMWORD[((64-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003812 nop
3813DB 102,15,56,222,209
3814DB 102,15,56,222,217
3815DB 102,15,56,222,225
3816DB 102,15,56,222,233
3817DB 102,15,56,222,241
3818DB 102,15,56,222,249
3819DB 102,68,15,56,222,193
3820DB 102,68,15,56,222,201
Adam Langleye9ada862015-05-11 17:20:37 -07003821 movups xmm1,XMMWORD[((80-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003822 nop
3823DB 102,15,56,222,208
3824DB 102,15,56,222,216
3825DB 102,15,56,222,224
3826DB 102,15,56,222,232
3827DB 102,15,56,222,240
3828DB 102,15,56,222,248
3829DB 102,68,15,56,222,192
3830DB 102,68,15,56,222,200
Adam Langleye9ada862015-05-11 17:20:37 -07003831 movups xmm0,XMMWORD[((96-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003832 nop
3833DB 102,15,56,222,209
3834DB 102,15,56,222,217
3835DB 102,15,56,222,225
3836DB 102,15,56,222,233
3837DB 102,15,56,222,241
3838DB 102,15,56,222,249
3839DB 102,68,15,56,222,193
3840DB 102,68,15,56,222,201
Adam Langleye9ada862015-05-11 17:20:37 -07003841 movups xmm1,XMMWORD[((112-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003842 nop
3843DB 102,15,56,222,208
3844DB 102,15,56,222,216
3845DB 102,15,56,222,224
3846DB 102,15,56,222,232
3847DB 102,15,56,222,240
3848DB 102,15,56,222,248
3849DB 102,68,15,56,222,192
3850DB 102,68,15,56,222,200
Adam Langleye9ada862015-05-11 17:20:37 -07003851 movups xmm0,XMMWORD[((128-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003852 nop
3853DB 102,15,56,222,209
3854DB 102,15,56,222,217
3855DB 102,15,56,222,225
3856DB 102,15,56,222,233
3857DB 102,15,56,222,241
3858DB 102,15,56,222,249
3859DB 102,68,15,56,222,193
3860DB 102,68,15,56,222,201
Adam Langleye9ada862015-05-11 17:20:37 -07003861 movups xmm1,XMMWORD[((144-112))+rcx]
; The flags from this compare drive both the jb and the je below; the aesdec
; and movups instructions in between leave them unchanged.
; eax < 11 skips four extra rounds, eax == 11 skips two, larger values run all.
Adam Langleyd9e397b2015-01-22 14:27:53 -08003862 cmp eax,11
3863DB 102,15,56,222,208
3864DB 102,15,56,222,216
3865DB 102,15,56,222,224
3866DB 102,15,56,222,232
3867DB 102,15,56,222,240
3868DB 102,15,56,222,248
3869DB 102,68,15,56,222,192
3870DB 102,68,15,56,222,200
Adam Langleye9ada862015-05-11 17:20:37 -07003871 movups xmm0,XMMWORD[((160-112))+rcx]
3872 jb NEAR $L$cbc_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08003873DB 102,15,56,222,209
3874DB 102,15,56,222,217
3875DB 102,15,56,222,225
3876DB 102,15,56,222,233
3877DB 102,15,56,222,241
3878DB 102,15,56,222,249
3879DB 102,68,15,56,222,193
3880DB 102,68,15,56,222,201
Adam Langleye9ada862015-05-11 17:20:37 -07003881 movups xmm1,XMMWORD[((176-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003882 nop
3883DB 102,15,56,222,208
3884DB 102,15,56,222,216
3885DB 102,15,56,222,224
3886DB 102,15,56,222,232
3887DB 102,15,56,222,240
3888DB 102,15,56,222,248
3889DB 102,68,15,56,222,192
3890DB 102,68,15,56,222,200
Adam Langleye9ada862015-05-11 17:20:37 -07003891 movups xmm0,XMMWORD[((192-112))+rcx]
3892 je NEAR $L$cbc_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08003893DB 102,15,56,222,209
3894DB 102,15,56,222,217
3895DB 102,15,56,222,225
3896DB 102,15,56,222,233
3897DB 102,15,56,222,241
3898DB 102,15,56,222,249
3899DB 102,68,15,56,222,193
3900DB 102,68,15,56,222,201
Adam Langleye9ada862015-05-11 17:20:37 -07003901 movups xmm1,XMMWORD[((208-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003902 nop
3903DB 102,15,56,222,208
3904DB 102,15,56,222,216
3905DB 102,15,56,222,224
3906DB 102,15,56,222,232
3907DB 102,15,56,222,240
3908DB 102,15,56,222,248
3909DB 102,68,15,56,222,192
3910DB 102,68,15,56,222,200
Adam Langleye9ada862015-05-11 17:20:37 -07003911 movups xmm0,XMMWORD[((224-112))+rcx]
3912 jmp NEAR $L$cbc_dec_done
Adam Langleyd9e397b2015-01-22 14:27:53 -08003913ALIGN 16
; Final stage. xmm0 holds the last key entry loaded above. It is XORed into the
; chaining value and the saved input blocks (xmm10..xmm15, then [80+rdi] and
; [96+rdi]), so each aesdeclast applies the last round key and the CBC XOR in
; one instruction. [112+rdi], the last input block, becomes the next chaining
; value in xmm10.
Adam Langleye9ada862015-05-11 17:20:37 -07003914$L$cbc_dec_done:
Adam Langleyd9e397b2015-01-22 14:27:53 -08003915DB 102,15,56,222,209
3916DB 102,15,56,222,217
3917 pxor xmm10,xmm0
3918 pxor xmm11,xmm0
3919DB 102,15,56,222,225
3920DB 102,15,56,222,233
3921 pxor xmm12,xmm0
3922 pxor xmm13,xmm0
3923DB 102,15,56,222,241
3924DB 102,15,56,222,249
3925 pxor xmm14,xmm0
3926 pxor xmm15,xmm0
3927DB 102,68,15,56,222,193
3928DB 102,68,15,56,222,201
Adam Langleye9ada862015-05-11 17:20:37 -07003929 movdqu xmm1,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003930
3931DB 102,65,15,56,223,210 ; aesdeclast xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07003932 movdqu xmm10,XMMWORD[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003933 pxor xmm1,xmm0
3934DB 102,65,15,56,223,219 ; aesdeclast xmm3,xmm11
3935 pxor xmm10,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07003936 movdqu xmm0,XMMWORD[112+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003937DB 102,65,15,56,223,228 ; aesdeclast xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07003938 lea rdi,[128+rdi]
; Pre-load the next six input blocks from rbp as each xmm11..xmm15 is consumed.
Robert Sloana94fe052017-02-21 08:49:28 -08003939 movdqu xmm11,XMMWORD[rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003940DB 102,65,15,56,223,237 ; aesdeclast xmm5,xmm13
3941DB 102,65,15,56,223,246 ; aesdeclast xmm6,xmm14
Robert Sloana94fe052017-02-21 08:49:28 -08003942 movdqu xmm12,XMMWORD[16+rbp]
3943 movdqu xmm13,XMMWORD[32+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003944DB 102,65,15,56,223,255 ; aesdeclast xmm7,xmm15
3945DB 102,68,15,56,223,193 ; aesdeclast xmm8,xmm1
Robert Sloana94fe052017-02-21 08:49:28 -08003946 movdqu xmm14,XMMWORD[48+rbp]
3947 movdqu xmm15,XMMWORD[64+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003948DB 102,69,15,56,223,202 ; aesdeclast xmm9,xmm10
3949 movdqa xmm10,xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08003950 movdqu xmm1,XMMWORD[80+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -07003951 movups xmm0,XMMWORD[((-112))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003952
; Store seven outputs and move the pre-loaded blocks into xmm2..xmm7; the
; eighth output (xmm9) is stored at the top of the next iteration or below.
Adam Langleye9ada862015-05-11 17:20:37 -07003953 movups XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08003954 movdqa xmm2,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07003955 movups XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08003956 movdqa xmm3,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07003957 movups XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08003958 movdqa xmm4,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07003959 movups XMMWORD[48+rsi],xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08003960 movdqa xmm5,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07003961 movups XMMWORD[64+rsi],xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08003962 movdqa xmm6,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07003963 movups XMMWORD[80+rsi],xmm7
Adam Langleyd9e397b2015-01-22 14:27:53 -08003964 movdqa xmm7,xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07003965 movups XMMWORD[96+rsi],xmm8
3966 lea rsi,[112+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08003967
Adam Langleye9ada862015-05-11 17:20:37 -07003968 sub rdx,0x80
3969 ja NEAR $L$cbc_dec_loop8
Adam Langleyd9e397b2015-01-22 14:27:53 -08003970
; Loop exit: xmm2 takes the pending eighth output, the rcx bias is removed and
; the remaining length is restored. If nothing is left, the tail-collection
; code stores xmm2; otherwise xmm9 is stored here and the remainder is
; dispatched by size.
3971 movaps xmm2,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07003972 lea rcx,[((-112))+rcx]
3973 add rdx,0x70
3974 jle NEAR $L$cbc_dec_clear_tail_collected
3975 movups XMMWORD[rsi],xmm9
3976 lea rsi,[16+rsi]
3977 cmp rdx,0x50
3978 jbe NEAR $L$cbc_dec_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08003979
; More than five blocks remain: xmm2 is reloaded with the first pre-loaded
; block (xmm11) before falling through to $L$cbc_dec_six_or_seven.
3980 movaps xmm2,xmm11
; Six or seven blocks remain, with the first six already in xmm2..xmm7 and
; copies of the first five in xmm11..xmm15. More than 0x60 bytes selects the
; seven-block handler.
; NOTE(review): _aesni_decrypt6 / _aesni_decrypt8 are defined elsewhere in the
; file; presumably they decrypt xmm2..xmm7 / xmm2..xmm9 in place using the key
; at rcx and the count in eax -- confirm.
Adam Langleye9ada862015-05-11 17:20:37 -07003981$L$cbc_dec_six_or_seven:
3982 cmp rdx,0x60
3983 ja NEAR $L$cbc_dec_seven
Adam Langleyd9e397b2015-01-22 14:27:53 -08003984
; Six blocks: xmm8 keeps the sixth input block, which becomes the new chaining
; value in xmm10 after the call.
3985 movaps xmm8,xmm7
3986 call _aesni_decrypt6
3987 pxor xmm2,xmm10
3988 movaps xmm10,xmm8
3989 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07003990 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08003991 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07003992 movdqu XMMWORD[16+rsi],xmm3
3993 pxor xmm3,xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08003994 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07003995 movdqu XMMWORD[32+rsi],xmm4
3996 pxor xmm4,xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08003997 pxor xmm6,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07003998 movdqu XMMWORD[48+rsi],xmm5
3999 pxor xmm5,xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08004000 pxor xmm7,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07004001 movdqu XMMWORD[64+rsi],xmm6
4002 pxor xmm6,xmm6
4003 lea rsi,[80+rsi]
; The last output block is handed to the tail-collection code in xmm2; each
; register is cleared as soon as its plaintext has been written.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004004 movdqa xmm2,xmm7
Adam Langleye9ada862015-05-11 17:20:37 -07004005 pxor xmm7,xmm7
4006 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004007
4008ALIGN 16
; Seven blocks: the seventh is loaded into xmm8 and xmm9 is zeroed so the
; eight-block helper can be used. After the call, [80+rdi] (sixth input) is the
; XOR operand for xmm8 and [96+rdi] (seventh input) is the new chaining value.
Adam Langleye9ada862015-05-11 17:20:37 -07004009$L$cbc_dec_seven:
4010 movups xmm8,XMMWORD[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004011 xorps xmm9,xmm9
4012 call _aesni_decrypt8
Adam Langleye9ada862015-05-11 17:20:37 -07004013 movups xmm9,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004014 pxor xmm2,xmm10
Adam Langleye9ada862015-05-11 17:20:37 -07004015 movups xmm10,XMMWORD[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004016 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004017 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004018 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07004019 movdqu XMMWORD[16+rsi],xmm3
4020 pxor xmm3,xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08004021 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07004022 movdqu XMMWORD[32+rsi],xmm4
4023 pxor xmm4,xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08004024 pxor xmm6,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07004025 movdqu XMMWORD[48+rsi],xmm5
4026 pxor xmm5,xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08004027 pxor xmm7,xmm15
Adam Langleye9ada862015-05-11 17:20:37 -07004028 movdqu XMMWORD[64+rsi],xmm6
4029 pxor xmm6,xmm6
Adam Langleyd9e397b2015-01-22 14:27:53 -08004030 pxor xmm8,xmm9
Adam Langleye9ada862015-05-11 17:20:37 -07004031 movdqu XMMWORD[80+rsi],xmm7
4032 pxor xmm7,xmm7
4033 lea rsi,[96+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004034 movdqa xmm2,xmm8
Adam Langleye9ada862015-05-11 17:20:37 -07004035 pxor xmm8,xmm8
4036 pxor xmm9,xmm9
4037 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004038
4039ALIGN 16
; Six-block CBC decrypt loop of aesni_cbc_encrypt. The top of the loop stores
; the sixth output block of the previous iteration, then loads six new input
; blocks into xmm2..xmm7 with copies of the first five in xmm11..xmm15.
; First entry is at $L$cbc_dec_loop6_enter, with the blocks already loaded.
Adam Langleye9ada862015-05-11 17:20:37 -07004040$L$cbc_dec_loop6:
4041 movups XMMWORD[rsi],xmm7
4042 lea rsi,[16+rsi]
4043 movdqu xmm2,XMMWORD[rdi]
4044 movdqu xmm3,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004045 movdqa xmm11,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07004046 movdqu xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004047 movdqa xmm12,xmm3
Adam Langleye9ada862015-05-11 17:20:37 -07004048 movdqu xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004049 movdqa xmm13,xmm4
Adam Langleye9ada862015-05-11 17:20:37 -07004050 movdqu xmm6,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004051 movdqa xmm14,xmm5
Adam Langleye9ada862015-05-11 17:20:37 -07004052 movdqu xmm7,XMMWORD[80+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004053 movdqa xmm15,xmm6
; xmm8 keeps the sixth input block; it becomes the chaining value (xmm10) for
; the next group once the current xmm10 has been used.
Adam Langleye9ada862015-05-11 17:20:37 -07004054$L$cbc_dec_loop6_enter:
4055 lea rdi,[96+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004056 movdqa xmm8,xmm7
4057
4058 call _aesni_decrypt6
4059
4060 pxor xmm2,xmm10
4061 movdqa xmm10,xmm8
4062 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004063 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004064 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07004065 movdqu XMMWORD[16+rsi],xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08004066 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07004067 movdqu XMMWORD[32+rsi],xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08004068 pxor xmm6,xmm14
; Reset the key pointer from rbp (set to the key pointer at
; $L$cbc_decrypt_body) and the loop count from r10d for the next call.
Robert Sloana94fe052017-02-21 08:49:28 -08004069 mov rcx,rbp
Adam Langleye9ada862015-05-11 17:20:37 -07004070 movdqu XMMWORD[48+rsi],xmm5
Adam Langleyd9e397b2015-01-22 14:27:53 -08004071 pxor xmm7,xmm15
4072 mov eax,r10d
Adam Langleye9ada862015-05-11 17:20:37 -07004073 movdqu XMMWORD[64+rsi],xmm6
4074 lea rsi,[80+rsi]
4075 sub rdx,0x60
4076 ja NEAR $L$cbc_dec_loop6
Adam Langleyd9e397b2015-01-22 14:27:53 -08004077
; Loop exit: xmm2 takes the pending sixth output. If no input remains, the
; tail-collection code stores it; otherwise it is stored here and execution
; falls through into $L$cbc_dec_tail.
4078 movdqa xmm2,xmm7
Adam Langleye9ada862015-05-11 17:20:37 -07004079 add rdx,0x50
4080 jle NEAR $L$cbc_dec_clear_tail_collected
4081 movups XMMWORD[rsi],xmm7
4082 lea rsi,[16+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004083
; CBC decrypt tail of aesni_cbc_encrypt: one to five blocks remain. Input
; blocks are loaded one at a time, 16 is subtracted from rdx after each, and
; the first subtraction that reaches zero or borrows (jbe) selects the handler
; for that block count. xmm11..xmm14 receive copies of the input blocks for the
; later CBC XOR. Every handler leaves the last plaintext block in xmm2, the new
; chaining value in xmm10, and jumps to $L$cbc_dec_tail_collected.
; NOTE(review): the _aesni_decryptN helpers are defined elsewhere; presumably
; they decrypt xmm2.. in place using rcx/eax -- confirm.
Adam Langleye9ada862015-05-11 17:20:37 -07004084$L$cbc_dec_tail:
4085 movups xmm2,XMMWORD[rdi]
4086 sub rdx,0x10
4087 jbe NEAR $L$cbc_dec_one
Adam Langleyd9e397b2015-01-22 14:27:53 -08004088
Adam Langleye9ada862015-05-11 17:20:37 -07004089 movups xmm3,XMMWORD[16+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004090 movaps xmm11,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07004091 sub rdx,0x10
4092 jbe NEAR $L$cbc_dec_two
Adam Langleyd9e397b2015-01-22 14:27:53 -08004093
Adam Langleye9ada862015-05-11 17:20:37 -07004094 movups xmm4,XMMWORD[32+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004095 movaps xmm12,xmm3
Adam Langleye9ada862015-05-11 17:20:37 -07004096 sub rdx,0x10
4097 jbe NEAR $L$cbc_dec_three
Adam Langleyd9e397b2015-01-22 14:27:53 -08004098
Adam Langleye9ada862015-05-11 17:20:37 -07004099 movups xmm5,XMMWORD[48+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004100 movaps xmm13,xmm4
Adam Langleye9ada862015-05-11 17:20:37 -07004101 sub rdx,0x10
4102 jbe NEAR $L$cbc_dec_four
Adam Langleyd9e397b2015-01-22 14:27:53 -08004103
; Five blocks: the six-block helper is used with xmm7 zeroed as a dummy sixth
; block. xmm15 keeps the fifth input block as the new chaining value.
Adam Langleye9ada862015-05-11 17:20:37 -07004104 movups xmm6,XMMWORD[64+rdi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004105 movaps xmm14,xmm5
4106 movaps xmm15,xmm6
4107 xorps xmm7,xmm7
4108 call _aesni_decrypt6
4109 pxor xmm2,xmm10
4110 movaps xmm10,xmm15
4111 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004112 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004113 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07004114 movdqu XMMWORD[16+rsi],xmm3
4115 pxor xmm3,xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08004116 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07004117 movdqu XMMWORD[32+rsi],xmm4
4118 pxor xmm4,xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08004119 pxor xmm6,xmm14
Adam Langleye9ada862015-05-11 17:20:37 -07004120 movdqu XMMWORD[48+rsi],xmm5
4121 pxor xmm5,xmm5
4122 lea rsi,[64+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004123 movdqa xmm2,xmm6
Adam Langleye9ada862015-05-11 17:20:37 -07004124 pxor xmm6,xmm6
4125 pxor xmm7,xmm7
; Account for the fifth block, so rdx matches what the jbe paths leave behind.
4126 sub rdx,0x10
4127 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004128
4129ALIGN 16
; One block: inline single-block decrypt (aesdec xmm2,xmm1 per round, then
; aesdeclast xmm2,xmm1), with eax as the round counter.
Adam Langleye9ada862015-05-11 17:20:37 -07004130$L$cbc_dec_one:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004131 movaps xmm11,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07004132 movups xmm0,XMMWORD[rcx]
4133 movups xmm1,XMMWORD[16+rcx]
4134 lea rcx,[32+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004135 xorps xmm2,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07004136$L$oop_dec1_17:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004137DB 102,15,56,222,209
4138 dec eax
Adam Langleye9ada862015-05-11 17:20:37 -07004139 movups xmm1,XMMWORD[rcx]
4140 lea rcx,[16+rcx]
4141 jnz NEAR $L$oop_dec1_17
Adam Langleyd9e397b2015-01-22 14:27:53 -08004142DB 102,15,56,223,209
4143 xorps xmm2,xmm10
4144 movaps xmm10,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004145 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004146ALIGN 16
; Two blocks: xmm12 keeps the second input block as the new chaining value.
Adam Langleye9ada862015-05-11 17:20:37 -07004147$L$cbc_dec_two:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004148 movaps xmm12,xmm3
4149 call _aesni_decrypt2
4150 pxor xmm2,xmm10
4151 movaps xmm10,xmm12
4152 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004153 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004154 movdqa xmm2,xmm3
Adam Langleye9ada862015-05-11 17:20:37 -07004155 pxor xmm3,xmm3
4156 lea rsi,[16+rsi]
4157 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004158ALIGN 16
; Three blocks: xmm13 keeps the third input block as the new chaining value.
Adam Langleye9ada862015-05-11 17:20:37 -07004159$L$cbc_dec_three:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004160 movaps xmm13,xmm4
4161 call _aesni_decrypt3
4162 pxor xmm2,xmm10
4163 movaps xmm10,xmm13
4164 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004165 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004166 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07004167 movdqu XMMWORD[16+rsi],xmm3
4168 pxor xmm3,xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08004169 movdqa xmm2,xmm4
Adam Langleye9ada862015-05-11 17:20:37 -07004170 pxor xmm4,xmm4
4171 lea rsi,[32+rsi]
4172 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004173ALIGN 16
; Four blocks: xmm14 keeps the fourth input block as the new chaining value.
Adam Langleye9ada862015-05-11 17:20:37 -07004174$L$cbc_dec_four:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004175 movaps xmm14,xmm5
4176 call _aesni_decrypt4
4177 pxor xmm2,xmm10
4178 movaps xmm10,xmm14
4179 pxor xmm3,xmm11
Adam Langleye9ada862015-05-11 17:20:37 -07004180 movdqu XMMWORD[rsi],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004181 pxor xmm4,xmm12
Adam Langleye9ada862015-05-11 17:20:37 -07004182 movdqu XMMWORD[16+rsi],xmm3
4183 pxor xmm3,xmm3
Adam Langleyd9e397b2015-01-22 14:27:53 -08004184 pxor xmm5,xmm13
Adam Langleye9ada862015-05-11 17:20:37 -07004185 movdqu XMMWORD[32+rsi],xmm4
4186 pxor xmm4,xmm4
Adam Langleyd9e397b2015-01-22 14:27:53 -08004187 movdqa xmm2,xmm5
Adam Langleye9ada862015-05-11 17:20:37 -07004188 pxor xmm5,xmm5
4189 lea rsi,[48+rsi]
4190 jmp NEAR $L$cbc_dec_tail_collected
Adam Langleyd9e397b2015-01-22 14:27:53 -08004191
4192ALIGN 16
; Common exit of the bulk CBC decrypt path of aesni_cbc_encrypt.
; On arrival xmm2 holds the last plaintext block (not yet stored) and xmm10 the
; new chaining value. The "clear" entry additionally zeroes xmm3..xmm5 for the
; loop exits that did not clear them.
Adam Langleye9ada862015-05-11 17:20:37 -07004193$L$cbc_dec_clear_tail_collected:
4194 pxor xmm3,xmm3
4195 pxor xmm4,xmm4
4196 pxor xmm5,xmm5
4197$L$cbc_dec_tail_collected:
4198 movups XMMWORD[r8],xmm10
; rdx & 15 is zero when the remaining length was a multiple of 16: store the
; whole last block. Otherwise take the partial-store path.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004199 and rdx,15
Adam Langleye9ada862015-05-11 17:20:37 -07004200 jnz NEAR $L$cbc_dec_tail_partial
4201 movups XMMWORD[rsi],xmm2
4202 pxor xmm2,xmm2
4203 jmp NEAR $L$cbc_dec_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004204ALIGN 16
; Partial store: spill the block to the aligned scratch slot at [rsp], copy
; bytes from there to the output with rep movsb, then overwrite the scratch
; slot with zeros (xmm2 was cleared right after the spill).
; NOTE(review): the byte count is 16 - (rdx & 15); confirm this is the intended
; count for a trailing partial block.
Adam Langleye9ada862015-05-11 17:20:37 -07004205$L$cbc_dec_tail_partial:
4206 movaps XMMWORD[rsp],xmm2
4207 pxor xmm2,xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004208 mov rcx,16
4209 mov rdi,rsi
4210 sub rcx,rdx
Adam Langleye9ada862015-05-11 17:20:37 -07004211 lea rsi,[rsp]
4212 DD 0x9066A4F3 ; bytes F3 A4 66 90: rep movsb + 2-byte nop
4213 movdqa XMMWORD[rsp],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004214
; Epilogue of the bulk path: restore the callee-saved xmm6..xmm15 from the
; frame and overwrite each save slot with zeros, then restore rbp and rsp via
; r11 (the stack pointer value captured before the frame was built).
Adam Langleye9ada862015-05-11 17:20:37 -07004215$L$cbc_dec_ret:
4216 xorps xmm0,xmm0
4217 pxor xmm1,xmm1
4218 movaps xmm6,XMMWORD[16+rsp]
4219 movaps XMMWORD[16+rsp],xmm0
4220 movaps xmm7,XMMWORD[32+rsp]
4221 movaps XMMWORD[32+rsp],xmm0
4222 movaps xmm8,XMMWORD[48+rsp]
4223 movaps XMMWORD[48+rsp],xmm0
4224 movaps xmm9,XMMWORD[64+rsp]
4225 movaps XMMWORD[64+rsp],xmm0
4226 movaps xmm10,XMMWORD[80+rsp]
4227 movaps XMMWORD[80+rsp],xmm0
4228 movaps xmm11,XMMWORD[96+rsp]
4229 movaps XMMWORD[96+rsp],xmm0
4230 movaps xmm12,XMMWORD[112+rsp]
4231 movaps XMMWORD[112+rsp],xmm0
4232 movaps xmm13,XMMWORD[128+rsp]
4233 movaps XMMWORD[128+rsp],xmm0
4234 movaps xmm14,XMMWORD[144+rsp]
4235 movaps XMMWORD[144+rsp],xmm0
4236 movaps xmm15,XMMWORD[160+rsp]
4237 movaps XMMWORD[160+rsp],xmm0
Robert Sloana94fe052017-02-21 08:49:28 -08004238 mov rbp,QWORD[((-8))+r11]
4239 lea rsp,[r11]
; Shared return for every path: reload rdi/rsi from the home space where the
; prologue saved them.
Adam Langleye9ada862015-05-11 17:20:37 -07004240$L$cbc_ret:
4241 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4242 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004243 DB 0F3h,0C3h ;repret
Adam Langleye9ada862015-05-11 17:20:37 -07004244$L$SEH_end_aesni_cbc_encrypt:
4245global aesni_set_decrypt_key
Adam Langleyd9e397b2015-01-22 14:27:53 -08004246
4247ALIGN 16
; aesni_set_decrypt_key -- builds a decryption key schedule. It first calls
; __aesni_set_encrypt_key with the incoming arguments untouched, then converts
; the schedule at r8 in place: the entries are reversed end-for-end and every
; entry except the first and last is passed through aesimc.
; Returns whatever the encrypt-key routine left in eax; a non-zero value skips
; the conversion.
; NOTE(review): relies on the callee leaving r8 pointing at the start of the
; schedule and edx such that r8 + 16 + edx*16 addresses the last entry --
; confirm against __aesni_set_encrypt_key.
Adam Langleye9ada862015-05-11 17:20:37 -07004248aesni_set_decrypt_key:
4249DB 0x48,0x83,0xEC,0x08 ; bytes 48 83 EC 08: sub rsp,8
Adam Langleyd9e397b2015-01-22 14:27:53 -08004250 call __aesni_set_encrypt_key
4251 shl edx,4
4252 test eax,eax
Adam Langleye9ada862015-05-11 17:20:37 -07004253 jnz NEAR $L$dec_key_ret
4254 lea rcx,[16+rdx*1+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004255
; Swap the first and last entries without transforming them, then move both
; pointers one entry inwards.
Adam Langleye9ada862015-05-11 17:20:37 -07004256 movups xmm0,XMMWORD[r8]
4257 movups xmm1,XMMWORD[rcx]
4258 movups XMMWORD[rcx],xmm0
4259 movups XMMWORD[r8],xmm1
4260 lea r8,[16+r8]
4261 lea rcx,[((-16))+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004262
; Walk inwards from both ends: load a pair, apply aesimc to each, advance the
; pointers, and store each result in the other's original slot. Runs while
; rcx is still above r8.
Adam Langleye9ada862015-05-11 17:20:37 -07004263$L$dec_key_inverse:
4264 movups xmm0,XMMWORD[r8]
4265 movups xmm1,XMMWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004266DB 102,15,56,219,192 ; aesimc xmm0,xmm0
4267DB 102,15,56,219,201 ; aesimc xmm1,xmm1
Adam Langleye9ada862015-05-11 17:20:37 -07004268 lea r8,[16+r8]
4269 lea rcx,[((-16))+rcx]
4270 movups XMMWORD[16+rcx],xmm0
4271 movups XMMWORD[(-16)+r8],xmm1
Adam Langleyd9e397b2015-01-22 14:27:53 -08004272 cmp rcx,r8
Adam Langleye9ada862015-05-11 17:20:37 -07004273 ja NEAR $L$dec_key_inverse
Adam Langleyd9e397b2015-01-22 14:27:53 -08004274
; Remaining entry: loaded from [r8], transformed, stored to [rcx]. Then clear
; the xmm registers that held key material.
; NOTE(review): presumably rcx == r8 here (the middle entry) -- confirm.
Adam Langleye9ada862015-05-11 17:20:37 -07004275 movups xmm0,XMMWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004276DB 102,15,56,219,192 ; aesimc xmm0,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -07004277 pxor xmm1,xmm1
4278 movups XMMWORD[rcx],xmm0
4279 pxor xmm0,xmm0
4280$L$dec_key_ret:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004281 add rsp,8
4282 DB 0F3h,0C3h ;repret
Adam Langleye9ada862015-05-11 17:20:37 -07004283$L$SEH_end_set_decrypt_key:
4284
; aesni_set_encrypt_key / __aesni_set_encrypt_key: expand a user key into an
; AES encryption key schedule.
; Inputs as used by the code below:
;   rcx = pointer to the user key (first 16 bytes loaded into xmm0 here)
;   edx = key length in bits; only 128, 192 and 256 are accepted
;   r8  = pointer to the output schedule (r8 itself is never modified)
; Returns rax = -1 if rcx or r8 is NULL, -2 for any other bit length
; (set at $L$bad_keybits), 0 on success.  On success edx holds the round
; value (9/11/13) that the selected path also stores inside the schedule.
4285global aesni_set_encrypt_key
Adam Langleyd9e397b2015-01-22 14:27:53 -08004286

4287ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004288aesni_set_encrypt_key:
4289__aesni_set_encrypt_key:
; Hand-encoded `sub rsp,8` (48 83 EC 08); released at $L$enc_key_ret.
4290DB 0x48,0x83,0xEC,0x08
Adam Langleyd9e397b2015-01-22 14:27:53 -08004291 mov rax,-1
4292 test rcx,rcx
Adam Langleye9ada862015-05-11 17:20:37 -07004293 jz NEAR $L$enc_key_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004294 test r8,r8
Adam Langleye9ada862015-05-11 17:20:37 -07004295 jz NEAR $L$enc_key_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004296

Adam Langleye9ada862015-05-11 17:20:37 -07004297 movups xmm0,XMMWORD[rcx]
; xmm4 must start as zero: the $L$key_expansion_* helpers rely on its low
; dword being zero when they fold the previous round key.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004298 xorps xmm4,xmm4
; r10d = second dword of OPENSSL_ia32cap_P masked with 0x10000800 (bits 28
; and 11).  The per-size paths take their *_alt variant only when the result
; is exactly 0x10000000, i.e. bit 28 set and bit 11 clear.
; NOTE(review): presumably bit 28 = AVX and bit 11 = the XOP flag as laid out
; by the capability-vector setup code -- confirm against that code.
Robert Sloan2424d842017-05-01 07:46:28 -07004299 lea r10,[OPENSSL_ia32cap_P]
Robert Sloan572a4e22017-04-17 10:52:19 -07004300 mov r10d,DWORD[4+r10]
4301 and r10d,268437504
; rax = write cursor for the next round key (round key 0 lives at [r8]).
Adam Langleye9ada862015-05-11 17:20:37 -07004302 lea rax,[16+r8]
; Dispatch on key length; 128 falls through into $L$10rounds.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004303 cmp edx,256
Adam Langleye9ada862015-05-11 17:20:37 -07004304 je NEAR $L$14rounds
Adam Langleyd9e397b2015-01-22 14:27:53 -08004305 cmp edx,192
Adam Langleye9ada862015-05-11 17:20:37 -07004306 je NEAR $L$12rounds
Adam Langleyd9e397b2015-01-22 14:27:53 -08004307 cmp edx,128
Adam Langleye9ada862015-05-11 17:20:37 -07004308 jne NEAR $L$bad_keybits
Adam Langleyd9e397b2015-01-22 14:27:53 -08004309
; 128-bit key, AESKEYGENASSIST path.
; On entry: xmm0 = user key, rax = r8+16, r10d = masked capability bits.
; Each `DB 102,15,58,223,200,N` is a hand-encoded
; `aeskeygenassist xmm1,xmm0,N` (66 0F 3A DF C8 ib); N runs through the round
; constants 1,2,4,...,128,27,54.  The helper called after each one derives the
; next round key in xmm0 ($L$key_expansion_128 also stores the previous one).
Adam Langleye9ada862015-05-11 17:20:37 -07004310$L$10rounds:
; edx = 9: value written into the schedule at the end of this path.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004311 mov edx,9
; Exactly 0x10000000 after masking selects the alternative implementation.
Adam Langleye9ada862015-05-11 17:20:37 -07004312 cmp r10d,268435456
4313 je NEAR $L$10rounds_alt
4314
; Round key 0 is the user key itself.
4315 movups XMMWORD[r8],xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08004316DB 102,15,58,223,200,1
; The _cold entry skips the store because round key 0 was just written above.
4317 call $L$key_expansion_128_cold
4318DB 102,15,58,223,200,2
4319 call $L$key_expansion_128
4320DB 102,15,58,223,200,4
4321 call $L$key_expansion_128
4322DB 102,15,58,223,200,8
4323 call $L$key_expansion_128
4324DB 102,15,58,223,200,16
4325 call $L$key_expansion_128
4326DB 102,15,58,223,200,32
4327 call $L$key_expansion_128
4328DB 102,15,58,223,200,64
4329 call $L$key_expansion_128
4330DB 102,15,58,223,200,128
4331 call $L$key_expansion_128
4332DB 102,15,58,223,200,27
4333 call $L$key_expansion_128
4334DB 102,15,58,223,200,54
4335 call $L$key_expansion_128
; Nine stores of 16 bytes have advanced rax to r8+160: write the final round
; key there and the round value at rax+80 = r8+240.
Adam Langleye9ada862015-05-11 17:20:37 -07004336 movups XMMWORD[rax],xmm0
4337 mov DWORD[80+rax],edx
; Success: return 0.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004338 xor eax,eax
Adam Langleye9ada862015-05-11 17:20:37 -07004339 jmp NEAR $L$enc_key_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004340
; 128-bit key, alternative path that replaces AESKEYGENASSIST with
; PSHUFB + AESENCLAST.
; On entry: xmm0 = user key, rax = r8+16, edx = 9.
; Register roles: xmm5 = byte-shuffle mask $L$key_rotate, xmm4 = round
; constant replicated in all four dwords, xmm2 = copy of the previous round
; key, r10d = loop counter.
4341ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004342$L$10rounds_alt:
4343 movdqa xmm5,XMMWORD[$L$key_rotate]
; Eight loop iterations cover round constants 1,2,4,...,128.
4344 mov r10d,8
4345 movdqa xmm4,XMMWORD[$L$key_rcon1]
4346 movdqa xmm2,xmm0
; Round key 0 is the user key itself.
4347 movdqu XMMWORD[r8],xmm0
4348 jmp NEAR $L$oop_key128
4349
4350ALIGN 16
4351$L$oop_key128:
; 66 0F 38 00 C5 = pshufb xmm0,xmm5: the mask copies bytes 13,14,15,12 of the
; previous key into every dword.
4352DB 102,15,56,0,197
; 66 0F 38 DD C4 = aesenclast xmm0,xmm4 (xmm4 supplies the round constant).
4353DB 102,15,56,221,196
; Double the round constant for the next iteration.
4354 pslld xmm4,1
4355 lea rax,[16+rax]
4356
; Fold xmm2 so that each dword becomes the XOR of itself and all lower dwords.
4357 movdqa xmm3,xmm2
4358 pslldq xmm2,4
4359 pxor xmm3,xmm2
4360 pslldq xmm2,4
4361 pxor xmm3,xmm2
4362 pslldq xmm2,4
4363 pxor xmm2,xmm3
4364
; New round key = transformed word (xmm0) XOR folded previous key (xmm2).
4365 pxor xmm0,xmm2
4366 movdqu XMMWORD[(-16)+rax],xmm0
4367 movdqa xmm2,xmm0
4368
4369 dec r10d
4370 jnz NEAR $L$oop_key128
4371
; Last two round keys: the constant sequence restarts at 0x1b, so reload it
; from $L$key_rcon1b; the shift below turns it into 0x36 for the final key.
4372 movdqa xmm4,XMMWORD[$L$key_rcon1b]
4373
; pshufb xmm0,xmm5 / aesenclast xmm0,xmm4
4374DB 102,15,56,0,197
4375DB 102,15,56,221,196
4376 pslld xmm4,1
4377
4378 movdqa xmm3,xmm2
4379 pslldq xmm2,4
4380 pxor xmm3,xmm2
4381 pslldq xmm2,4
4382 pxor xmm3,xmm2
4383 pslldq xmm2,4
4384 pxor xmm2,xmm3
4385
4386 pxor xmm0,xmm2
; rax is r8+144 here (eight 16-byte steps from r8+16).
4387 movdqu XMMWORD[rax],xmm0
4388
4389 movdqa xmm2,xmm0
; pshufb xmm0,xmm5 / aesenclast xmm0,xmm4
4390DB 102,15,56,0,197
4391DB 102,15,56,221,196
4392
4393 movdqa xmm3,xmm2
4394 pslldq xmm2,4
4395 pxor xmm3,xmm2
4396 pslldq xmm2,4
4397 pxor xmm3,xmm2
4398 pslldq xmm2,4
4399 pxor xmm2,xmm3
4400
4401 pxor xmm0,xmm2
; Final round key at r8+160.
4402 movdqu XMMWORD[16+rax],xmm0
4403
; Round value (edx = 9) at rax+96 = r8+240, then return 0.
4404 mov DWORD[96+rax],edx
4405 xor eax,eax
4406 jmp NEAR $L$enc_key_ret
4407
; 192-bit key, AESKEYGENASSIST path.
; On entry: xmm0 = first 16 key bytes, rax = r8+16, rcx = user key.
; xmm2 receives the remaining 8 key bytes.  Each `DB 102,15,58,223,202,N` is
; a hand-encoded `aeskeygenassist xmm1,xmm2,N` (66 0F 3A DF CA ib) with
; N = 1,2,4,...,128.  The 192a/192b helpers alternate because 24-byte
; expansion steps do not line up with 16-byte round keys.
4408ALIGN 16
4409$L$12rounds:
4410 movq xmm2,QWORD[16+rcx]
; edx = 11: value written into the schedule at the end of this path.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004411 mov edx,11
; Exactly 0x10000000 after masking selects the alternative implementation.
Adam Langleye9ada862015-05-11 17:20:37 -07004412 cmp r10d,268435456
4413 je NEAR $L$12rounds_alt
4414
; Round key 0 is the first 16 bytes of the user key.
4415 movups XMMWORD[r8],xmm0
Adam Langleyd9e397b2015-01-22 14:27:53 -08004416DB 102,15,58,223,202,1
4417 call $L$key_expansion_192a_cold
4418DB 102,15,58,223,202,2
4419 call $L$key_expansion_192b
4420DB 102,15,58,223,202,4
4421 call $L$key_expansion_192a
4422DB 102,15,58,223,202,8
4423 call $L$key_expansion_192b
4424DB 102,15,58,223,202,16
4425 call $L$key_expansion_192a
4426DB 102,15,58,223,202,32
4427 call $L$key_expansion_192b
4428DB 102,15,58,223,202,64
4429 call $L$key_expansion_192a
4430DB 102,15,58,223,202,128
4431 call $L$key_expansion_192b
; The helpers have advanced rax by 4*32 + 3*16 = 176 bytes to r8+192: write
; the final round key there and the round value at rax+48 = r8+240.
Adam Langleye9ada862015-05-11 17:20:37 -07004432 movups XMMWORD[rax],xmm0
4433 mov DWORD[48+rax],edx
; Success: return 0.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004434 xor rax,rax
Adam Langleye9ada862015-05-11 17:20:37 -07004435 jmp NEAR $L$enc_key_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004436
; 192-bit key, alternative path using PSHUFB + AESENCLAST.
; On entry: xmm0 = first 16 key bytes, xmm2 = last 8 key bytes (low qword),
; rax = r8+16, edx = 11.
; Register roles: xmm5 = shuffle mask $L$key_rotate192, xmm4 = replicated
; round constant, r10d = loop counter.  Each iteration emits 24 bytes of
; schedule: 8 bytes from xmm2 followed by 16 bytes from xmm0.
4437ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004438$L$12rounds_alt:
4439 movdqa xmm5,XMMWORD[$L$key_rotate192]
4440 movdqa xmm4,XMMWORD[$L$key_rcon1]
; Eight iterations cover round constants 1,2,4,...,128.
4441 mov r10d,8
; Round key 0 is the first 16 bytes of the user key.
4442 movdqu XMMWORD[r8],xmm0
4443 jmp NEAR $L$oop_key192
4444
4445ALIGN 16
4446$L$oop_key192:
; Emit the low 8 bytes of xmm2 and keep an untransformed copy in xmm1.
4447 movq QWORD[rax],xmm2
4448 movdqa xmm1,xmm2
; 66 0F 38 00 D5 = pshufb xmm2,xmm5: the mask copies bytes 5,6,7,4 into every
; dword.
4449DB 102,15,56,0,213
; 66 0F 38 DD D4 = aesenclast xmm2,xmm4 (xmm4 supplies the round constant).
4450DB 102,15,56,221,212
; Double the round constant for the next iteration.
4451 pslld xmm4,1
4452 lea rax,[24+rax]
4453
; Fold xmm0 so that each dword becomes the XOR of itself and all lower dwords.
4454 movdqa xmm3,xmm0
4455 pslldq xmm0,4
4456 pxor xmm3,xmm0
4457 pslldq xmm0,4
4458 pxor xmm3,xmm0
4459 pslldq xmm0,4
4460 pxor xmm0,xmm3
4461
; xmm3 = broadcast of the folded key's top dword, combined with the saved
; tail (xmm1) and the tail shifted up by one dword.
4462 pshufd xmm3,xmm0,0xff
4463 pxor xmm3,xmm1
4464 pslldq xmm1,4
4465 pxor xmm3,xmm1
4466
; xmm0 = next 16 schedule bytes; xmm2 = next tail for the following iteration.
4467 pxor xmm0,xmm2
4468 pxor xmm2,xmm3
4469 movdqu XMMWORD[(-16)+rax],xmm0
4470
4471 dec r10d
4472 jnz NEAR $L$oop_key192
4473
; rax = r8+16+8*24 = r8+208, so the round value lands at rax+32 = r8+240.
4474 mov DWORD[32+rax],edx
; Success: return 0.
4475 xor eax,eax
4476 jmp NEAR $L$enc_key_ret
4477
; 256-bit key, AESKEYGENASSIST path.
; On entry: xmm0 = first 16 key bytes, rax = r8+16, rcx = user key.
; xmm2 receives the second 16 key bytes.  The DB lines alternate between
;   102,15,58,223,202,N = aeskeygenassist xmm1,xmm2,N  (before 256a: new xmm0)
;   102,15,58,223,200,N = aeskeygenassist xmm1,xmm0,N  (before 256b: new xmm2)
; with N = 1,2,4,...,64.
4478ALIGN 16
4479$L$14rounds:
4480 movups xmm2,XMMWORD[16+rcx]
; edx = 13: value written into the schedule at the end of this path.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004481 mov edx,13
; Two round keys come straight from the user key, so the cursor starts at r8+32.
Adam Langleye9ada862015-05-11 17:20:37 -07004482 lea rax,[16+rax]
; Exactly 0x10000000 after masking selects the alternative implementation.
4483 cmp r10d,268435456
4484 je NEAR $L$14rounds_alt
4485
; Round keys 0 and 1 are the two halves of the user key.
4486 movups XMMWORD[r8],xmm0
4487 movups XMMWORD[16+r8],xmm2
Adam Langleyd9e397b2015-01-22 14:27:53 -08004488DB 102,15,58,223,202,1
4489 call $L$key_expansion_256a_cold
4490DB 102,15,58,223,200,1
4491 call $L$key_expansion_256b
4492DB 102,15,58,223,202,2
4493 call $L$key_expansion_256a
4494DB 102,15,58,223,200,2
4495 call $L$key_expansion_256b
4496DB 102,15,58,223,202,4
4497 call $L$key_expansion_256a
4498DB 102,15,58,223,200,4
4499 call $L$key_expansion_256b
4500DB 102,15,58,223,202,8
4501 call $L$key_expansion_256a
4502DB 102,15,58,223,200,8
4503 call $L$key_expansion_256b
4504DB 102,15,58,223,202,16
4505 call $L$key_expansion_256a
4506DB 102,15,58,223,200,16
4507 call $L$key_expansion_256b
4508DB 102,15,58,223,202,32
4509 call $L$key_expansion_256a
4510DB 102,15,58,223,200,32
4511 call $L$key_expansion_256b
4512DB 102,15,58,223,202,64
4513 call $L$key_expansion_256a
; Twelve 16-byte stores have advanced rax to r8+224: write the final round
; key there and the round value at rax+16 = r8+240.
Adam Langleye9ada862015-05-11 17:20:37 -07004514 movups XMMWORD[rax],xmm0
4515 mov DWORD[16+rax],edx
; Success: return 0.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004516 xor rax,rax
Adam Langleye9ada862015-05-11 17:20:37 -07004517 jmp NEAR $L$enc_key_ret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004518
; 256-bit key, alternative path using PSHUFB + AESENCLAST.
; On entry: xmm0 / xmm2 = first / second 16 key bytes, rax = r8+32, edx = 13.
; Register roles: xmm5 = shuffle mask $L$key_rotate, xmm4 = replicated round
; constant, xmm1 = copy of the most recent odd-numbered round key,
; r10d = loop counter.  Each full iteration emits two round keys.
4519ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004520$L$14rounds_alt:
4521 movdqa xmm5,XMMWORD[$L$key_rotate]
4522 movdqa xmm4,XMMWORD[$L$key_rcon1]
; Seven passes cover round constants 1,2,4,...,64; the last pass leaves the
; loop right after its first store.
4523 mov r10d,7
; Round keys 0 and 1 are the two halves of the user key.
4524 movdqu XMMWORD[r8],xmm0
4525 movdqa xmm1,xmm2
4526 movdqu XMMWORD[16+r8],xmm2
4527 jmp NEAR $L$oop_key256
4528
4529ALIGN 16
4530$L$oop_key256:
; 66 0F 38 00 D5 = pshufb xmm2,xmm5: the mask copies bytes 13,14,15,12 into
; every dword.
4531DB 102,15,56,0,213
; 66 0F 38 DD D4 = aesenclast xmm2,xmm4 (xmm4 supplies the round constant).
4532DB 102,15,56,221,212
4533
; Fold xmm0 so that each dword becomes the XOR of itself and all lower dwords.
4534 movdqa xmm3,xmm0
4535 pslldq xmm0,4
4536 pxor xmm3,xmm0
4537 pslldq xmm0,4
4538 pxor xmm3,xmm0
4539 pslldq xmm0,4
4540 pxor xmm0,xmm3
; Double the round constant for the next pass.
4541 pslld xmm4,1
4542
; Even-numbered round key.
4543 pxor xmm0,xmm2
4544 movdqu XMMWORD[rax],xmm0
4545
4546 dec r10d
4547 jz NEAR $L$done_key256
4548
; Odd-numbered round key: broadcast the top dword of the key just produced
; and run it through AESENCLAST with an all-zero round key (xmm3), i.e. with
; no byte rotation beforehand and no round constant.
4549 pshufd xmm2,xmm0,0xff
4550 pxor xmm3,xmm3
; 66 0F 38 DD D3 = aesenclast xmm2,xmm3
4551DB 102,15,56,221,211
4552
; Fold xmm1 (previous odd-numbered key) the same way as xmm0 above.
4553 movdqa xmm3,xmm1
4554 pslldq xmm1,4
4555 pxor xmm3,xmm1
4556 pslldq xmm1,4
4557 pxor xmm3,xmm1
4558 pslldq xmm1,4
4559 pxor xmm1,xmm3
4560
4561 pxor xmm2,xmm1
4562 movdqu XMMWORD[16+rax],xmm2
4563 lea rax,[32+rax]
4564 movdqa xmm1,xmm2
4565
4566 jmp NEAR $L$oop_key256
4567
4568$L$done_key256:
; rax = r8+224 after six full passes, so the round value lands at r8+240.
4569 mov DWORD[16+rax],edx
; Success: return 0.
4570 xor eax,eax
4571 jmp NEAR $L$enc_key_ret
4572
; Common exit of aesni_set_encrypt_key.
; $L$bad_keybits is reached when edx is not 128, 192 or 256 and sets the
; return value to -2.  $L$enc_key_ret is the shared exit for all paths; rax
; already holds the return value (-1, -2 or 0) when it is reached.
4573ALIGN 16
4574$L$bad_keybits:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004575 mov rax,-2
Adam Langleye9ada862015-05-11 17:20:37 -07004576$L$enc_key_ret:
; Zero every XMM register the expansion code used so no key material is left
; behind in registers.
4577 pxor xmm0,xmm0
4578 pxor xmm1,xmm1
4579 pxor xmm2,xmm2
4580 pxor xmm3,xmm3
4581 pxor xmm4,xmm4
4582 pxor xmm5,xmm5
; Release the 8 bytes reserved by the hand-encoded `sub rsp,8` at entry.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004583 add rsp,8
4584 DB 0F3h,0C3h ;repret
; End-of-function marker referenced from the .pdata table.
Adam Langleye9ada862015-05-11 17:20:37 -07004585$L$SEH_end_set_encrypt_key:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004586
; Helper for the 128-bit AESKEYGENASSIST path (called from $L$10rounds).
; In:  xmm0 = previous round key, xmm1 = aeskeygenassist result for it,
;      xmm4 = scratch whose low dword must be zero, rax = write cursor.
; Out: xmm0 = next round key; rax advanced by 16 (normal entry only).
; The _cold entry skips the store/advance for the very first call, where the
; caller has already written round key 0.
4587ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004588$L$key_expansion_128:
4589 movups XMMWORD[rax],xmm0
4590 lea rax,[16+rax]
4591$L$key_expansion_128_cold:
; The two shufps/xorps pairs turn each dword of xmm0 into the XOR of itself
; and all lower dwords; xmm4's low dword stays zero throughout.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004592 shufps xmm4,xmm0,16
4593 xorps xmm0,xmm4
4594 shufps xmm4,xmm0,140
4595 xorps xmm0,xmm4
; Broadcast dword 3 of the keygenassist result and mix it into every dword.
4596 shufps xmm1,xmm1,255
4597 xorps xmm0,xmm1
4598 DB 0F3h,0C3h ;repret
4599
; Helper for the 192-bit AESKEYGENASSIST path (called from $L$12rounds).
; In:  xmm0 = low 16 bytes and xmm2 = next 8 bytes (low qword) of the running
;      192-bit key state, xmm1 = aeskeygenassist result computed from xmm2,
;      xmm4 = scratch whose low dword must be zero, rax = write cursor.
; Out: xmm0 / xmm2 updated to the next 24 bytes of schedule material.
; Entry points:
;   $L$key_expansion_192a       store xmm0 at [rax], advance rax by 16
;   $L$key_expansion_192a_cold  no store; saves xmm2 in xmm5 for the following
;                               $L$key_expansion_192b call
;   $L$key_expansion_192b_warm  shared tail, jumped to from $L$key_expansion_192b
4600ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004601$L$key_expansion_192a:
4602 movups XMMWORD[rax],xmm0
4603 lea rax,[16+rax]
4604$L$key_expansion_192a_cold:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004605 movaps xmm5,xmm2
Adam Langleye9ada862015-05-11 17:20:37 -07004606$L$key_expansion_192b_warm:
; Fold xmm0 (each dword ^= all lower dwords) while, interleaved, xmm3 becomes
; xmm2 shifted up by one dword.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004607 shufps xmm4,xmm0,16
4608 movdqa xmm3,xmm2
4609 xorps xmm0,xmm4
4610 shufps xmm4,xmm0,140
4611 pslldq xmm3,4
4612 xorps xmm0,xmm4
; Broadcast dword 1 of the keygenassist result and mix it into xmm0.
4613 pshufd xmm1,xmm1,85
4614 pxor xmm2,xmm3
4615 pxor xmm0,xmm1
; Mix the top dword of the new xmm0 into the xmm2 tail.
4616 pshufd xmm3,xmm0,255
4617 pxor xmm2,xmm3
4618 DB 0F3h,0C3h ;repret
4619
; Helper for the 192-bit AESKEYGENASSIST path (called from $L$12rounds).
; Packs 32 bytes of schedule from the 24-byte-granular state and stores them:
;   [rax]    = low qword of xmm5 (tail saved by the preceding 192a call)
;              followed by the low qword of xmm0
;   [rax+16] = high qword of xmm0 followed by the low qword of xmm2
; then advances rax by 32 and continues in $L$key_expansion_192b_warm, which
; computes the next xmm0/xmm2 and returns to the caller.
4620ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004621$L$key_expansion_192b:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004622 movaps xmm3,xmm0
; imm 68 (0x44): keep xmm5[0..1], append xmm0[0..1].
4623 shufps xmm5,xmm0,68
Adam Langleye9ada862015-05-11 17:20:37 -07004624 movups XMMWORD[rax],xmm5
; imm 78 (0x4E): take xmm3[2..3] (copy of xmm0), append xmm2[0..1].
Adam Langleyd9e397b2015-01-22 14:27:53 -08004625 shufps xmm3,xmm2,78
Adam Langleye9ada862015-05-11 17:20:37 -07004626 movups XMMWORD[16+rax],xmm3
4627 lea rax,[32+rax]
; Tail-jump: the `ret` in the shared tail returns to this helper's caller.
4628 jmp NEAR $L$key_expansion_192b_warm
Adam Langleyd9e397b2015-01-22 14:27:53 -08004629
; Helper for the 256-bit AESKEYGENASSIST path (called from $L$14rounds).
; In:  xmm0 = round key two positions back, xmm2 = previous round key,
;      xmm1 = aeskeygenassist result computed from xmm2,
;      xmm4 = scratch whose low dword must be zero, rax = write cursor.
; Out: xmm0 = next even-numbered round key; the normal entry first stores
;      xmm2 at [rax] and advances rax by 16.
; The _cold entry skips the store for the first call, where the caller has
; already written round keys 0 and 1.
4630ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004631$L$key_expansion_256a:
4632 movups XMMWORD[rax],xmm2
4633 lea rax,[16+rax]
4634$L$key_expansion_256a_cold:
; Fold xmm0: each dword becomes the XOR of itself and all lower dwords.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004635 shufps xmm4,xmm0,16
4636 xorps xmm0,xmm4
4637 shufps xmm4,xmm0,140
4638 xorps xmm0,xmm4
; Broadcast dword 3 of the keygenassist result and mix it into every dword.
4639 shufps xmm1,xmm1,255
4640 xorps xmm0,xmm1
4641 DB 0F3h,0C3h ;repret
4642
; Helper for the 256-bit AESKEYGENASSIST path (called from $L$14rounds).
; In:  xmm0 = round key just produced by 256a, xmm2 = round key before it,
;      xmm1 = aeskeygenassist result computed from xmm0,
;      xmm4 = scratch whose low dword must be zero, rax = write cursor.
; Out: xmm0 stored at [rax], rax advanced by 16, xmm2 = next odd-numbered
;      round key.
4643ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004644$L$key_expansion_256b:
4645 movups XMMWORD[rax],xmm0
4646 lea rax,[16+rax]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004647

; Fold xmm2: each dword becomes the XOR of itself and all lower dwords.
4648 shufps xmm4,xmm2,16
4649 xorps xmm2,xmm4
4650 shufps xmm4,xmm2,140
4651 xorps xmm2,xmm4
; imm 170 (0xAA) broadcasts dword 2 of the keygenassist result, unlike the
; dword-3 broadcast used by $L$key_expansion_256a.
4652 shufps xmm1,xmm1,170
4653 xorps xmm2,xmm1
4654 DB 0F3h,0C3h ;repret
Adam Langleye9ada862015-05-11 17:20:37 -07004655
Adam Langleyd9e397b2015-01-22 14:27:53 -08004656
; Constant pool, 64-byte aligned; every entry is 16 bytes.
; $L$key_rotate, $L$key_rotate192, $L$key_rcon1 and $L$key_rcon1b are loaded
; with movdqa by the *_alt key-schedule paths in aesni_set_encrypt_key.
; NOTE(review): the remaining entries are not referenced in this part of the
; file; presumably they serve the CTR/XTS/CCM/OCB routines -- confirm there.
4657ALIGN 64
Adam Langleye9ada862015-05-11 17:20:37 -07004658$L$bswap_mask:
; Byte indices 15..0: a full 16-byte reversal when used as a pshufb mask.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004659DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
Adam Langleye9ada862015-05-11 17:20:37 -07004660$L$increment32:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004661 DD 6,6,6,0
Adam Langleye9ada862015-05-11 17:20:37 -07004662$L$increment64:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004663 DD 1,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004664$L$xts_magic:
4665 DD 0x87,0,1,0
4666$L$increment1:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004667DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
; pshufb mask: every dword selects source bytes 13,14,15,12.
Adam Langleye9ada862015-05-11 17:20:37 -07004668$L$key_rotate:
4669 DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
; pshufb mask: every dword selects source bytes 5,6,7,4.
4670$L$key_rotate192:
4671 DD 0x04070605,0x04070605,0x04070605,0x04070605
; Initial round constant 1 replicated in all four dwords.
4672$L$key_rcon1:
4673 DD 1,1,1,1
; Round constant 0x1b replicated; reloaded by $L$10rounds_alt for its last
; two round keys.
4674$L$key_rcon1b:
4675 DD 0x1b,0x1b,0x1b,0x1b
Adam Langleyd9e397b2015-01-22 14:27:53 -08004676

; NUL-terminated ASCII string:
; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
4677DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
4678DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
4679DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
4680DB 115,108,46,111,114,103,62,0
4681ALIGN 64
Adam Langleye9ada862015-05-11 17:20:37 -07004682EXTERN __imp_RtlVirtualUnwind
Adam Langleyd9e397b2015-01-22 14:27:53 -08004683
; Win64 language-specific exception handler used by the ECB and CCM64 unwind
; records (see $L$SEH_info_ecb / _ccm64_enc / _ccm64_dec).
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (rcx/rdx are not used here).
; Field names below follow the documented Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts.  HandlerData holds two image-relative
; addresses: [0] start of the function body, [4] start of its epilogue.
; The handler computes in rax the stack pointer value the frame should be
; unwound from, restores saved XMM registers into the CONTEXT if the fault
; happened inside the body, and continues in $L$common_seh_tail.
4684ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004685ecb_ccm64_se_handler:
; Preserve every register this handler touches, plus flags (the DD-encoded
; string copy below starts with `cld`); 64 bytes are reserved for the
; RtlVirtualUnwind call made in the common tail.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004686 push rsi
4687 push rdi
4688 push rbx
4689 push rbp
4690 push r12
4691 push r13
4692 push r14
4693 push r15
4694 pushfq
4695 sub rsp,64
4696

; rax = context->Rax, rbx = context->Rip
Adam Langleye9ada862015-05-11 17:20:37 -07004697 mov rax,QWORD[120+r8]
4698 mov rbx,QWORD[248+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004699

; rsi = disp->ImageBase, r11 = disp->HandlerData
Adam Langleye9ada862015-05-11 17:20:37 -07004700 mov rsi,QWORD[8+r9]
4701 mov r11,QWORD[56+r9]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004702

; Rip below HandlerData[0]: fault in the prologue, unwind from context->Rax.
; NOTE(review): assumes the guarded prologue copies rsp into rax before
; anything can fault -- confirm against the function prologues.
Adam Langleye9ada862015-05-11 17:20:37 -07004703 mov r10d,DWORD[r11]
4704 lea r10,[r10*1+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004705 cmp rbx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07004706 jb NEAR $L$common_seh_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08004707

; rax = context->Rsp
Adam Langleye9ada862015-05-11 17:20:37 -07004708 mov rax,QWORD[152+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004709

; Rip at or past HandlerData[1]: the epilogue has already restored the stack.
Adam Langleye9ada862015-05-11 17:20:37 -07004710 mov r10d,DWORD[4+r11]
4711 lea r10,[r10*1+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004712 cmp rbx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07004713 jae NEAR $L$common_seh_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08004714

; Inside the body: copy 8 qwords (four XMM registers) from the frame at
; [rax] into the CONTEXT starting at offset 512 (context->Xmm6).
Adam Langleye9ada862015-05-11 17:20:37 -07004715 lea rsi,[rax]
4716 lea rdi,[512+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004717 mov ecx,8
; Bytes FC F3 48 A5 = cld ; rep movsq
Adam Langleye9ada862015-05-11 17:20:37 -07004718 DD 0xa548f3fc
; Step over the 88-byte local frame.
4719 lea rax,[88+rax]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004720

Adam Langleye9ada862015-05-11 17:20:37 -07004721 jmp NEAR $L$common_seh_tail
4722
Adam Langleyd9e397b2015-01-22 14:27:53 -08004723
4724
; Win64 language-specific exception handler used by the CTR32 and XTS unwind
; records (see $L$SEH_info_ctr32 / _xts_enc / _xts_dec).
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (rcx/rdx are not used here).
; Field names below follow the documented Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts.  HandlerData holds two image-relative
; addresses: [0] start of the function body, [4] start of its epilogue.
; Inside the body the frame is located through context->R11.
4725ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004726ctr_xts_se_handler:
; Preserve every register this handler touches, plus flags; 64 bytes are
; reserved for the RtlVirtualUnwind call made in the common tail.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004727 push rsi
4728 push rdi
4729 push rbx
4730 push rbp
4731 push r12
4732 push r13
4733 push r14
4734 push r15
4735 pushfq
4736 sub rsp,64
4737

; rax = context->Rax, rbx = context->Rip
Adam Langleye9ada862015-05-11 17:20:37 -07004738 mov rax,QWORD[120+r8]
4739 mov rbx,QWORD[248+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004740

; rsi = disp->ImageBase, r11 = disp->HandlerData
Adam Langleye9ada862015-05-11 17:20:37 -07004741 mov rsi,QWORD[8+r9]
4742 mov r11,QWORD[56+r9]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004743

; Rip below HandlerData[0]: fault in the prologue, unwind from context->Rax.
Adam Langleye9ada862015-05-11 17:20:37 -07004744 mov r10d,DWORD[r11]
4745 lea r10,[r10*1+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004746 cmp rbx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07004747 jb NEAR $L$common_seh_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08004748

; rax = context->Rsp
Adam Langleye9ada862015-05-11 17:20:37 -07004749 mov rax,QWORD[152+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004750

; Rip at or past HandlerData[1]: the epilogue has already restored the stack.
Adam Langleye9ada862015-05-11 17:20:37 -07004751 mov r10d,DWORD[4+r11]
4752 lea r10,[r10*1+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004753 cmp rbx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07004754 jae NEAR $L$common_seh_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08004755

; Inside the body: rax = context->R11.
; NOTE(review): presumably the guarded functions keep their entry stack
; pointer in r11 -- confirm against their prologues.
Robert Sloana94fe052017-02-21 08:49:28 -08004756 mov rax,QWORD[208+r8]
4757
; Copy 20 qwords (ten XMM registers) from rax-168 into the CONTEXT starting
; at offset 512 (context->Xmm6).
4758 lea rsi,[((-168))+rax]
Adam Langleye9ada862015-05-11 17:20:37 -07004759 lea rdi,[512+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004760 mov ecx,20
; Bytes FC F3 48 A5 = cld ; rep movsq
Adam Langleye9ada862015-05-11 17:20:37 -07004761 DD 0xa548f3fc
Adam Langleyd9e397b2015-01-22 14:27:53 -08004762

; Restore the saved rbp from [rax-8] into context->Rbp.
Robert Sloana94fe052017-02-21 08:49:28 -08004763 mov rbp,QWORD[((-8))+rax]
4764 mov QWORD[160+r8],rbp
4765 jmp NEAR $L$common_seh_tail
4766
4767
4768
; Win64 language-specific exception handler used by the OCB unwind records
; (see $L$SEH_info_ocb_enc / _ocb_dec).
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (rcx/rdx are not used here).
; Field names below follow the documented Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts.  HandlerData holds three image-relative
; addresses: [0] body start, [4] epilogue start, [8] the "pop" label after
; which the XMM save area no longer needs restoring.
4769ALIGN 16
4770ocb_se_handler:
; Preserve every register this handler touches, plus flags; 64 bytes are
; reserved for the RtlVirtualUnwind call made in the common tail.
4771 push rsi
4772 push rdi
4773 push rbx
4774 push rbp
4775 push r12
4776 push r13
4777 push r14
4778 push r15
4779 pushfq
4780 sub rsp,64
4781

; rax = context->Rax, rbx = context->Rip
4782 mov rax,QWORD[120+r8]
4783 mov rbx,QWORD[248+r8]
4784

; rsi = disp->ImageBase, r11 = disp->HandlerData
4785 mov rsi,QWORD[8+r9]
4786 mov r11,QWORD[56+r9]
4787

; Rip below HandlerData[0]: fault in the prologue.
4788 mov r10d,DWORD[r11]
4789 lea r10,[r10*1+rsi]
4790 cmp rbx,r10
4791 jb NEAR $L$common_seh_tail
4792

; Rip at or past HandlerData[1]: fault in or after the epilogue.
4793 mov r10d,DWORD[4+r11]
4794 lea r10,[r10*1+rsi]
4795 cmp rbx,r10
4796 jae NEAR $L$common_seh_tail
4797

; Rip at or past HandlerData[2]: skip the XMM restore and keep
; rax = context->Rax for the general-purpose register restore below.
4798 mov r10d,DWORD[8+r11]
4799 lea r10,[r10*1+rsi]
4800 cmp rbx,r10
4801 jae NEAR $L$ocb_no_xmm
4802

; rax = context->Rsp
4803 mov rax,QWORD[152+r8]
4804

; Copy 20 qwords (ten XMM registers) from the frame into the CONTEXT starting
; at offset 512 (context->Xmm6).
4805 lea rsi,[rax]
4806 lea rdi,[512+r8]
4807 mov ecx,20
; Bytes FC F3 48 A5 = cld ; rep movsq
4808 DD 0xa548f3fc
; Step over the 160-byte XMM area plus 40 bytes holding the five saved
; general-purpose registers read back below.
4809 lea rax,[((160+40))+rax]
4810

4811$L$ocb_no_xmm:
; Reload the five registers saved just below rax ...
4812 mov rbx,QWORD[((-8))+rax]
4813 mov rbp,QWORD[((-16))+rax]
4814 mov r12,QWORD[((-24))+rax]
4815 mov r13,QWORD[((-32))+rax]
4816 mov r14,QWORD[((-40))+rax]
4817

; ... and publish them as context->Rbx, Rbp, R12, R13, R14.
4818 mov QWORD[144+r8],rbx
4819 mov QWORD[160+r8],rbp
4820 mov QWORD[216+r8],r12
4821 mov QWORD[224+r8],r13
4822 mov QWORD[232+r8],r14
4823

4824 jmp NEAR $L$common_seh_tail
Adam Langleye9ada862015-05-11 17:20:37 -07004825
Adam Langleyd9e397b2015-01-22 14:27:53 -08004826
; Win64 language-specific exception handler for aesni_cbc_encrypt (see
; $L$SEH_info_cbc).  Unlike the other handlers it carries no HandlerData and
; compares context->Rip against labels inside aesni_cbc_encrypt directly.
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (rcx/rdx are not used here).
; Field names below follow the documented Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts.  Execution falls through into
; $L$common_seh_tail at the end of this block.
4827ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -07004828cbc_se_handler:
; Preserve every register this handler touches, plus flags; 64 bytes are
; reserved for the RtlVirtualUnwind call made in the common tail.
Adam Langleyd9e397b2015-01-22 14:27:53 -08004829 push rsi
4830 push rdi
4831 push rbx
4832 push rbp
4833 push r12
4834 push r13
4835 push r14
4836 push r15
4837 pushfq
4838 sub rsp,64
4839

; rax = context->Rsp, rbx = context->Rip
Adam Langleye9ada862015-05-11 17:20:37 -07004840 mov rax,QWORD[152+r8]
4841 mov rbx,QWORD[248+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004842

; Rip before $L$cbc_decrypt_bulk: unwind from context->Rsp as it is.
Adam Langleye9ada862015-05-11 17:20:37 -07004843 lea r10,[$L$cbc_decrypt_bulk]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004844 cmp rbx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07004845 jb NEAR $L$common_seh_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08004846

; rax = context->Rax
Robert Sloana94fe052017-02-21 08:49:28 -08004847 mov rax,QWORD[120+r8]
4848
; Between $L$cbc_decrypt_bulk and $L$cbc_decrypt_body: unwind from
; context->Rax.
Adam Langleye9ada862015-05-11 17:20:37 -07004849 lea r10,[$L$cbc_decrypt_body]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004850 cmp rbx,r10
Robert Sloana94fe052017-02-21 08:49:28 -08004851 jb NEAR $L$common_seh_tail
4852
; rax = context->Rsp
4853 mov rax,QWORD[152+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004854

; Rip at or past $L$cbc_ret: the stack frame has already been released.
Adam Langleye9ada862015-05-11 17:20:37 -07004855 lea r10,[$L$cbc_ret]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004856 cmp rbx,r10
Adam Langleye9ada862015-05-11 17:20:37 -07004857 jae NEAR $L$common_seh_tail
Adam Langleyd9e397b2015-01-22 14:27:53 -08004858

; Inside the decrypt body: copy 20 qwords (ten XMM registers) from rsp+16
; into the CONTEXT starting at offset 512 (context->Xmm6).
Adam Langleye9ada862015-05-11 17:20:37 -07004859 lea rsi,[16+rax]
4860 lea rdi,[512+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004861 mov ecx,20
; Bytes FC F3 48 A5 = cld ; rep movsq
Adam Langleye9ada862015-05-11 17:20:37 -07004862 DD 0xa548f3fc
Adam Langleyd9e397b2015-01-22 14:27:53 -08004863

; rax = context->R11; the function epilogue likewise restores rsp from r11.
Robert Sloana94fe052017-02-21 08:49:28 -08004864 mov rax,QWORD[208+r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004865

; Restore the saved rbp from [rax-8] into context->Rbp.
Robert Sloana94fe052017-02-21 08:49:28 -08004866 mov rbp,QWORD[((-8))+rax]
4867 mov QWORD[160+r8],rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -08004868
; Shared tail of all four SEH handlers in this file.
; In:  rax = stack pointer value to unwind from, r8 = CONTEXT*,
;      r9 = DISPATCHER_CONTEXT*; the handler prologue (eight pushes, pushfq,
;      sub rsp,64) is still in effect.
; Fixes up Rsp/Rsi/Rdi in the CONTEXT, copies the CONTEXT into
; disp->ContextRecord, calls RtlVirtualUnwind and returns eax = 1.
; Field and argument names follow the documented Win64 layouts.
Adam Langleye9ada862015-05-11 17:20:37 -07004869$L$common_seh_tail:
; rdi/rsi were saved at [rax+8]/[rax+16] by the guarded function (the same
; slots the aesni_cbc_encrypt epilogue reloads them from).
4870 mov rdi,QWORD[8+rax]
4871 mov rsi,QWORD[16+rax]
; context->Rsp = rax, context->Rsi = rsi, context->Rdi = rdi
4872 mov QWORD[152+r8],rax
4873 mov QWORD[168+r8],rsi
4874 mov QWORD[176+r8],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -08004875

; Copy the whole CONTEXT (154 qwords) into disp->ContextRecord.
Adam Langleye9ada862015-05-11 17:20:37 -07004876 mov rdi,QWORD[40+r9]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004877 mov rsi,r8
4878 mov ecx,154
; Bytes FC F3 48 A5 = cld ; rep movsq
Adam Langleye9ada862015-05-11 17:20:37 -07004879 DD 0xa548f3fc
Adam Langleyd9e397b2015-01-22 14:27:53 -08004880

; Marshal the RtlVirtualUnwind arguments from the DISPATCHER_CONTEXT:
;   rcx = 0 (handler type), rdx = ImageBase, r8 = ControlPc,
;   r9 = FunctionEntry, [rsp+32] = ContextRecord, [rsp+40] = &HandlerData,
;   [rsp+48] = &EstablisherFrame, [rsp+56] = NULL (no context pointers).
4881 mov rsi,r9
4882 xor rcx,rcx
Adam Langleye9ada862015-05-11 17:20:37 -07004883 mov rdx,QWORD[8+rsi]
4884 mov r8,QWORD[rsi]
4885 mov r9,QWORD[16+rsi]
4886 mov r10,QWORD[40+rsi]
4887 lea r11,[56+rsi]
4888 lea r12,[24+rsi]
4889 mov QWORD[32+rsp],r10
4890 mov QWORD[40+rsp],r11
4891 mov QWORD[48+rsp],r12
4892 mov QWORD[56+rsp],rcx
4893 call QWORD[__imp_RtlVirtualUnwind]
Adam Langleyd9e397b2015-01-22 14:27:53 -08004894

; Return 1 (ExceptionContinueSearch) and undo the handler prologue in
; reverse order.
4895 mov eax,1
4896 add rsp,64
4897 popfq
4898 pop r15
4899 pop r14
4900 pop r13
4901 pop r12
4902 pop rbp
4903 pop rbx
4904 pop rdi
4905 pop rsi
4906 DB 0F3h,0C3h ;repret
Adam Langleyd9e397b2015-01-22 14:27:53 -08004907
Adam Langleye9ada862015-05-11 17:20:37 -07004908
; .pdata: Win64 function table.  Each entry is three image-relative DWORDs
; (function start, function end, unwind-info record in .xdata), matching the
; documented RUNTIME_FUNCTION layout.
; NOTE(review): the loader expects these entries sorted by start address;
; the order here follows the order of the functions in this file -- confirm
; if functions are ever reordered.
4909section .pdata rdata align=4
Adam Langleyd9e397b2015-01-22 14:27:53 -08004910ALIGN 4
Adam Langleye9ada862015-05-11 17:20:37 -07004911 DD $L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
4912 DD $L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
4913 DD $L$SEH_info_ecb wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004914

Adam Langleye9ada862015-05-11 17:20:37 -07004915 DD $L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
4916 DD $L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
4917 DD $L$SEH_info_ccm64_enc wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004918

Adam Langleye9ada862015-05-11 17:20:37 -07004919 DD $L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
4920 DD $L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
4921 DD $L$SEH_info_ccm64_dec wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004922

Adam Langleye9ada862015-05-11 17:20:37 -07004923 DD $L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
4924 DD $L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
4925 DD $L$SEH_info_ctr32 wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004926

Adam Langleye9ada862015-05-11 17:20:37 -07004927 DD $L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
4928 DD $L$SEH_end_aesni_xts_encrypt wrt ..imagebase
4929 DD $L$SEH_info_xts_enc wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004930

Adam Langleye9ada862015-05-11 17:20:37 -07004931 DD $L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
4932 DD $L$SEH_end_aesni_xts_decrypt wrt ..imagebase
4933 DD $L$SEH_info_xts_dec wrt ..imagebase
Robert Sloana94fe052017-02-21 08:49:28 -08004934

4935 DD $L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase
4936 DD $L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
4937 DD $L$SEH_info_ocb_enc wrt ..imagebase
4938

4939 DD $L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase
4940 DD $L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
4941 DD $L$SEH_info_ocb_dec wrt ..imagebase
Adam Langleye9ada862015-05-11 17:20:37 -07004942 DD $L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
4943 DD $L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
4944 DD $L$SEH_info_cbc wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004945

; The two key-setup routines use their public entry labels as the start
; address and share one unwind record.
Adam Langleye9ada862015-05-11 17:20:37 -07004946 DD aesni_set_decrypt_key wrt ..imagebase
4947 DD $L$SEH_end_set_decrypt_key wrt ..imagebase
4948 DD $L$SEH_info_key wrt ..imagebase
Adam Langleyd9e397b2015-01-22 14:27:53 -08004949

Adam Langleye9ada862015-05-11 17:20:37 -07004950 DD aesni_set_encrypt_key wrt ..imagebase
4951 DD $L$SEH_end_set_encrypt_key wrt ..imagebase
4952 DD $L$SEH_info_key wrt ..imagebase
; .xdata: unwind-info records referenced from the .pdata table.
; Records that start with `DB 9,0,0,0` follow the documented UNWIND_INFO
; header: version 1 with the exception-handler flag (1 | 1<<3 = 9), zero
; prologue size, zero unwind codes, no frame register.  The header is
; followed by the handler's image-relative address and then by handler data:
; the label addresses each handler reads through DISPATCHER_CONTEXT.HandlerData.
4953section .xdata rdata align=8
Adam Langleyd9e397b2015-01-22 14:27:53 -08004954ALIGN 8
; ECB / CCM64: handler data = { body start, epilogue start }.
Adam Langleye9ada862015-05-11 17:20:37 -07004955$L$SEH_info_ecb:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004956DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004957 DD ecb_ccm64_se_handler wrt ..imagebase
4958 DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
4959$L$SEH_info_ccm64_enc:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004960DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004961 DD ecb_ccm64_se_handler wrt ..imagebase
4962 DD $L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
4963$L$SEH_info_ccm64_dec:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004964DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004965 DD ecb_ccm64_se_handler wrt ..imagebase
4966 DD $L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
; CTR32 / XTS: handler data = { body start, epilogue start }.
4967$L$SEH_info_ctr32:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004968DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004969 DD ctr_xts_se_handler wrt ..imagebase
4970 DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
4971$L$SEH_info_xts_enc:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004972DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004973 DD ctr_xts_se_handler wrt ..imagebase
4974 DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
4975$L$SEH_info_xts_dec:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004976DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004977 DD ctr_xts_se_handler wrt ..imagebase
4978 DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
; OCB: handler data = { body start, epilogue start, pop label }, followed by
; one zero DWORD.
Robert Sloana94fe052017-02-21 08:49:28 -08004979$L$SEH_info_ocb_enc:
4980DB 9,0,0,0
4981 DD ocb_se_handler wrt ..imagebase
4982 DD $L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
4983 DD $L$ocb_enc_pop wrt ..imagebase
4984 DD 0
4985$L$SEH_info_ocb_dec:
4986DB 9,0,0,0
4987 DD ocb_se_handler wrt ..imagebase
4988 DD $L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
4989 DD $L$ocb_dec_pop wrt ..imagebase
4990 DD 0
; CBC: no handler data; cbc_se_handler compares against code labels directly.
Adam Langleye9ada862015-05-11 17:20:37 -07004991$L$SEH_info_cbc:
Adam Langleyd9e397b2015-01-22 14:27:53 -08004992DB 9,0,0,0
Adam Langleye9ada862015-05-11 17:20:37 -07004993 DD cbc_se_handler wrt ..imagebase
; Key-setup routines: plain unwind record with no handler.
;   0x01 = version 1, no flags; 0x04 = prologue is 4 bytes (the DB-encoded
;   `sub rsp,8`); 0x01 = one unwind code; 0x00 = no frame register.
;   Unwind code 0x04,0x02 = at prologue offset 4, small stack allocation of
;   8 bytes; the trailing 0x00,0x00 pads the code array to an even count.
4994$L$SEH_info_key:
4995DB 0x01,0x04,0x01,0x00
4996DB 0x04,0x02,0x00,0x00