blob: 78e03795fc9036655d9e076202bde5bf1d8df1f6 [file] [log] [blame]
Robert Sloanc9abfe42018-11-26 12:19:07 -08001// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
// Build guards. When the compiler reports MemorySanitizer through
// __has_feature, OPENSSL_NO_ASM is defined (unless it already is), which
// disables everything inside the !defined(OPENSSL_NO_ASM) region below.
Robert Sloan726e9d12018-09-11 11:45:04 -07004#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if !defined(OPENSSL_NO_ASM)
// The symbol-prefix header is pulled in only when BORINGSSL_PREFIX is defined.
11#if defined(BORINGSSL_PREFIX)
12#include <boringssl_prefix_symbols_asm.h>
13#endif
Robert Sloan8ff03552017-06-14 12:40:58 -070014#include <openssl/arm_arch.h>
15
// NOTE(review): __ARM_MAX_ARCH__ is presumably supplied by arm_arch.h; it is
// not defined anywhere in this file.
16#if __ARM_MAX_ARCH__>=7
17.text
Robert Sloan8ff03552017-06-14 12:40:58 -070018
// Lrcon: constants for the key-expansion code in _aes_hw_set_encrypt_key,
// placed in the __TEXT,__const section and aligned to 2^5 bytes.
//   row 0: 0x01 in every 32-bit lane. Loaded into v1 as the starting round
//          constant; the key loops double it with "shl v1.16b,v1.16b,#1".
//   row 1: byte-index vector used with tbl (loaded into v2). Assuming
//          little-endian storage of .long, each lane selects source bytes
//          13,14,15,12.
//   row 2: 0x1b in every lane. The 128-bit path reloads v1 from here after
//          its eight-iteration loop.
Robert Sloanc9abfe42018-11-26 12:19:07 -080019.section __TEXT,__const
Robert Sloan8ff03552017-06-14 12:40:58 -070020.align 5
21Lrcon:
22.long 0x01,0x01,0x01,0x01
23.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
24.long 0x1b,0x1b,0x1b,0x1b
25
// Switch back to the code section for the routines that follow.
Robert Sloanc9abfe42018-11-26 12:19:07 -080026.text
27
// _aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
//
// Registers as used by the code below:
//   x0  - pointer to the user key bytes (rejected when zero)
//   w1  - key length in bits; only 128, 192 and 256 pass the checks
//   x2  - pointer to the key schedule being written (rejected when zero)
// Result in x0: 0 on success, -1 when x0 or x2 is zero, -2 when w1 is rejected.
//
// On success the round count (10, 12 or 14) is stored as a 32-bit word at
// byte offset 240 of the schedule. At Ldone x2 points at that word and w12
// holds the count; _aes_hw_set_decrypt_key enters through Lenc_key and relies
// on both. Scratch registers written: x3, w12, v0-v6.
Robert Sloan8ff03552017-06-14 12:40:58 -070028.globl _aes_hw_set_encrypt_key
29.private_extern _aes_hw_set_encrypt_key
30
31.align 5
32_aes_hw_set_encrypt_key:
33Lenc_key:
34 stp x29,x30,[sp,#-16]!
35 add x29,sp,#0
// x3 carries the pending return code through the argument checks.
36 mov x3,#-1
37 cmp x0,#0
38 b.eq Lenc_key_abort
39 cmp x2,#0
40 b.eq Lenc_key_abort
41 mov x3,#-2
// Accept only 128 <= w1 <= 256 with the low six bits clear.
42 cmp w1,#128
43 b.lt Lenc_key_abort
44 cmp w1,#256
45 b.gt Lenc_key_abort
46 tst w1,#0x3f
47 b.ne Lenc_key_abort
48
// x3 is now reused as the address of the Lrcon table.
Robert Sloanc9abfe42018-11-26 12:19:07 -080049 adrp x3,Lrcon@PAGE
50 add x3,x3,Lrcon@PAGEOFF
// The flags from this compare are consumed by the three branches further
// down; none of the instructions in between is a flag-setting form.
Robert Sloan8ff03552017-06-14 12:40:58 -070051 cmp w1,#192
52
// v0 = all zeroes, v3 = first 16 key bytes, v1 = round constant,
// v2 = tbl index mask.
53 eor v0.16b,v0.16b,v0.16b
54 ld1 {v3.16b},[x0],#16
55 mov w1,#8 // reuse w1
56 ld1 {v1.4s,v2.4s},[x3],#32
57
58 b.lt Loop128
59 b.eq L192
60 b L256
61
// 128-bit key: eight iterations, each storing the current round key (v3) and
// deriving the next one. aese against the zero key in v0 is used for its
// byte-substitution step; the ext/eor chain folds v3 into itself shifted by
// one, two and three 32-bit lanes.
62.align 4
63Loop128:
64 tbl v6.16b,{v3.16b},v2.16b
65 ext v5.16b,v0.16b,v3.16b,#12
66 st1 {v3.4s},[x2],#16
67 aese v6.16b,v0.16b
68 subs w1,w1,#1
69
70 eor v3.16b,v3.16b,v5.16b
71 ext v5.16b,v0.16b,v5.16b,#12
72 eor v3.16b,v3.16b,v5.16b
73 ext v5.16b,v0.16b,v5.16b,#12
74 eor v6.16b,v6.16b,v1.16b
75 eor v3.16b,v3.16b,v5.16b
76 shl v1.16b,v1.16b,#1
77 eor v3.16b,v3.16b,v6.16b
78 b.ne Loop128
79
// x3 was advanced by 32 above, so this picks up the 0x1b row of Lrcon for the
// last two expansion steps, which are unrolled below.
80 ld1 {v1.4s},[x3]
81
82 tbl v6.16b,{v3.16b},v2.16b
83 ext v5.16b,v0.16b,v3.16b,#12
84 st1 {v3.4s},[x2],#16
85 aese v6.16b,v0.16b
86
87 eor v3.16b,v3.16b,v5.16b
88 ext v5.16b,v0.16b,v5.16b,#12
89 eor v3.16b,v3.16b,v5.16b
90 ext v5.16b,v0.16b,v5.16b,#12
91 eor v6.16b,v6.16b,v1.16b
92 eor v3.16b,v3.16b,v5.16b
93 shl v1.16b,v1.16b,#1
94 eor v3.16b,v3.16b,v6.16b
95
96 tbl v6.16b,{v3.16b},v2.16b
97 ext v5.16b,v0.16b,v3.16b,#12
98 st1 {v3.4s},[x2],#16
99 aese v6.16b,v0.16b
100
101 eor v3.16b,v3.16b,v5.16b
102 ext v5.16b,v0.16b,v5.16b,#12
103 eor v3.16b,v3.16b,v5.16b
104 ext v5.16b,v0.16b,v5.16b,#12
105 eor v6.16b,v6.16b,v1.16b
106 eor v3.16b,v3.16b,v5.16b
107 eor v3.16b,v3.16b,v6.16b
// The final round key is stored without post-increment at offset 160; adding
// 0x50 then puts x2 at offset 240, where Ldone writes the round count.
108 st1 {v3.4s},[x2]
109 add x2,x2,#0x50
110
111 mov w12,#10
112 b Ldone
113
// 192-bit key: v4 receives the remaining 8 key bytes. Every byte of the tbl
// mask is reduced by 8 before the loop.
114.align 4
115L192:
116 ld1 {v4.8b},[x0],#8
117 movi v6.16b,#8 // borrow v6.16b
118 st1 {v3.4s},[x2],#16
119 sub v2.16b,v2.16b,v6.16b // adjust the mask
120
// Eight iterations (w1 is still 8), each storing 8 + 16 bytes of schedule.
121Loop192:
122 tbl v6.16b,{v4.16b},v2.16b
123 ext v5.16b,v0.16b,v3.16b,#12
124 st1 {v4.8b},[x2],#8
125 aese v6.16b,v0.16b
126 subs w1,w1,#1
127
128 eor v3.16b,v3.16b,v5.16b
129 ext v5.16b,v0.16b,v5.16b,#12
130 eor v3.16b,v3.16b,v5.16b
131 ext v5.16b,v0.16b,v5.16b,#12
132 eor v3.16b,v3.16b,v5.16b
133
134 dup v5.4s,v3.s[3]
135 eor v5.16b,v5.16b,v4.16b
136 eor v6.16b,v6.16b,v1.16b
137 ext v4.16b,v0.16b,v4.16b,#12
138 shl v1.16b,v1.16b,#1
139 eor v4.16b,v4.16b,v5.16b
140 eor v3.16b,v3.16b,v6.16b
141 eor v4.16b,v4.16b,v6.16b
142 st1 {v3.4s},[x2],#16
143 b.ne Loop192
144
// 16 + 8*24 = 208 bytes have been written; 0x20 more brings x2 to offset 240.
145 mov w12,#12
146 add x2,x2,#0x20
147 b Ldone
148
// 256-bit key: v3 holds the first 16 key bytes, v4 the second 16.
149.align 4
150L256:
151 ld1 {v4.16b},[x0]
152 mov w1,#7
153 mov w12,#14
154 st1 {v3.4s},[x2],#16
155
// Seven passes. Each pass stores v4 and a freshly derived v3; the loop is left
// through the b.eq in the middle on the seventh pass, at which point
// 16 + 7*32 = 240 bytes have been written.
156Loop256:
157 tbl v6.16b,{v4.16b},v2.16b
158 ext v5.16b,v0.16b,v3.16b,#12
159 st1 {v4.4s},[x2],#16
160 aese v6.16b,v0.16b
161 subs w1,w1,#1
162
163 eor v3.16b,v3.16b,v5.16b
164 ext v5.16b,v0.16b,v5.16b,#12
165 eor v3.16b,v3.16b,v5.16b
166 ext v5.16b,v0.16b,v5.16b,#12
167 eor v6.16b,v6.16b,v1.16b
168 eor v3.16b,v3.16b,v5.16b
169 shl v1.16b,v1.16b,#1
170 eor v3.16b,v3.16b,v6.16b
171 st1 {v3.4s},[x2],#16
172 b.eq Ldone
173
// Second half of the pass: derive the next v4. No round constant is mixed in
// here and the source word is splatted without rotation.
174 dup v6.4s,v3.s[3] // just splat
175 ext v5.16b,v0.16b,v4.16b,#12
176 aese v6.16b,v0.16b
177
178 eor v4.16b,v4.16b,v5.16b
179 ext v5.16b,v0.16b,v5.16b,#12
180 eor v4.16b,v4.16b,v5.16b
181 ext v5.16b,v0.16b,v5.16b,#12
182 eor v4.16b,v4.16b,v5.16b
183
184 eor v4.16b,v4.16b,v6.16b
185 b Loop256
186
// Common success exit: record the round count and report success.
187Ldone:
188 str w12,[x2]
189 mov x3,#0
190
// Only x29 is reloaded when the frame is popped; x30 is never written in this
// routine, so the saved copy is not needed for the ret.
191Lenc_key_abort:
192 mov x0,x3 // return value
193 ldr x29,[sp],#16
194 ret
195
196
// _aes_hw_set_decrypt_key: build a decryption key schedule.
//
// x0, w1 and x2 are passed untouched to Lenc_key, so they have the same
// meaning as for _aes_hw_set_encrypt_key. A non-zero result from Lenc_key is
// returned as-is. Otherwise the schedule just written is converted in place:
// the order of the 16-byte round keys is reversed, and aesimc is applied to
// every key except the first and the last. Returns 0 in x0 on success.
197.globl _aes_hw_set_decrypt_key
198.private_extern _aes_hw_set_decrypt_key
199
200.align 5
201_aes_hw_set_decrypt_key:
202 stp x29,x30,[sp,#-16]!
203 add x29,sp,#0
// bl overwrites x30, which is why this routine reloads it in its epilogue.
204 bl Lenc_key
205
206 cmp x0,#0
207 b.ne Ldec_key_abort
208
// Lenc_key leaves x2 at schedule+240 and the round count in w12.
// x4 is the -16 stride for walking backwards from the end.
209 sub x2,x2,#240 // restore original x2
210 mov x4,#-16
211 add x0,x2,x12,lsl#4 // end of key schedule
212
// Swap the first and last round keys unchanged.
213 ld1 {v0.4s},[x2]
214 ld1 {v1.4s},[x0]
215 st1 {v0.4s},[x0],x4
216 st1 {v1.4s},[x2],#16
217
// Swap the remaining pairs, transforming each key with aesimc, while the
// upper pointer (x0) stays above the lower one (x2).
218Loop_imc:
219 ld1 {v0.4s},[x2]
220 ld1 {v1.4s},[x0]
221 aesimc v0.16b,v0.16b
222 aesimc v1.16b,v1.16b
223 st1 {v0.4s},[x0],x4
224 st1 {v1.4s},[x2],#16
225 cmp x0,x2
226 b.hi Loop_imc
227
// The round counts written by Lenc_key are even, so the number of round keys
// is odd and the two pointers meet on a middle key, transformed in place here.
228 ld1 {v0.4s},[x2]
229 aesimc v0.16b,v0.16b
230 st1 {v0.4s},[x0]
231
232 eor x0,x0,x0 // return value
233Ldec_key_abort:
234 ldp x29,x30,[sp],#16
235 ret
236
// _aes_hw_encrypt: encrypt one 16-byte block.
//
// Registers as used by the code below:
//   x0 - pointer to the 16 input bytes
//   x1 - pointer to the 16 output bytes
//   x2 - pointer to the key schedule; the round count is read from offset 240
// No stack frame is set up. Scratch registers written: x2, w3, v0-v2.
237.globl _aes_hw_encrypt
238.private_extern _aes_hw_encrypt
239
240.align 5
241_aes_hw_encrypt:
242 ldr w3,[x2,#240]
243 ld1 {v0.4s},[x2],#16
244 ld1 {v2.16b},[x0]
// The loop consumes two round keys per pass; the last two rounds and the
// final key are handled after it, hence the initial bias of 2.
245 sub w3,w3,#2
246 ld1 {v1.4s},[x2],#16
247
// v0/v1 alternate as the current round key while the following one is loaded.
248Loop_enc:
249 aese v2.16b,v0.16b
250 aesmc v2.16b,v2.16b
251 ld1 {v0.4s},[x2],#16
252 subs w3,w3,#2
253 aese v2.16b,v1.16b
254 aesmc v2.16b,v2.16b
255 ld1 {v1.4s},[x2],#16
256 b.gt Loop_enc
257
// Tail: one more full round, then a round without aesmc, then the last round
// key is applied with a plain eor.
258 aese v2.16b,v0.16b
259 aesmc v2.16b,v2.16b
260 ld1 {v0.4s},[x2]
261 aese v2.16b,v1.16b
262 eor v2.16b,v2.16b,v0.16b
263
264 st1 {v2.16b},[x1]
265 ret
266
// _aes_hw_decrypt: decrypt one 16-byte block.
//
// Same structure as _aes_hw_encrypt, with aesd/aesimc in place of aese/aesmc:
//   x0 - pointer to the 16 input bytes
//   x1 - pointer to the 16 output bytes
//   x2 - pointer to the key schedule; the round count is read from offset 240
// NOTE(review): presumably x2 must be a schedule produced by
// _aes_hw_set_decrypt_key; nothing in this routine checks that.
// No stack frame is set up. Scratch registers written: x2, w3, v0-v2.
267.globl _aes_hw_decrypt
268.private_extern _aes_hw_decrypt
269
270.align 5
271_aes_hw_decrypt:
272 ldr w3,[x2,#240]
273 ld1 {v0.4s},[x2],#16
274 ld1 {v2.16b},[x0]
// Two round keys are consumed per loop pass; the tail handles the last two
// rounds and the final key, hence the initial bias of 2.
275 sub w3,w3,#2
276 ld1 {v1.4s},[x2],#16
277
278Loop_dec:
279 aesd v2.16b,v0.16b
280 aesimc v2.16b,v2.16b
281 ld1 {v0.4s},[x2],#16
282 subs w3,w3,#2
283 aesd v2.16b,v1.16b
284 aesimc v2.16b,v2.16b
285 ld1 {v1.4s},[x2],#16
286 b.gt Loop_dec
287
// Tail: one more full round, a round without aesimc, then the last round key
// is applied with a plain eor.
288 aesd v2.16b,v0.16b
289 aesimc v2.16b,v2.16b
290 ld1 {v0.4s},[x2]
291 aesd v2.16b,v1.16b
292 eor v2.16b,v2.16b,v0.16b
293
294 st1 {v2.16b},[x1]
295 ret
296
// _aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
//
// Registers as used by the code below:
//   x0 - input pointer
//   x1 - output pointer
//   x2 - length in bytes; below 16 the routine returns without touching
//        anything, otherwise it is rounded down to a multiple of 16
//   x3 - key schedule pointer; the round count is read from offset 240
//   x4 - pointer to the 16-byte chaining value (IV); read on entry and
//        rewritten at Lcbc_done
//   w5 - direction: zero takes the decrypt path, non-zero the encrypt path
// Scratch registers written: x5-x8, x12, x14, v0-v7, v16-v23.
// Only x29 is reloaded in the epilogue; x30 is not written in this routine.
297.globl _aes_hw_cbc_encrypt
298.private_extern _aes_hw_cbc_encrypt
299
300.align 5
301_aes_hw_cbc_encrypt:
302 stp x29,x30,[sp,#-16]!
303 add x29,sp,#0
// x8 is the post-increment used for input loads: 16 normally, 0 once no
// further block remains, so the look-ahead load re-reads the last block
// instead of advancing x0 past it.
304 subs x2,x2,#16
305 mov x8,#16
306 b.lo Lcbc_abort
307 csel x8,xzr,x8,eq
308
// The flags from this compare stay live until "b.eq Lcbc_dec" below; every
// instruction in between is a non-flag-setting form.
309 cmp w5,#0 // en- or decrypting?
310 ldr w5,[x3,#240]
311 and x2,x2,#-16
312 ld1 {v6.16b},[x4]
313 ld1 {v0.16b},[x0],x8
314
// v16/v17 = first two round keys, v18-v23 and v7 = last seven round keys.
// After the two subtractions w5 = rounds - 8.
315 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
316 sub w5,w5,#6
317 add x7,x3,x5,lsl#4 // pointer to last 7 round keys
318 sub w5,w5,#2
319 ld1 {v18.4s,v19.4s},[x7],#32
320 ld1 {v20.4s,v21.4s},[x7],#32
321 ld1 {v22.4s,v23.4s},[x7],#32
322 ld1 {v7.4s},[x7]
323
324 add x7,x3,#32
325 mov w6,w5
326 b.eq Lcbc_dec
327
// Encrypt path. v0 = first input block xor IV. v5 = first round key xor last
// round key; it is xored into each following input block so that the "aese
// v0,v16" at the top of the loop applies the previous block's final key, the
// chaining xor and the first-round key in one step.
// w5 == 2 corresponds to a round count of 10, handled by Lcbc_enc128.
328 cmp w5,#2
329 eor v0.16b,v0.16b,v6.16b
330 eor v5.16b,v16.16b,v7.16b
331 b.eq Lcbc_enc128
332
// Round counts above 10: v2/v3 take the round keys at offsets 32 and 48, and
// x7, x6, x12, x14, x3 are repointed at offsets 16, 64, 80, 96, 112 of the
// schedule so later keys can be reloaded inside the loop.
333 ld1 {v2.4s,v3.4s},[x7]
334 add x7,x3,#16
335 add x6,x3,#16*4
336 add x12,x3,#16*5
337 aese v0.16b,v16.16b
338 aesmc v0.16b,v0.16b
339 add x14,x3,#16*6
340 add x3,x3,#16*7
341 b Lenter_cbc_enc
342
// Main encrypt loop. v16 arrives holding (next input xor v5); v6 is the
// ciphertext of the previous block, stored here.
343.align 4
344Loop_cbc_enc:
345 aese v0.16b,v16.16b
346 aesmc v0.16b,v0.16b
347 st1 {v6.16b},[x1],#16
348Lenter_cbc_enc:
349 aese v0.16b,v17.16b
350 aesmc v0.16b,v0.16b
351 aese v0.16b,v2.16b
352 aesmc v0.16b,v0.16b
353 ld1 {v16.4s},[x6]
// w5 == 4 corresponds to a round count of 12; that case skips the two extra
// rounds below.
354 cmp w5,#4
355 aese v0.16b,v3.16b
356 aesmc v0.16b,v0.16b
357 ld1 {v17.4s},[x12]
358 b.eq Lcbc_enc192
359
360 aese v0.16b,v16.16b
361 aesmc v0.16b,v0.16b
362 ld1 {v16.4s},[x14]
363 aese v0.16b,v17.16b
364 aesmc v0.16b,v0.16b
365 ld1 {v17.4s},[x3]
366 nop
367
// Shared tail of the loop body. The flags set by "subs x2" are used by the
// csel and again by the b.hs that closes the loop.
368Lcbc_enc192:
369 aese v0.16b,v16.16b
370 aesmc v0.16b,v0.16b
371 subs x2,x2,#16
372 aese v0.16b,v17.16b
373 aesmc v0.16b,v0.16b
374 csel x8,xzr,x8,eq
375 aese v0.16b,v18.16b
376 aesmc v0.16b,v0.16b
377 aese v0.16b,v19.16b
378 aesmc v0.16b,v0.16b
// Look-ahead load of the next input block (stride x8, see above).
379 ld1 {v16.16b},[x0],x8
380 aese v0.16b,v20.16b
381 aesmc v0.16b,v0.16b
382 eor v16.16b,v16.16b,v5.16b
383 aese v0.16b,v21.16b
384 aesmc v0.16b,v0.16b
385 ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
386 aese v0.16b,v22.16b
387 aesmc v0.16b,v0.16b
388 aese v0.16b,v23.16b
// v6 = finished ciphertext block; v0 keeps the state without the last round
// key, which is folded in through v16 on the next pass.
389 eor v6.16b,v0.16b,v7.16b
390 b.hs Loop_cbc_enc
391
392 st1 {v6.16b},[x1],#16
393 b Lcbc_done
394
// Encrypt loop specialised for a round count of 10: all round keys stay in
// registers (v16, v17, v2, v3, v18-v23, v7), so nothing is reloaded per block.
395.align 5
396Lcbc_enc128:
397 ld1 {v2.4s,v3.4s},[x7]
398 aese v0.16b,v16.16b
399 aesmc v0.16b,v0.16b
400 b Lenter_cbc_enc128
401Loop_cbc_enc128:
402 aese v0.16b,v16.16b
403 aesmc v0.16b,v0.16b
404 st1 {v6.16b},[x1],#16
405Lenter_cbc_enc128:
406 aese v0.16b,v17.16b
407 aesmc v0.16b,v0.16b
408 subs x2,x2,#16
409 aese v0.16b,v2.16b
410 aesmc v0.16b,v0.16b
411 csel x8,xzr,x8,eq
412 aese v0.16b,v3.16b
413 aesmc v0.16b,v0.16b
414 aese v0.16b,v18.16b
415 aesmc v0.16b,v0.16b
416 aese v0.16b,v19.16b
417 aesmc v0.16b,v0.16b
418 ld1 {v16.16b},[x0],x8
419 aese v0.16b,v20.16b
420 aesmc v0.16b,v0.16b
421 aese v0.16b,v21.16b
422 aesmc v0.16b,v0.16b
423 aese v0.16b,v22.16b
424 aesmc v0.16b,v0.16b
425 eor v16.16b,v16.16b,v5.16b
426 aese v0.16b,v23.16b
427 eor v6.16b,v0.16b,v7.16b
428 b.hs Loop_cbc_enc128
429
430 st1 {v6.16b},[x1],#16
431 b Lcbc_done
// Decrypt path. Blocks are processed three at a time in v0, v1, v18; v2, v3
// and v19 keep copies of the ciphertext blocks, which are needed as chaining
// values, and v6 holds the chaining value for the first block of the group.
// With fewer than three blocks on entry (x2 goes below zero after the bias)
// control goes straight to Lcbc_dec_tail.
432.align 5
433Lcbc_dec:
434 ld1 {v18.16b},[x0],#16
435 subs x2,x2,#32 // bias
436 add w6,w5,#2
437 orr v3.16b,v0.16b,v0.16b
438 orr v1.16b,v0.16b,v0.16b
439 orr v19.16b,v18.16b,v18.16b
440 b.lo Lcbc_dec_tail
441
442 orr v1.16b,v18.16b,v18.16b
443 ld1 {v18.16b},[x0],#16
444 orr v2.16b,v0.16b,v0.16b
445 orr v3.16b,v1.16b,v1.16b
446 orr v19.16b,v18.16b,v18.16b
447
// Inner round loop: two rounds per pass over all three blocks; w6 counts the
// remaining rounds and x7 walks the key schedule.
448Loop3x_cbc_dec:
449 aesd v0.16b,v16.16b
450 aesimc v0.16b,v0.16b
451 aesd v1.16b,v16.16b
452 aesimc v1.16b,v1.16b
453 aesd v18.16b,v16.16b
454 aesimc v18.16b,v18.16b
455 ld1 {v16.4s},[x7],#16
456 subs w6,w6,#2
457 aesd v0.16b,v17.16b
458 aesimc v0.16b,v0.16b
459 aesd v1.16b,v17.16b
460 aesimc v1.16b,v1.16b
461 aesd v18.16b,v17.16b
462 aesimc v18.16b,v18.16b
463 ld1 {v17.4s},[x7],#16
464 b.gt Loop3x_cbc_dec
465
// Remaining rounds, interleaved with bookkeeping. v4, v5 and v17 are set to
// (chaining value xor last round key) for the three blocks, so one eor per
// block later applies both the last key and the CBC xor.
466 aesd v0.16b,v16.16b
467 aesimc v0.16b,v0.16b
468 aesd v1.16b,v16.16b
469 aesimc v1.16b,v1.16b
470 aesd v18.16b,v16.16b
471 aesimc v18.16b,v18.16b
472 eor v4.16b,v6.16b,v7.16b
// The flags from this subs are consumed by the csel below and by the
// "b.hs Loop3x_cbc_dec" at the end of the group.
473 subs x2,x2,#0x30
474 eor v5.16b,v2.16b,v7.16b
475 csel x6,x2,x6,lo // x6, w6, is zero at this point
476 aesd v0.16b,v17.16b
477 aesimc v0.16b,v0.16b
478 aesd v1.16b,v17.16b
479 aesimc v1.16b,v1.16b
480 aesd v18.16b,v17.16b
481 aesimc v18.16b,v18.16b
482 eor v17.16b,v3.16b,v7.16b
483 add x0,x0,x6 // x0 is adjusted in such way that
484 // at exit from the loop v1.16b-v18.16b
485 // are loaded with last "words"
486 orr v6.16b,v19.16b,v19.16b
487 mov x7,x3
488 aesd v0.16b,v20.16b
489 aesimc v0.16b,v0.16b
490 aesd v1.16b,v20.16b
491 aesimc v1.16b,v1.16b
492 aesd v18.16b,v20.16b
493 aesimc v18.16b,v18.16b
// The next group's ciphertext is loaded into v2, v3, v19 while the current
// group finishes.
494 ld1 {v2.16b},[x0],#16
495 aesd v0.16b,v21.16b
496 aesimc v0.16b,v0.16b
497 aesd v1.16b,v21.16b
498 aesimc v1.16b,v1.16b
499 aesd v18.16b,v21.16b
500 aesimc v18.16b,v18.16b
501 ld1 {v3.16b},[x0],#16
502 aesd v0.16b,v22.16b
503 aesimc v0.16b,v0.16b
504 aesd v1.16b,v22.16b
505 aesimc v1.16b,v1.16b
506 aesd v18.16b,v22.16b
507 aesimc v18.16b,v18.16b
508 ld1 {v19.16b},[x0],#16
509 aesd v0.16b,v23.16b
510 aesd v1.16b,v23.16b
511 aesd v18.16b,v23.16b
512 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
513 add w6,w5,#2
514 eor v4.16b,v4.16b,v0.16b
515 eor v5.16b,v5.16b,v1.16b
516 eor v18.16b,v18.16b,v17.16b
517 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
518 st1 {v4.16b},[x1],#16
519 orr v0.16b,v2.16b,v2.16b
520 st1 {v5.16b},[x1],#16
521 orr v1.16b,v3.16b,v3.16b
522 st1 {v18.16b},[x1],#16
523 orr v18.16b,v19.16b,v19.16b
524 b.hs Loop3x_cbc_dec
525
// x2 == -0x30 here means nothing is left over after the last group of three.
526 cmn x2,#0x30
527 b.eq Lcbc_done
528 nop
529
// Tail: one or two blocks remain. Two lanes (v1, v18) are always run through
// the rounds; the "cmn x2,#0x20" below decides how many results are stored.
530Lcbc_dec_tail:
531 aesd v1.16b,v16.16b
532 aesimc v1.16b,v1.16b
533 aesd v18.16b,v16.16b
534 aesimc v18.16b,v18.16b
535 ld1 {v16.4s},[x7],#16
536 subs w6,w6,#2
537 aesd v1.16b,v17.16b
538 aesimc v1.16b,v1.16b
539 aesd v18.16b,v17.16b
540 aesimc v18.16b,v18.16b
541 ld1 {v17.4s},[x7],#16
542 b.gt Lcbc_dec_tail
543
544 aesd v1.16b,v16.16b
545 aesimc v1.16b,v1.16b
546 aesd v18.16b,v16.16b
547 aesimc v18.16b,v18.16b
548 aesd v1.16b,v17.16b
549 aesimc v1.16b,v1.16b
550 aesd v18.16b,v17.16b
551 aesimc v18.16b,v18.16b
552 aesd v1.16b,v20.16b
553 aesimc v1.16b,v1.16b
554 aesd v18.16b,v20.16b
555 aesimc v18.16b,v18.16b
// These flags are consumed by "b.eq Lcbc_dec_one"; the vector instructions in
// between do not modify them.
556 cmn x2,#0x20
557 aesd v1.16b,v21.16b
558 aesimc v1.16b,v1.16b
559 aesd v18.16b,v21.16b
560 aesimc v18.16b,v18.16b
561 eor v5.16b,v6.16b,v7.16b
562 aesd v1.16b,v22.16b
563 aesimc v1.16b,v1.16b
564 aesd v18.16b,v22.16b
565 aesimc v18.16b,v18.16b
566 eor v17.16b,v3.16b,v7.16b
567 aesd v1.16b,v23.16b
568 aesd v18.16b,v23.16b
569 b.eq Lcbc_dec_one
// Two blocks left: emit both; v19 (the last ciphertext block) becomes the new
// chaining value.
570 eor v5.16b,v5.16b,v1.16b
571 eor v17.16b,v17.16b,v18.16b
572 orr v6.16b,v19.16b,v19.16b
573 st1 {v5.16b},[x1],#16
574 st1 {v17.16b},[x1],#16
575 b Lcbc_done
576
// One block left: only the v18 lane is used.
577Lcbc_dec_one:
578 eor v5.16b,v5.16b,v18.16b
579 orr v6.16b,v19.16b,v19.16b
580 st1 {v5.16b},[x1],#16
581
// Write the final chaining value back through x4. The abort label sits after
// this store, so the too-short case leaves the caller's IV untouched.
582Lcbc_done:
583 st1 {v6.16b},[x4]
584Lcbc_abort:
585 ldr x29,[sp],#16
586 ret
587
// _aes_hw_ctr32_encrypt_blocks: CTR-mode keystream xor over whole blocks.
//
// Registers as used by the code below:
//   x0 - input pointer
//   x1 - output pointer
//   x2 - number of 16-byte blocks (it is decremented by 3 per 48 bytes)
//   x3 - key schedule pointer; the round count is read from offset 240
//   x4 - pointer to the 16-byte counter block. Only the 32-bit word at byte
//        offset 12 is incremented (byte-reversed first unless __ARMEB__ is
//        defined); the block is read but never written back here.
// NOTE(review): there is no early exit for x2 == 0; the tail would still
// store one block. Presumably callers never pass zero - confirm.
// Scratch registers written: x5-x10, x12, v0-v7, v16-v23.
// Only x29 is reloaded in the epilogue; x30 is not written in this routine.
588.globl _aes_hw_ctr32_encrypt_blocks
589.private_extern _aes_hw_ctr32_encrypt_blocks
590
591.align 5
592_aes_hw_ctr32_encrypt_blocks:
593 stp x29,x30,[sp,#-16]!
594 add x29,sp,#0
595 ldr w5,[x3,#240]
596
// w8 = the counter word, v0 = the whole counter block.
597 ldr w8, [x4, #12]
598 ld1 {v0.4s},[x4]
599
// v16/v17 = first two round keys, v20-v23 and v7 = last five round keys.
// After the two subtractions w5 = rounds - 6.
600 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
601 sub w5,w5,#4
602 mov x12,#16
// The flags from this compare are used by the csel and by "b.ls Lctr32_tail"
// below; nothing in between sets flags.
603 cmp x2,#2
604 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
605 sub w5,w5,#2
606 ld1 {v20.4s,v21.4s},[x7],#32
607 ld1 {v22.4s,v23.4s},[x7],#32
608 ld1 {v7.4s},[x7]
609 add x7,x3,#32
610 mov w6,w5
// x12 = input stride for the tail: 0 when fewer than two blocks were
// requested, so the tail's second load re-reads the same block instead of
// reading past it.
611 csel x12,xzr,x12,lo
612#ifndef __ARMEB__
613 rev w8, w8
614#endif
// v0, v1, v18 = counter blocks n, n+1, n+2; v6 keeps the original block as a
// template. Counter values are byte-reversed again before being inserted into
// lane 3.
615 orr v1.16b,v0.16b,v0.16b
616 add w10, w8, #1
617 orr v18.16b,v0.16b,v0.16b
618 add w8, w8, #2
619 orr v6.16b,v0.16b,v0.16b
620 rev w10, w10
621 mov v1.s[3],w10
622 b.ls Lctr32_tail
// From here w12 is temporarily reused as scratch for the reversed counter;
// x12 is re-initialised before the tail is entered from the 3x loop.
623 rev w12, w8
624 sub x2,x2,#3 // bias
625 mov v18.s[3],w12
626 b Loop3x_ctr32
627
// Three-block loop. The inner part runs two rounds per pass on v0, v1, v18;
// w6 counts the remaining rounds and x7 walks the key schedule.
628.align 4
629Loop3x_ctr32:
630 aese v0.16b,v16.16b
631 aesmc v0.16b,v0.16b
632 aese v1.16b,v16.16b
633 aesmc v1.16b,v1.16b
634 aese v18.16b,v16.16b
635 aesmc v18.16b,v18.16b
636 ld1 {v16.4s},[x7],#16
637 subs w6,w6,#2
638 aese v0.16b,v17.16b
639 aesmc v0.16b,v0.16b
640 aese v1.16b,v17.16b
641 aesmc v1.16b,v1.16b
642 aese v18.16b,v17.16b
643 aesmc v18.16b,v18.16b
644 ld1 {v17.4s},[x7],#16
645 b.gt Loop3x_ctr32
646
// The in-flight states move to v4, v5, v17 so that v0, v1, v18 can be rebuilt
// from the template in v6 for the next group. Input blocks are loaded into
// v2, v3, v19 and pre-xored with the last round key (v7), so one eor per
// block finishes the cipher and applies the keystream.
647 aese v0.16b,v16.16b
648 aesmc v4.16b,v0.16b
649 aese v1.16b,v16.16b
650 aesmc v5.16b,v1.16b
651 ld1 {v2.16b},[x0],#16
652 orr v0.16b,v6.16b,v6.16b
653 aese v18.16b,v16.16b
654 aesmc v18.16b,v18.16b
655 ld1 {v3.16b},[x0],#16
656 orr v1.16b,v6.16b,v6.16b
657 aese v4.16b,v17.16b
658 aesmc v4.16b,v4.16b
659 aese v5.16b,v17.16b
660 aesmc v5.16b,v5.16b
661 ld1 {v19.16b},[x0],#16
662 mov x7,x3
663 aese v18.16b,v17.16b
664 aesmc v17.16b,v18.16b
665 orr v18.16b,v6.16b,v6.16b
// w9, w10, w8 = the next three counter values.
666 add w9,w8,#1
667 aese v4.16b,v20.16b
668 aesmc v4.16b,v4.16b
669 aese v5.16b,v20.16b
670 aesmc v5.16b,v5.16b
671 eor v2.16b,v2.16b,v7.16b
672 add w10,w8,#2
673 aese v17.16b,v20.16b
674 aesmc v17.16b,v17.16b
675 eor v3.16b,v3.16b,v7.16b
676 add w8,w8,#3
677 aese v4.16b,v21.16b
678 aesmc v4.16b,v4.16b
679 aese v5.16b,v21.16b
680 aesmc v5.16b,v5.16b
681 eor v19.16b,v19.16b,v7.16b
682 rev w9,w9
683 aese v17.16b,v21.16b
684 aesmc v17.16b,v17.16b
685 mov v0.s[3], w9
686 rev w10,w10
687 aese v4.16b,v22.16b
688 aesmc v4.16b,v4.16b
689 aese v5.16b,v22.16b
690 aesmc v5.16b,v5.16b
691 mov v1.s[3], w10
692 rev w12,w8
693 aese v17.16b,v22.16b
694 aesmc v17.16b,v17.16b
695 mov v18.s[3], w12
// The flags from this subs are consumed by "b.hs Loop3x_ctr32" below; the
// instructions in between do not set flags.
696 subs x2,x2,#3
697 aese v4.16b,v23.16b
698 aese v5.16b,v23.16b
699 aese v17.16b,v23.16b
700
701 eor v2.16b,v2.16b,v4.16b
702 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
703 st1 {v2.16b},[x1],#16
704 eor v3.16b,v3.16b,v5.16b
705 mov w6,w5
706 st1 {v3.16b},[x1],#16
707 eor v19.16b,v19.16b,v17.16b
708 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
709 st1 {v19.16b},[x1],#16
710 b.hs Loop3x_ctr32
711
// Undo the bias. Zero means every block was handled by the loop; otherwise
// one or two blocks remain, and x12 is set up as the tail's input stride
// (0 for a single block, 16 for two).
712 adds x2,x2,#3
713 b.eq Lctr32_done
714 cmp x2,#1
715 mov x12,#16
716 csel x12,xzr,x12,eq
717
// Tail: v0 and v1 are always both encrypted; the "cmp x2,#1" near the end
// decides whether the second result is stored.
718Lctr32_tail:
719 aese v0.16b,v16.16b
720 aesmc v0.16b,v0.16b
721 aese v1.16b,v16.16b
722 aesmc v1.16b,v1.16b
723 ld1 {v16.4s},[x7],#16
724 subs w6,w6,#2
725 aese v0.16b,v17.16b
726 aesmc v0.16b,v0.16b
727 aese v1.16b,v17.16b
728 aesmc v1.16b,v1.16b
729 ld1 {v17.4s},[x7],#16
730 b.gt Lctr32_tail
731
732 aese v0.16b,v16.16b
733 aesmc v0.16b,v0.16b
734 aese v1.16b,v16.16b
735 aesmc v1.16b,v1.16b
736 aese v0.16b,v17.16b
737 aesmc v0.16b,v0.16b
738 aese v1.16b,v17.16b
739 aesmc v1.16b,v1.16b
// With x12 == 0 the second load below reads the same block again.
740 ld1 {v2.16b},[x0],x12
741 aese v0.16b,v20.16b
742 aesmc v0.16b,v0.16b
743 aese v1.16b,v20.16b
744 aesmc v1.16b,v1.16b
745 ld1 {v3.16b},[x0]
746 aese v0.16b,v21.16b
747 aesmc v0.16b,v0.16b
748 aese v1.16b,v21.16b
749 aesmc v1.16b,v1.16b
750 eor v2.16b,v2.16b,v7.16b
751 aese v0.16b,v22.16b
752 aesmc v0.16b,v0.16b
753 aese v1.16b,v22.16b
754 aesmc v1.16b,v1.16b
755 eor v3.16b,v3.16b,v7.16b
756 aese v0.16b,v23.16b
757 aese v1.16b,v23.16b
758
759 cmp x2,#1
760 eor v2.16b,v2.16b,v0.16b
761 eor v3.16b,v3.16b,v1.16b
762 st1 {v2.16b},[x1],#16
763 b.eq Lctr32_done
764 st1 {v3.16b},[x1]
765
766Lctr32_done:
767 ldr x29,[sp],#16
768 ret
769
770#endif
Robert Sloan726e9d12018-09-11 11:45:04 -0700771#endif // !OPENSSL_NO_ASM