// Captured from a source-browser blame view; blob 51e2464487bffc57a8f9503d49f4a9c8c6b5444d
#if defined(__aarch64__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch	armv8-a+crypto
#endif
// Constant table used by the key-expansion code (addressed with
// `adr x3,.Lrcon`), three 16-byte rows:
//   +0   round constant 0x01 replicated in every 32-bit lane; the expansion
//        loops double it with `shl` after each round
//   +16  byte-index mask for `tbl` that rotates the last 32-bit word by one
//        byte and replicates it into all four lanes
//   +32  round constant 0x1b, loaded for the last two AES-128 rounds
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//-----------------------------------------------------------------------
// aes_hw_set_encrypt_key -- expand a user key into an AES encryption
// key schedule using the ARMv8 crypto extensions.
//
// Presumably declared in C as
//   int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits,
//                              AES_KEY *key);   (confirm against the header)
//
// In:   x0 = user key bytes (16, 24 or 32 bytes are read, per w1)
//       w1 = key length in bits; only 128, 192 and 256 are accepted
//       x2 = output key schedule; round keys are stored from offset 0 and
//            the round count (10/12/14) as a 32-bit word at offset 240
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is NULL
//            -2 if w1 is not 128, 192 or 256
//       On success x2 = original x2 + 240 and w12 = round count;
//       aes_hw_set_decrypt_key enters at .Lenc_key and relies on both.
// Uses: x1, x3, x12, v0-v6, flags.
//
// Vector register roles during expansion:
//   v0 = all-zero (zero "round key" for aese, and zero fill for ext)
//   v1 = current round constant, v2 = rotate-n-splat tbl mask
//   v3 = most recent 128-bit round key (v4 = extra key material for
//        192/256-bit keys), v5/v6 = scratch
//-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Argument validation; x3 carries the prospective return value.
	mov	x3,#-1
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags consumed by the dispatch below

	eor	v0.16b,v0.16b,v0.16b
	ld1	{v3.16b},[x0],#16	// first 16 key bytes = round key 0
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon 0x01, v2 = tbl mask

	// None of the instructions since the cmp above modify the flags.
	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
// AES-128: eight rounds with rcon 0x01..0x80 produced by shl.
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// rotate last word, splat to all lanes
	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 moved up one word, zero filled
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b		// zero key: SubBytes on the splatted word
	subs	w1,w1,#1

	// Three shifted XORs accumulate the running XOR of the key words.
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// rcon 0x1b for round 9 (0x36 follows via shl)

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at offset 160
	add	x2,x2,#0x50		// 160 + 0x50 = 240: the rounds field

	mov	w12,#10
	b	.Ldone

.align	4
// AES-192: the key state is 24 bytes, v3 plus the low half of v4.
.L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes
	movi	v6.16b,#8	// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	// 16 + 8*24 = 208 bytes written; advance to the rounds field at 240.
	mov	w12,#12
	add	x2,x2,#0x20
	b	.Ldone

.align	4
// AES-256: v3/v4 hold the two halves of the 32-byte key state.
.L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// 16 + 7*32 = 240 bytes written

	// Second half-step: no rotation and no round constant.
	dup	v6.4s,v3.s[3]	// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at offset 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3	// return value
	// x30 is never modified in this routine, so only x29 is reloaded.
	ldr	x29,[sp],#16
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

//-----------------------------------------------------------------------
// aes_hw_set_decrypt_key -- build an AES decryption key schedule.
//
// Takes the same arguments as the encryption key setup (x0 = user key,
// w1 = bits, x2 = output schedule) and returns the same codes in x0:
// 0 on success, otherwise whatever .Lenc_key reported.
//
// Runs the encryption expansion via .Lenc_key, then rewrites the schedule
// in place: the order of the round keys is reversed and aesimc
// (InvMixColumns) is applied to every key except the first and the last.
//
// Relies on .Lenc_key leaving x2 = schedule + 240 and w12 = round count.
// Uses: x0, x2, x4, x12, v0, v1, flags (plus everything .Lenc_key uses).
//-----------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240	// restore original x2
	mov	x4,#-16		// post-index step for the descending pointer
	add	x0,x2,x12,lsl#4	// end of key schedule

	// Swap the first and last round keys unchanged.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

	// Walk inwards from both ends, swapping and transforming each pair.
.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	// The pointers have met on the middle key: transform it in place.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0	// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16	// x30 was clobbered by the bl above
	ret
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
//-----------------------------------------------------------------------
// aes_hw_encrypt -- encrypt one 16-byte block.
//
// In:   x0 = input block (16 bytes read)
//       x1 = output block (16 bytes written)
//       x2 = key schedule: round keys from offset 0, round count as a
//            32-bit word at offset 240
// Out:  nothing in registers
// Uses: x2 (advanced through the schedule), w3, v0-v2, flags.
//       Leaf routine; the stack is not touched.
//-----------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// state
	sub	w3,w3,#2		// the last two rounds are done after the loop
	ld1	{v1.4s},[x2],#16	// round key 1

	// Two rounds per iteration; v0/v1 alternate as the current round key
	// and each is reloaded with the key two positions further on.
.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// final round key
	aese	v2.16b,v1.16b		// last round: no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt
//-----------------------------------------------------------------------
// aes_hw_decrypt -- decrypt one 16-byte block.
//
// In:   x0 = input block (16 bytes read)
//       x1 = output block (16 bytes written)
//       x2 = key schedule: round keys from offset 0, round count as a
//            32-bit word at offset 240 (presumably one prepared by
//            aes_hw_set_decrypt_key -- confirm against callers)
// Out:  nothing in registers
// Uses: x2 (advanced through the schedule), w3, v0-v2, flags.
//       Leaf routine; the stack is not touched.
//-----------------------------------------------------------------------
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// state
	sub	w3,w3,#2		// the last two rounds are done after the loop
	ld1	{v1.4s},[x2],#16	// round key 1

	// Two rounds per iteration; v0/v1 alternate as the current round key
	// and each is reloaded with the key two positions further on.
.Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// final round key
	aesd	v2.16b,v1.16b		// last round: no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_decrypt,.-aes_hw_decrypt
//-----------------------------------------------------------------------
// aes_hw_cbc_encrypt -- AES-CBC encryption or decryption.
//
// In:   x0 = input
//       x1 = output
//       x2 = length in bytes; rounded down to a multiple of 16, and if
//            it is below 16 the routine returns without touching the
//            output or the IV
//       x3 = key schedule (round count as a 32-bit word at offset 240)
//       x4 = 16-byte IV; read on entry and overwritten on exit with the
//            last ciphertext block processed
//       w5 = zero to decrypt, non-zero to encrypt
// Out:  nothing in registers
// Uses: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags.
//
// Common register roles:
//   v6       = chaining value (IV, then previous ciphertext block)
//   v7       = last round key
//   v20-v23  = the four round keys before the last
//   v16/v17  = round keys 0/1 on entry to each block, reloaded as needed
//   x8       = post-index step for input loads: 16, or 0 once the final
//              block has been reached so nothing past the input is read
// Encryption is inherently serial (one block at a time, with separate
// code paths for 10-round and 12/14-round keys); decryption processes
// three blocks per iteration with a one- or two-block tail.
//-----------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort		// fewer than 16 bytes: nothing to do
	csel	x8,xzr,x8,eq		// exactly one block: do not advance x0

	cmp	w5,#0	// en- or decrypting?
	ldr	w5,[x3,#240]		// w5 is reused for the round count
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec		// flags still from `cmp w5,#0` above

	// ---- encryption ----
	cmp	w5,#2			// w5 = rounds - 8; 2 means a 10-round key
	eor	v0.16b,v0.16b,v6.16b	// first plaintext block ^ IV
	// v5 = rndkey[0] ^ rndkey[last].  The loops keep v0 as the state
	// before the final key XOR, so xoring (next plaintext ^ v5) into it
	// yields next plaintext ^ ciphertext ^ rndkey[0] in one step.
	eor	v5.16b,v16.16b,v7.16b
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]	// round keys 2 and 3
	add	x7,x3,#16		// -> round key 1
	add	x6,x3,#16*4		// -> round key 4
	add	x12,x3,#16*5		// -> round key 5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6		// -> round key 6
	add	x3,x3,#16*7		// -> round key 7 (x3 no longer the base)
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	v0.16b,v16.16b		// v16 = next plaintext ^ v5 here
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// 4 means a 12-round key
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	// Two extra rounds for 14-round keys.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// last block next: stop advancing x0
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]	// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// ciphertext block
	b.hs	.Loop_cbc_enc		// flags from the subs above

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

.align	5
// 10-round keys: every round key stays resident in registers
// (v16, v17, v2, v3, v18-v23, v7).
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b		// v16 = next plaintext ^ v5 here
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done
.align	5
// ---- decryption ----
// v0/v1/v18 are the three blocks in flight; v2/v3/v19 keep copies of the
// ciphertext, which become the chaining values for the following blocks.
.Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32	// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	.Lcbc_dec_tail		// fewer than three blocks

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// chaining value ^ last round key
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo	// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6	// x0 is adjusted in such way that
				// at exit from the loop v1.16b-v18.16b
				// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// new chaining value
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	.Loop3x_cbc_dec		// flags from `subs x2,x2,#0x30`

	cmn	x2,#0x30
	b.eq	.Lcbc_done		// no blocks left over
	nop

	// One or two blocks remain, held in v1 and v18.
.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// eq: exactly one block remains
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the chaining value back as the IV
.Lcbc_abort:
	// x30 is never modified in this routine, so only x29 is reloaded.
	ldr	x29,[sp],#16
	ret
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
//-----------------------------------------------------------------------
// aes_hw_ctr32_encrypt_blocks -- AES-CTR with a 32-bit counter.
//
// In:   x0 = input
//       x1 = output
//       x2 = number of 16-byte blocks (not bytes); zero is a no-op
//       x3 = key schedule (round count as a 32-bit word at offset 240)
//       x4 = 16-byte initial counter block; only the last 32-bit word is
//            incremented, as a big-endian integer.  It is read but never
//            written back.
// Out:  nothing in registers
// Uses: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags.
//
// Register roles:
//   w8       = counter value in host byte order
//   v6       = copy of the initial counter block (template for new ones)
//   v0/v1/v18= counter blocks being encrypted
//   v7       = last round key, v20-v23 = the four round keys before it
//   v16/v17  = round keys 0/1 on entry to each pass, then streamed via x7
// Three blocks are processed per main-loop iteration; a tail handles the
// remaining one or two.
//-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Without this guard a zero block count would fall into the tail
	// path below, which always stores at least one block (and a second
	// one whenever x2 != 1).
	cbz	x2,.Lctr32_ret
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// round count

	ldr	w8, [x4, #12]		// 32-bit counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: tail must not advance x0
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v1.s[3],w10		// v1 = counter + 1
	b.ls	.Lctr32_tail		// one or two blocks in total
	rev	w12, w8
	sub	x2,x2,#3	// bias
	mov	v18.s[3],w12		// v18 = counter + 2
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Final rounds.  The in-flight states move to v4/v5/v17 so that
	// v0/v1/v18 can be rebuilt as the next three counter blocks while
	// the AES instructions complete.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold the last round key into the input
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32		// flags from `subs x2,x2,#3`

	adds	x2,x2,#3		// undo the bias: x2 = blocks left (0..2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: do not advance x0

	// One or two blocks remain; both v0 and v1 are always encrypted.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]		// re-reads the first block when x12 == 0
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// only one block was wanted
	st1	{v3.16b},[x1]

.Lctr32_done:
	// x30 is never modified in this routine, so only x29 is reloaded.
	ldr	x29,[sp],#16
.Lctr32_ret:
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif	// __ARM_MAX_ARCH__>=7
#endif	// __aarch64__