/* blob: c2f6b68e34fdd2fb59fd24509eed98ddebf36dcb */
#if defined(__arm__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a
.fpu	neon
.code	32
.align	5
@ Key-expansion constants read by aes_hw_set_encrypt_key:
@   row 0: starting round constant 0x01 in every 32-bit lane
@   row 1: byte-index mask used with vtbl (rotate-n-splat)
@   row 2: round constant 0x1b, reloaded once the 128-bit loop has finished
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@ aes_hw_set_encrypt_key: expand a user key into an AES encryption schedule.
@ In:  r0 = user key bytes, r1 = key length in bits, r2 = schedule output
@ Out: r0 = 0 on success, -1 if r0 or r2 is zero, -2 if r1 is not 128,
@      192 or 256
@ The round count (10, 12 or 14) is stored at byte offset 240 of the schedule.
@ On success r2 is left at schedule+240 and r12 holds the round count;
@ aes_hw_set_decrypt_key depends on both when it calls .Lenc_key.
@ Clobbers r1, r3, r12, q0-q3, q8-q10 and the flags.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ pending return value: bad pointer
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2			@ pending return value: bad bit length
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ only multiples of 64 are accepted
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags are consumed by the three-way branch below

	veor	q0,q0,q0		@ q0 = 0: zero round key for aese, filler for vext
	vld1.8	{q3},[r0]!		@ first 16 bytes of the user key
	mov	r1,#8			@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = vtbl mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ switch to the 0x1b round constant row

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ eleventh round key, at schedule+160
	add	r2,r2,#0x50		@ r2 = schedule+240

	mov	r12,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{d16},[r0]!		@ remaining 8 bytes of the 192-bit key
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10		@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule+240
	b	.Ldone

.align	4
.L256:
	vld1.8	{q8},[r0]		@ second 16 bytes of the 256-bit key
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags still from the subs above

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule+240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@ aes_hw_set_decrypt_key: build an AES decryption key schedule.
@ In:  r0 = user key bytes, r1 = key length in bits, r2 = schedule output
@ Out: r0 = 0 on success, otherwise the non-zero code produced by .Lenc_key
@ The encryption schedule is generated first through .Lenc_key. The round
@ keys are then reversed in place, and aesimc is applied to every round key
@ except the first and the last.
@ Saves and restores r4; clobbers q0, q1 and whatever .Lenc_key clobbers.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ post-index step for the descending pointer
	add	r0,r2,r12,lsl#4		@ end of key schedule

	vld1.32	{q0},[r2]		@ swap first and last round keys as they are
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc		@ continue until the two pointers meet

	vld1.32	{q0},[r2]		@ middle round key: transform in place
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt a single 16-byte block.
@ In:  r0 = input block, r1 = output block, r2 = key schedule with the
@      round count stored at byte offset 240
@ Round keys are read sequentially from the start of the schedule, two per
@ loop iteration. Clobbers r2, r3, q0-q2 and the flags.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ final round key is applied with a plain xor

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt a single 16-byte block.
@ In:  r0 = input block, r1 = output block, r2 = key schedule with the
@      round count stored at byte offset 240
@ Round keys are read sequentially from the start of the schedule, two per
@ loop iteration. Clobbers r2, r3, q0-q2 and the flags.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ final round key is applied with a plain xor

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
@ In:  r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
@      (round count at byte offset 240), first stack word = IV pointer,
@      second stack word = direction (zero selects decryption)
@ A length below 16 returns at once without writing the output or the IV.
@ Otherwise the length is rounded down to a multiple of 16, and the chaining
@ value for a following call is written back through the IV pointer.
@ Saves and restores r4-r8, lr and d8-d15. lr (r14) is used as a scratch
@ pointer on the 192/256-bit encryption path.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16			@ r8 = input stride
	blo	.Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not advance the input

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]		@ q6 = IV
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4		@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from the cmp r5,#0 above

	cmp	r5,#2			@ r5 = rounds-8, so 2 means a 10-round key
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 = rounds-8, so 4 means a 12-round key
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = finished output block
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = finished output block
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ flags consumed by beq .Lcbc_dec_one below
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back through the IV pointer
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
@ In:  r0 = input, r1 = output, r2 = number of 16-byte blocks, r3 = key
@      schedule (round count at byte offset 240), first stack word = pointer
@      to the 16-byte counter block
@ The last 32-bit word of the counter block is the counter; it is byte
@ reversed for arithmetic on little-endian builds and incremented once per
@ block. The counter block in memory is only read, never written back.
@ A block count of zero returns immediately. Without that check the tail
@ path would read one input block and store two output blocks.
@ Saves and restores r4-r10, lr and d8-d15.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	cmp	r2,#0			@ zero blocks requested: touch nothing
	bxeq	lr
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}	@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4		@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ single block: tail must not advance the input
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0		@ q6 keeps the counter block template
	rev	r10, r10
	vmov.32	d3[1],r10
	bls	.Lctr32_tail
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ single block: tail must not advance the input

.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ only one block was left: skip the second store
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif	/* __ARM_MAX_ARCH__>=7 */
#endif	/* defined(__arm__) */