/* Source blob: 7c7ef19c7947182e27dc219edc87cc3c4f72dba8 (exported from a git blame view) */
#if defined(__arm__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef __thumb2__
.align	5
@ Constant table read by the key-expansion code through "adr r3,.Lrcon".
@ Three rows of four 32-bit words each:
@   row 0: 0x01 in every lane, loaded into q1 together with row 1
@   row 1: byte-index mask handed to vtbl.8 (rotate-n-splat), loaded into q2
@   row 2: 0x1b in every lane, loaded into q1 later by the 128-bit key path
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ aes_hw_set_encrypt_key: expand a raw AES key into an encryption
@ key schedule.
@ In:    r0 = pointer to the raw key bytes
@        r1 = key length in bits (128, 192 or 256)
@        r2 = pointer to the key schedule buffer to fill
@ Out:   r0 = 0 on success
@             -1 if r0 or r2 is zero
@             -2 if r1 is below 128, above 256 or not a multiple of 64
@ On success the round count (10, 12 or 14) is stored at schedule+240.
@ Internal contract (used by aes_hw_set_decrypt_key through .Lenc_key):
@ on the success path r2 is left pointing at schedule+240 and r12
@ holds the round count.
@ Clobbers: r1, r3, r12, q0-q3, q8-q10, flags; r0 is advanced while
@ the key is read.
@ The AES instructions are emitted as .byte sequences; the trailing
@ comment on each such line names the instruction being encoded.
@-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ error code for a zero pointer
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2			@ error code for a bad bit length
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags select the path at the branches below

	veor	q0,q0,q0
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
@ 128-bit key: eight loop iterations, then two unrolled rounds after
@ q1 is reloaded from the third .Lrcon row. 176 bytes are written.
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ r2 = schedule+160 here; advance to schedule+240

	mov	r12,#10
	b	.Ldone

.align	4
@ 192-bit key: the extra 8 key bytes live in d16; each of the eight
@ iterations stores 24 bytes.
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule+208 here; advance to schedule+240
	b	.Ldone

.align	4
@ 256-bit key: second key half lives in q8; seven iterations, the last
@ one leaves through the beq after storing q3, with r2 = schedule+240.
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule+240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
@-----------------------------------------------------------------------
@ aes_hw_set_decrypt_key: build a decryption key schedule.
@ Takes the same register arguments as aes_hw_set_encrypt_key
@ (r0 = raw key, r1 = bits, r2 = key schedule) because it calls the
@ .Lenc_key entry point first.
@ Out:   r0 = 0 on success, otherwise the non-zero code returned by
@             the encrypt-key expansion (schedule is left as that
@             routine left it).
@ After expansion the round keys are reversed in place: the first and
@ last round keys are swapped unchanged, every other pair is swapped
@ with aesimc applied to both, and the middle key gets aesimc in place.
@ Relies on .Lenc_key leaving r2 = schedule+240 and r12 = round count.
@ Saves and restores r4; returns by popping pc.
@-----------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ post-index step for walking r0 downwards
	add	r0,r2,r12,lsl#4	@ end of key schedule

	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc		@ until the two pointers meet

	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@-----------------------------------------------------------------------
@ aes_hw_encrypt: encrypt one 16-byte block.
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule; the round count is read
@             from offset 240
@ Out:   16 bytes written at r1; no return value is set.
@ Clobbers: r2 (advanced through the schedule), r3, q0-q2, flags.
@ The loop consumes two round keys per iteration.
@-----------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ xor with the last round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
@-----------------------------------------------------------------------
@ aes_hw_decrypt: decrypt one 16-byte block.
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule; the round count is read
@             from offset 240
@ Out:   16 bytes written at r1; no return value is set.
@ Clobbers: r2 (advanced through the schedule), r3, q0-q2, flags.
@ Same structure as aes_hw_encrypt with aesd/aesimc in place of
@ aese/aesmc.
@-----------------------------------------------------------------------
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ xor with the last round key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt
@-----------------------------------------------------------------------
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
@ In:    r0      = input pointer
@        r1      = output pointer
@        r2      = length in bytes; rounded down to a multiple of 16,
@                  and anything below 16 returns without touching
@                  the output or the IV
@        r3      = key schedule (round count read from offset 240)
@        [sp]    = pointer to the 16-byte IV (loaded into r4)
@        [sp,#4] = direction flag (loaded into r5): zero selects the
@                  decrypt path, non-zero the encrypt path
@ Out:   processed data at r1; the chaining value for a following
@        call is written back through the IV pointer.
@ Saves/restores r4-r8, lr and d8-d15. Clobbers r0-r3, r12, q0-q3,
@ q8-q15 and flags.
@ Encrypt has a dedicated loop for 10-round keys (.Lcbc_enc128) and a
@ shared loop for 12/14 rounds; decrypt processes three blocks per
@ iteration with a one-or-two block tail.
@-----------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16			@ r8 = input post-increment step
	blo	.Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not advance the input pointer

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still come from "cmp r5,#0" above

	cmp	r5,#2			@ r5 = rounds-8, so 2 means a 10-round key
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ r5 = rounds-8, so 4 means a 12-round key
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ sets Z for the beq .Lcbc_dec_one below
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back through the IV pointer
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@-----------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
@ In:    r0   = input pointer
@        r1   = output pointer
@        r2   = number of 16-byte blocks
@        r3   = key schedule (round count read from offset 240)
@        [sp] = pointer to the 16-byte counter block (loaded into r4)
@ Out:   r2 blocks written at r1. The counter block in memory is only
@        read, never written back.
@ The counter is the 32-bit word at offset 12 of the counter block; it
@ is byte-reversed with rev before being incremented (the first rev is
@ skipped when __ARMEB__ is defined) and reversed again before being
@ inserted into the vector registers.
@ Saves/restores r4-r10, lr and d8-d15. Clobbers r0-r3, r12, q0-q3,
@ q8-q15 and flags.
@ Three blocks are processed per main-loop iteration; one or two
@ remaining blocks go through .Lctr32_tail.
@ NOTE(review): r2 = 0 is not special-cased. Following the code, the
@ tail path then still loads and stores data, so callers presumably
@ never pass a zero block count - confirm against the callers.
@-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags feed the movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ fewer than two blocks: tail must not advance the input pointer
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10
	bls	.Lctr32_tail
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: keep the second load on the same block

.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ only one block was left: skip the second store
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif