blob: d44c88c2a51fd320e97d02298dd7ece6d34c905c [file] [log] [blame]
Robert Sloan8ff03552017-06-14 12:40:58 -07001#include <openssl/arm_arch.h>
2
3#if __ARM_MAX_ARCH__>=7
@ Assembler setup: emit into the text section and assemble what follows as
@ 32-bit ARM instructions.
4.text
5
6
7.code 32
@ __thumb2__ is undefined from here on, so the "ifdef __thumb2__" guards
@ around the .thumb_func directives further down evaluate false.
8#undef __thumb2__
9.align 5
@ Lrcon: constant table read by the key-expansion code (Lenc_key takes its
@ address with adr). Three rows of four identical 32-bit words:
@   row 0: 0x01, starting value of the per-step constant that the expansion
@          loops XOR in and then double with vshl.u8
@   row 1: vtbl index mask. With little-endian byte order each word selects
@          source bytes 13,14,15,12, i.e. the last word rotated by one byte
@          and copied into every lane
@   row 2: 0x1b, loaded in place of the shifted row 0 value for the last
@          two AES-128 expansion steps
10Lrcon:
11.long 0x01,0x01,0x01,0x01
12.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
13.long 0x1b,0x1b,0x1b,0x1b
14
@ _aes_hw_set_encrypt_key / Lenc_key: expand a user key into a round-key
@ schedule.
@ In:  r0 = pointer to the user key bytes
@      r1 = key length in bits (128, 192 or 256)
@      r2 = pointer to the output key schedule
@ Out: r0 =  0 on success
@           -1 if r0 or r2 is zero
@           -2 if r1 is below 128, above 256 or not a multiple of 64
@      Round keys are stored from [r2] upwards and the round count
@      (10, 12 or 14) is stored at byte offset 240 of the schedule.
@      On success r2 is left pointing at that offset-240 word and r12 holds
@      the round count. _aes_hw_set_decrypt_key relies on both after its
@      bl to Lenc_key.
@ Clobbers: r1, r3, r12, q0-q3, q8-q10, flags. Only caller-saved registers
@ are used, so nothing is pushed on the stack.
15.globl _aes_hw_set_encrypt_key
16.private_extern _aes_hw_set_encrypt_key
17#ifdef __thumb2__
18.thumb_func _aes_hw_set_encrypt_key
19#endif
20.align 5
21_aes_hw_set_encrypt_key:
22Lenc_key:
@ Argument validation. r3 carries the return code on every exit path.
23 mov r3,#-1
24 cmp r0,#0
25 beq Lenc_key_abort
26 cmp r2,#0
27 beq Lenc_key_abort
28 mov r3,#-2
29 cmp r1,#128
30 blt Lenc_key_abort
31 cmp r1,#256
32 bgt Lenc_key_abort
33 tst r1,#0x3f
34 bne Lenc_key_abort
35
@ r3 is reused as the Lrcon table pointer. The flags set by the compare
@ against 192 are consumed by the three-way branch below. None of the
@ instructions in between updates the flags.
36 adr r3,Lrcon
37 cmp r1,#192
38
@ q0 = all zeroes, q3 = first 16 key bytes, q1 = Lrcon row 0 (constant),
@ q2 = Lrcon row 1 (vtbl mask). r1 becomes the loop counter.
39 veor q0,q0,q0
40 vld1.8 {q3},[r0]!
41 mov r1,#8 @ reuse r1
42 vld1.32 {q1,q2},[r3]!
43
44 blt Loop128
45 beq L192
46 b L256
47
48.align 4
@ 128-bit keys: eight loop iterations, each storing the current round key
@ (q3) and deriving the next one.
@   vtbl pair : q10 = last word of q3, rotated and copied to every lane
@   vext      : q9  = q3 moved up by one word with zeroes shifted in (q0)
@   aese      : with the all-zero q0 as round key, so only the byte
@               substitution of the AES round acts on q10 (all four columns
@               of q10 are equal, so the row shift leaves it unchanged)
@   veor/vext : XOR q3 with itself moved up by one, two and three words
@   q10 ^= q1 : add the constant, which is then doubled for the next step
49Loop128:
50 vtbl.8 d20,{q3},d4
51 vtbl.8 d21,{q3},d5
52 vext.8 q9,q0,q3,#12
53 vst1.32 {q3},[r2]!
54.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
55 subs r1,r1,#1
56
57 veor q3,q3,q9
58 vext.8 q9,q0,q9,#12
59 veor q3,q3,q9
60 vext.8 q9,q0,q9,#12
61 veor q10,q10,q1
62 veor q3,q3,q9
63 vshl.u8 q1,q1,#1
64 veor q3,q3,q10
65 bne Loop128
66
@ Switch the constant to Lrcon row 2 (0x1b) and run two more unrolled
@ steps of the same computation.
67 vld1.32 {q1},[r3]
68
69 vtbl.8 d20,{q3},d4
70 vtbl.8 d21,{q3},d5
71 vext.8 q9,q0,q3,#12
72 vst1.32 {q3},[r2]!
73.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
74
75 veor q3,q3,q9
76 vext.8 q9,q0,q9,#12
77 veor q3,q3,q9
78 vext.8 q9,q0,q9,#12
79 veor q10,q10,q1
80 veor q3,q3,q9
81 vshl.u8 q1,q1,#1
82 veor q3,q3,q10
83
84 vtbl.8 d20,{q3},d4
85 vtbl.8 d21,{q3},d5
86 vext.8 q9,q0,q3,#12
87 vst1.32 {q3},[r2]!
88.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
89
90 veor q3,q3,q9
91 vext.8 q9,q0,q9,#12
92 veor q3,q3,q9
93 vext.8 q9,q0,q9,#12
94 veor q10,q10,q1
95 veor q3,q3,q9
96 veor q3,q3,q10
@ The eleventh round key is stored without post-increment at offset 160.
@ Adding 0x50 moves r2 to offset 240, where Ldone stores the round count.
97 vst1.32 {q3},[r2]
98 add r2,r2,#0x50
99
100 mov r12,#10
101 b Ldone
102
103.align 4
@ 192-bit keys: the extra 8 key bytes go into d16 (low half of q8). Every
@ byte of the vtbl mask is reduced by 8 so the lookups below index into
@ d16 instead of the upper half of a 16-byte key.
104L192:
105 vld1.8 {d16},[r0]!
106 vmov.i8 q10,#8 @ borrow q10
107 vst1.32 {q3},[r2]!
108 vsub.i8 q2,q2,q10 @ adjust the mask
109
@ Eight iterations, each storing 8 bytes (d16) followed by 16 bytes (q3).
@ Together with the 16 bytes stored above that is 208 bytes of round keys.
110Loop192:
111 vtbl.8 d20,{q8},d4
112 vtbl.8 d21,{q8},d5
113 vext.8 q9,q0,q3,#12
114 vst1.32 {d16},[r2]!
115.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
116 subs r1,r1,#1
117
118 veor q3,q3,q9
119 vext.8 q9,q0,q9,#12
120 veor q3,q3,q9
121 vext.8 q9,q0,q9,#12
122 veor q3,q3,q9
123
@ d7[1] is the top word of q3. It is broadcast and folded into q8 to
@ produce the next two key words alongside the updated q3.
124 vdup.32 q9,d7[1]
125 veor q9,q9,q8
126 veor q10,q10,q1
127 vext.8 q8,q0,q8,#12
128 vshl.u8 q1,q1,#1
129 veor q8,q8,q9
130 veor q3,q3,q10
131 veor q8,q8,q10
132 vst1.32 {q3},[r2]!
133 bne Loop192
134
@ r2 is at offset 208 here. Adding 0x20 moves it to offset 240.
135 mov r12,#12
136 add r2,r2,#0x20
137 b Ldone
138
139.align 4
@ 256-bit keys: q8 holds the second 16 key bytes. r1 is reset to 7
@ iterations. Each iteration stores q8 and a freshly derived q3. The
@ seventh iteration leaves through the beq to Ldone with r2 at offset 240
@ (16 + 7*32 bytes written).
140L256:
141 vld1.8 {q8},[r0]
142 mov r1,#7
143 mov r12,#14
144 vst1.32 {q3},[r2]!
145
146Loop256:
147 vtbl.8 d20,{q8},d4
148 vtbl.8 d21,{q8},d5
149 vext.8 q9,q0,q3,#12
150 vst1.32 {q8},[r2]!
151.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
152 subs r1,r1,#1
153
154 veor q3,q3,q9
155 vext.8 q9,q0,q9,#12
156 veor q3,q3,q9
157 vext.8 q9,q0,q9,#12
158 veor q10,q10,q1
159 veor q3,q3,q9
160 vshl.u8 q1,q1,#1
161 veor q3,q3,q10
162 vst1.32 {q3},[r2]!
@ Flags still come from the subs above. The vector instructions in between
@ do not touch them.
163 beq Ldone
164
@ Second half of the step: derive the next q8 from the top word of the new
@ q3. This half uses the broadcast word without the vtbl rotation and does
@ not add the constant in q1.
165 vdup.32 q10,d7[1]
166 vext.8 q9,q0,q8,#12
167.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
168
169 veor q8,q8,q9
170 vext.8 q9,q0,q9,#12
171 veor q8,q8,q9
172 vext.8 q9,q0,q9,#12
173 veor q8,q8,q9
174
175 veor q8,q8,q10
176 b Loop256
177
@ Common success exit: store the round count at schedule offset 240 and
@ select return code 0.
178Ldone:
179 str r12,[r2]
180 mov r3,#0
181
@ Common exit, shared with the failure paths: r3 is the return code.
182Lenc_key_abort:
183 mov r0,r3 @ return value
184
185 bx lr
186
187
@ _aes_hw_set_decrypt_key: build a decryption key schedule.
@ In:  r0, r1, r2 as for Lenc_key (user key pointer, key length in bits,
@      key schedule pointer). They are passed through unchanged.
@ Out: r0 = 0 on success, otherwise the nonzero code returned by Lenc_key.
@ The encryption schedule is generated first and then converted in place:
@ the order of the round keys is reversed and aesimc is applied to every
@ round key except the first and the last.
@ r4 and lr are saved on entry and restored (lr into pc) on exit.
188.globl _aes_hw_set_decrypt_key
189.private_extern _aes_hw_set_decrypt_key
190#ifdef __thumb2__
191.thumb_func _aes_hw_set_decrypt_key
192#endif
193.align 5
194_aes_hw_set_decrypt_key:
195 stmdb sp!,{r4,lr}
196 bl Lenc_key
197
198 cmp r0,#0
199 bne Ldec_key_abort
200
@ Lenc_key returns with r2 at schedule offset 240 and the round count in
@ r12. r2 becomes the ascending pointer, r0 the descending pointer that
@ starts at the last round key, and r4 the -16 step for r0.
201 sub r2,r2,#240 @ restore original r2
202 mov r4,#-16
203 add r0,r2,r12,lsl#4 @ end of key schedule
204
@ Swap the first and last round keys without transforming them.
205 vld1.32 {q0},[r2]
206 vld1.32 {q1},[r0]
207 vst1.32 {q0},[r0],r4
208 vst1.32 {q1},[r2]!
209
@ Walk inwards from both ends, swapping pairs of round keys and applying
@ aesimc to each, while the descending pointer is still above the
@ ascending one.
210Loop_imc:
211 vld1.32 {q0},[r2]
212 vld1.32 {q1},[r0]
213.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
214.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
215 vst1.32 {q0},[r0],r4
216 vst1.32 {q1},[r2]!
217 cmp r0,r2
218 bhi Loop_imc
219
@ The schedules written by Lenc_key hold an odd number of round keys, so
@ the two pointers meet on the middle key, which is transformed in place.
220 vld1.32 {q0},[r2]
221.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
222 vst1.32 {q0},[r0]
223
224 eor r0,r0,r0 @ return value
@ On the failure path r0 still holds the error code from Lenc_key.
225Ldec_key_abort:
226 ldmia sp!,{r4,pc}
227
@ _aes_hw_encrypt: encrypt one 16-byte block.
@ In:  r0 = pointer to the 16-byte input block
@      r1 = pointer to the 16-byte output block
@      r2 = pointer to the key schedule, round count read from offset 240
@ Out: 16 bytes stored at [r1]. No return value is set.
@ Clobbers: r2, r3, q0-q2, flags. Nothing is saved on the stack.
228.globl _aes_hw_encrypt
229.private_extern _aes_hw_encrypt
230#ifdef __thumb2__
231.thumb_func _aes_hw_encrypt
232#endif
233.align 5
234_aes_hw_encrypt:
@ q2 = block being processed, q0 and q1 = the next two round keys.
@ r3 = round count minus 2, because the last two rounds are handled after
@ the loop.
235 ldr r3,[r2,#240]
236 vld1.32 {q0},[r2]!
237 vld1.8 {q2},[r0]
238 sub r3,r3,#2
239 vld1.32 {q1},[r2]!
240
@ Two rounds per iteration, with the following round key loaded while the
@ current one is in use.
241Loop_enc:
242.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
243.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
244 vld1.32 {q0},[r2]!
245 subs r3,r3,#2
246.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
247.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
248 vld1.32 {q1},[r2]!
249 bgt Loop_enc
250
@ Last two rounds: the final aese is not followed by aesmc, and the last
@ round key is applied with a plain XOR.
251.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
252.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
253 vld1.32 {q0},[r2]
254.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
255 veor q2,q2,q0
256
257 vst1.8 {q2},[r1]
258 bx lr
259
@ _aes_hw_decrypt: decrypt one 16-byte block.
@ In:  r0 = pointer to the 16-byte input block
@      r1 = pointer to the 16-byte output block
@      r2 = pointer to the key schedule, round count read from offset 240
@           (presumably one prepared by _aes_hw_set_decrypt_key, confirm
@           against callers)
@ Out: 16 bytes stored at [r1]. No return value is set.
@ Clobbers: r2, r3, q0-q2, flags. Nothing is saved on the stack.
@ Same structure as _aes_hw_encrypt with aesd/aesimc in place of
@ aese/aesmc.
260.globl _aes_hw_decrypt
261.private_extern _aes_hw_decrypt
262#ifdef __thumb2__
263.thumb_func _aes_hw_decrypt
264#endif
265.align 5
266_aes_hw_decrypt:
@ q2 = block being processed, q0 and q1 = the next two round keys.
@ r3 = round count minus 2, because the last two rounds are handled after
@ the loop.
267 ldr r3,[r2,#240]
268 vld1.32 {q0},[r2]!
269 vld1.8 {q2},[r0]
270 sub r3,r3,#2
271 vld1.32 {q1},[r2]!
272
@ Two rounds per iteration, with the following round key loaded while the
@ current one is in use.
273Loop_dec:
274.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
275.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
276 vld1.32 {q0},[r2]!
277 subs r3,r3,#2
278.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
279.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
280 vld1.32 {q1},[r2]!
281 bgt Loop_dec
282
@ Last two rounds: the final aesd is not followed by aesimc, and the last
@ round key is applied with a plain XOR.
283.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
284.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
285 vld1.32 {q0},[r2]
286.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
287 veor q2,q2,q0
288
289 vst1.8 {q2},[r1]
290 bx lr
291
@ _aes_hw_cbc_encrypt: CBC-mode encryption or decryption of a buffer.
@ In:  r0      = input pointer
@      r1      = output pointer
@      r2      = length in bytes
@      r3      = key schedule pointer, round count read from offset 240
@      [sp]    = pointer to the 16-byte IV (kept in r4)
@      [sp,#4] = direction flag: zero selects decryption, nonzero encryption
@ Out: output written through r1. The IV buffer is overwritten with the
@      last ciphertext block, ready for a follow-on call. No return value.
@ A length below 16 returns at once without writing anything. Otherwise
@ the length is rounded down to a multiple of 16.
@ Saves and restores r4-r8, lr and d8-d15.
@ Register roles after setup:
@   q6       = IV or previous ciphertext block
@   q7       = last round key
@   q8, q9   = round keys 0 and 1 (reloaded as the rounds advance)
@   q10-q15  = the six round keys preceding the last one
@   r5       = round count minus 8 (2, 4 or 6 for 10, 12 or 14 rounds)
@   r8       = input post-increment: 16, or 0 once the final block has been
@              reached so that no load runs past the end of the input
292.globl _aes_hw_cbc_encrypt
293.private_extern _aes_hw_cbc_encrypt
294#ifdef __thumb2__
295.thumb_func _aes_hw_cbc_encrypt
296#endif
297.align 5
298_aes_hw_cbc_encrypt:
299 mov ip,sp
300 stmdb sp!,{r4,r5,r6,r7,r8,lr}
301 vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
302 ldmia ip,{r4,r5} @ load remaining args
@ The subs result drives both the abort (length below 16) and the choice
@ of r8 (exactly one block). The mov in between leaves the flags alone.
303 subs r2,r2,#16
304 mov r8,#16
305 blo Lcbc_abort
306 moveq r8,#0
307
@ The direction compare sets the flags consumed by the beq to Lcbc_dec
@ further down. Nothing in between updates them. r5 is then reused for the
@ round count.
308 cmp r5,#0 @ en- or decrypting?
309 ldr r5,[r3,#240]
310 and r2,r2,#-16
311 vld1.8 {q6},[r4]
312 vld1.8 {q0},[r0],r8
313
314 vld1.32 {q8,q9},[r3] @ load key schedule...
315 sub r5,r5,#6
316 add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
317 sub r5,r5,#2
318 vld1.32 {q10,q11},[r7]!
319 vld1.32 {q12,q13},[r7]!
320 vld1.32 {q14,q15},[r7]!
321 vld1.32 {q7},[r7]
322
323 add r7,r3,#32
324 mov r6,r5
325 beq Lcbc_dec
326
@ Encryption. q0 = first plaintext block XOR IV.
@ q5 = round key 0 XOR last round key. Each following plaintext block is
@ XORed with q5 and kept in q8. The loop carries q0 forward without the
@ last round key applied (only q6 gets it), so the first aese of the next
@ block, which XORs q0 with q8, cancels the last round key and yields
@ ciphertext XOR plaintext XOR round key 0.
327 cmp r5,#2
328 veor q0,q0,q6
329 veor q5,q8,q7
330 beq Lcbc_enc128
331
@ 12- and 14-round path. q2, q3 = round keys 2 and 3. The pointers below
@ address round key 1 (r7) and round keys 4 to 7 (r6, r12, r14, r3) so
@ that q8 and q9 can be recycled for them inside the loop.
332 vld1.32 {q2,q3},[r7]
333 add r7,r3,#16
334 add r6,r3,#16*4
335 add r12,r3,#16*5
336.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
337.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
338 add r14,r3,#16*6
339 add r3,r3,#16*7
340 b Lenter_cbc_enc
341
342.align 4
@ Loop top: first round of the next block, then store the ciphertext block
@ finished in the previous pass.
343Loop_cbc_enc:
344.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
345.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
346 vst1.8 {q6},[r1]!
347Lenter_cbc_enc:
348.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
349.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
350.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
351.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
352 vld1.32 {q8},[r6]
@ r5 equal to 4 means 12 rounds: skip the two rounds that only the
@ 14-round schedule has.
353 cmp r5,#4
354.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
355.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
356 vld1.32 {q9},[r12]
357 beq Lcbc_enc192
358
359.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
360.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
361 vld1.32 {q8},[r14]
362.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
363.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
364 vld1.32 {q9},[r3]
365 nop
366
367Lcbc_enc192:
368.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
369.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ This subs sets the flags for the moveq below and for the bhs at the
@ bottom of the loop. Nothing in between updates them.
370 subs r2,r2,#16
371.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
372.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
373 moveq r8,#0
374.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
375.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
376.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
377.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ Fetch the next plaintext block into q8 and fold q5 into it.
378 vld1.8 {q8},[r0],r8
379.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
380.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
381 veor q8,q8,q5
382.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
383.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
384 vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
385.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
386.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
387.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
@ q6 = finished ciphertext block. q0 is left without the last round key.
388 veor q6,q0,q7
389 bhs Loop_cbc_enc
390
391 vst1.8 {q6},[r1]!
392 b Lcbc_done
393
394.align 5
@ 10-round encryption path: every round key stays in registers
@ (q8, q9, q2, q3, q10-q15, q7), so the loop reloads no keys.
395Lcbc_enc128:
396 vld1.32 {q2,q3},[r7]
397.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
398.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
399 b Lenter_cbc_enc128
400Loop_cbc_enc128:
401.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
402.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
403 vst1.8 {q6},[r1]!
404Lenter_cbc_enc128:
405.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
406.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ Flags from this subs feed the moveq below and the bhs at the loop end.
407 subs r2,r2,#16
408.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
409.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
410 moveq r8,#0
411.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
412.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
413.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
414.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
415.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
416.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
417 vld1.8 {q8},[r0],r8
418.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
419.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
420.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
421.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
422.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
423.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
424 veor q8,q8,q5
425.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
426 veor q6,q0,q7
427 bhs Loop_cbc_enc128
428
429 vst1.8 {q6},[r1]!
430 b Lcbc_done
431.align 5
@ Decryption. Up to three blocks are processed in parallel in q0, q1 and
@ q10. Unmodified copies of the ciphertext are kept in q2, q3 and q11
@ because each one is needed as the chaining value of the block after it.
@ r2 is biased by a further 32, so a borrow from the subs means fewer than
@ three blocks are available and the tail code handles them.
@ r6 = r5 + 2 is the counter for the two-rounds-per-pass inner loops.
432Lcbc_dec:
433 vld1.8 {q10},[r0]!
434 subs r2,r2,#32 @ bias
435 add r6,r5,#2
436 vorr q3,q0,q0
437 vorr q1,q0,q0
438 vorr q11,q10,q10
439 blo Lcbc_dec_tail
440
441 vorr q1,q10,q10
442 vld1.8 {q10},[r0]!
443 vorr q2,q0,q0
444 vorr q3,q1,q1
445 vorr q11,q10,q10
446
@ Inner loop: two rounds per pass on all three blocks, reloading q8 and q9
@ with the next two round keys through r7.
447Loop3x_cbc_dec:
448.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
449.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
450.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
451.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
452.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
453.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
454 vld1.32 {q8},[r7]!
455 subs r6,r6,#2
456.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
457.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
458.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
459.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
460.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
461.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
462 vld1.32 {q9},[r7]!
463 bgt Loop3x_cbc_dec
464
@ Remaining rounds, interleaved with bookkeeping for the next group of
@ three. q4, q5 and q9 become the chaining values (IV or previous
@ ciphertext) pre-XORed with the last round key q7, so one veor per block
@ at the end applies both.
465.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
466.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
467.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
468.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
469.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
470.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
471 veor q4,q6,q7
@ Flags from this subs are used by the movlo just below and by the bhs
@ that closes the outer loop. When fewer than three further blocks remain,
@ r6 (zero after the inner loop) takes the negative remainder, which pulls
@ r0 back so that the three loads below still end at the end of the input.
472 subs r2,r2,#0x30
473 veor q5,q2,q7
474 movlo r6,r2 @ r6, r6, is zero at this point
475.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
476.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
477.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
478.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
479.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
480.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
481 veor q9,q3,q7
482 add r0,r0,r6 @ r0 is adjusted in such way that
483 @ at exit from the loop q1-q10
484 @ are loaded with last "words"
@ q6 = last ciphertext block of this group, the next chaining value.
@ r7 is rewound to the start of the key schedule.
485 vorr q6,q11,q11
486 mov r7,r3
487.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
488.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
489.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
490.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
491.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
492.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
493 vld1.8 {q2},[r0]!
494.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
495.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
496.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
497.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
498.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
499.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
500 vld1.8 {q3},[r0]!
501.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
502.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
503.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
504.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
505.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
506.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
507 vld1.8 {q11},[r0]!
508.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
509.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
510.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
511 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
512 add r6,r5,#2
@ Produce the three plaintext blocks, store them, and move the newly
@ loaded ciphertext into the working registers for the next pass.
513 veor q4,q4,q0
514 veor q5,q5,q1
515 veor q10,q10,q9
516 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
517 vst1.8 {q4},[r1]!
518 vorr q0,q2,q2
519 vst1.8 {q5},[r1]!
520 vorr q1,q3,q3
521 vst1.8 {q10},[r1]!
522 vorr q10,q11,q11
523 bhs Loop3x_cbc_dec
524
@ r2 equal to -0x30 means the input was consumed exactly and nothing is
@ left for the tail.
525 cmn r2,#0x30
526 beq Lcbc_done
527 nop
528
@ Tail: one or two remaining blocks, processed in q1 and q10.
529Lcbc_dec_tail:
530.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
531.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
532.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
533.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
534 vld1.32 {q8},[r7]!
535 subs r6,r6,#2
536.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
537.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
538.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
539.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
540 vld1.32 {q9},[r7]!
541 bgt Lcbc_dec_tail
542
543.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
544.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
545.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
546.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
547.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
548.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
549.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
550.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
551.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
552.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
553.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
554.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
@ r2 equal to -0x20 means exactly one block is left. The result is
@ consumed by the beq to Lcbc_dec_one after the remaining rounds.
555 cmn r2,#0x20
556.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
557.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
558.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
559.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
560 veor q5,q6,q7
561.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
562.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
563.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
564.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
565 veor q9,q3,q7
566.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
567.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
568 beq Lcbc_dec_one
@ Two blocks left: q1 chains from q6, q10 chains from the saved
@ ciphertext in q3.
569 veor q5,q5,q1
570 veor q9,q9,q10
571 vorr q6,q11,q11
572 vst1.8 {q5},[r1]!
573 vst1.8 {q9},[r1]!
574 b Lcbc_done
575
@ One block left: it was processed in q10 and chains from q6.
576Lcbc_dec_one:
577 veor q5,q5,q10
578 vorr q6,q11,q11
579 vst1.8 {q5},[r1]!
580
@ Write the final chaining value back to the IV buffer.
581Lcbc_done:
582 vst1.8 {q6},[r4]
583Lcbc_abort:
584 vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
585 ldmia sp!,{r4,r5,r6,r7,r8,pc}
586
@ _aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR over whole blocks
@ with a 32-bit counter.
@ In:  r0   = input pointer
@      r1   = output pointer
@      r2   = number of 16-byte blocks
@      r3   = key schedule pointer, round count read from offset 240
@      [sp] = pointer to the 16-byte counter block (kept in r4)
@ Out: output written through r1. No return value. The counter block in
@      memory is only read, never written back.
@ The counter is the last 32-bit word of the counter block. It is kept in
@ r8 (byte-reversed with rev unless __ARMEB__ is defined), incremented per
@ block and reinserted into the top lane of each working block.
@ Saves and restores r4-r10, lr and d8-d15.
@ Register roles after setup:
@   q6       = pristine copy of the counter block
@   q0,q1,q10 = counter blocks being encrypted (three at a time)
@   q7       = last round key
@   q8, q9   = round keys 0 and 1 (reloaded as the rounds advance)
@   q12-q15  = the four round keys preceding the last one
@   r5, r6   = round count minus 6, and the working copy used as the
@              inner-loop counter
@ NOTE(review): a block count of zero is not special-cased. It takes the
@ tail path below, which loads one input block and stores two output
@ blocks. Confirm that callers never pass zero.
587.globl _aes_hw_ctr32_encrypt_blocks
588.private_extern _aes_hw_ctr32_encrypt_blocks
589#ifdef __thumb2__
590.thumb_func _aes_hw_ctr32_encrypt_blocks
591#endif
592.align 5
593_aes_hw_ctr32_encrypt_blocks:
594 mov ip,sp
595 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
596 vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
597 ldr r4, [ip] @ load remaining arg
598 ldr r5,[r3,#240]
599
600 ldr r8, [r4, #12]
601 vld1.32 {q0},[r4]
602
603 vld1.32 {q8,q9},[r3] @ load key schedule...
604 sub r5,r5,#4
605 mov r12,#16
@ Flags from this compare are consumed by the movlo and the bls further
@ down. None of the instructions in between updates them.
606 cmp r2,#2
607 add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
608 sub r5,r5,#2
609 vld1.32 {q12,q13},[r7]!
610 vld1.32 {q14,q15},[r7]!
611 vld1.32 {q7},[r7]
612 add r7,r3,#32
613 mov r6,r5
@ With fewer than two blocks the tail must not advance r0 between its two
@ input loads, so the post-increment in r12 becomes 0.
614 movlo r12,#0
615#ifndef __ARMEB__
616 rev r8, r8
617#endif
@ q1 gets counter+1 and, on the three-block path, q10 gets counter+2.
@ r8 is left at counter+2.
618 vorr q1,q0,q0
619 add r10, r8, #1
620 vorr q10,q0,q0
621 add r8, r8, #2
622 vorr q6,q0,q0
623 rev r10, r10
624 vmov.32 d3[1],r10
@ Two blocks or fewer: go straight to the tail.
625 bls Lctr32_tail
626 rev r12, r8
627 sub r2,r2,#3 @ bias
628 vmov.32 d21[1],r12
629 b Loop3x_ctr32
630
631.align 4
@ Inner loop: two rounds per pass on all three counter blocks, reloading
@ q8 and q9 with the next two round keys through r7.
632Loop3x_ctr32:
633.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
634.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
635.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
636.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
637.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
638.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
639 vld1.32 {q8},[r7]!
640 subs r6,r6,#2
641.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
642.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
643.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
644.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
645.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
646.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
647 vld1.32 {q9},[r7]!
648 bgt Loop3x_ctr32
649
@ Remaining rounds. The three states migrate to q4, q5 and q9 (the aesmc
@ destinations differ from their sources) so that q0, q1 and q10 can be
@ refilled from q6 with the next three counter values while the current
@ group finishes. The input blocks in q2, q3 and q11 are pre-XORed with
@ the last round key q7.
650.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
651.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
652.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
653.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
654 vld1.8 {q2},[r0]!
655 vorr q0,q6,q6
656.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
657.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
658 vld1.8 {q3},[r0]!
659 vorr q1,q6,q6
660.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
661.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
662.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
663.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
664 vld1.8 {q11},[r0]!
@ r7 is rewound to the start of the key schedule.
665 mov r7,r3
666.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
667.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
668 vorr q10,q6,q6
@ r9, r10 and the updated r8 are the next three counter values.
669 add r9,r8,#1
670.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
671.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
672.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
673.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
674 veor q2,q2,q7
675 add r10,r8,#2
676.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
677.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
678 veor q3,q3,q7
679 add r8,r8,#3
680.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
681.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
682.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
683.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
684 veor q11,q11,q7
685 rev r9,r9
686.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
687.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
688 vmov.32 d1[1], r9
689 rev r10,r10
690.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
691.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
692.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
693.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
694 vmov.32 d3[1], r10
695 rev r12,r8
696.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
697.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
698 vmov.32 d21[1], r12
@ Flags from this subs are consumed by the bhs that closes the outer loop.
@ Nothing in between updates them.
699 subs r2,r2,#3
700.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
701.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
702.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
703
@ Output = (input XOR last round key) XOR state. Store three blocks and
@ re-arm q8, q9 and r6 for the next group.
704 veor q2,q2,q4
705 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
706 vst1.8 {q2},[r1]!
707 veor q3,q3,q5
708 mov r6,r5
709 vst1.8 {q3},[r1]!
710 veor q11,q11,q9
711 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
712 vst1.8 {q11},[r1]!
713 bhs Loop3x_ctr32
714
@ Undo the bias. Zero means all blocks are done. Otherwise one or two
@ blocks remain, and r12 is the input step between the two tail loads
@ (0 when only one block is left).
715 adds r2,r2,#3
716 beq Lctr32_done
717 cmp r2,#1
718 mov r12,#16
719 moveq r12,#0
720
@ Tail: encrypt the counter blocks in q0 and q1 only.
721Lctr32_tail:
722.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
723.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
724.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
725.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
726 vld1.32 {q8},[r7]!
727 subs r6,r6,#2
728.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
729.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
730.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
731.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
732 vld1.32 {q9},[r7]!
733 bgt Lctr32_tail
734
735.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
736.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
737.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
738.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
739.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
740.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
741.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
742.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
@ With r12 equal to 0 the second load below re-reads the same input block
@ instead of reading past the end of the input.
743 vld1.8 {q2},[r0],r12
744.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
745.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
746.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
747.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
748 vld1.8 {q3},[r0]
749.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
750.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
751.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
752.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
753 veor q2,q2,q7
754.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
755.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
756.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
757.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
758 veor q3,q3,q7
759.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
760.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
761
@ The first output block is always stored. The second store is skipped
@ only when r2 is exactly 1.
762 cmp r2,#1
763 veor q2,q2,q0
764 veor q3,q3,q1
765 vst1.8 {q2},[r1]!
766 beq Lctr32_done
767 vst1.8 {q3},[r1]
768
769Lctr32_done:
770 vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
771 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
772
773#endif