/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>	/* presumably supplies ENTRY()/ENDPROC() used below */

	.text
	.arch	armv8-a+crypto		/* allow the aese/aesmc instructions */

/*
 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
 *			     u32 *macp, u8 const rk[], u32 rounds);
 *
 * Fold 'abytes' bytes of input into the running CBC-MAC.
 *
 *   x0  mac     16-byte MAC state, updated in place
 *   x1  in      input bytes
 *   w2  abytes  number of input bytes
 *   x3  macp    in/out: how many bytes of the current 16-byte block have
 *               already been xor'ed into mac[] (0 = on a block boundary)
 *   x4  rk      AES round keys, 16 bytes each
 *   w5  rounds  number of AES rounds; compared against 12 to pick one of
 *               three entry points into the 3-rounds-per-iteration loop
 *
 * mac[] always holds a block that has had input xor'ed in but has not been
 * encrypted yet; the encryption is performed lazily, right before the next
 * block of input is xor'ed in (label 1).
 *
 * All 16-byte quantities are accessed as .16b so that the byte order in the
 * vector registers matches memory on both little and big endian builds.
 *
 * Clobbers: x0-x2, x6-x8, v0, v1, v3-v5, condition flags.
 */
ENTRY(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes missing from block) */
	eor	v1.16b, v1.16b, v1.16b
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1			/* flags consumed by 'beq 8f' below */
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* block complete: xor into mac */
1:	ld1	{v3.16b}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.16b}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f				/* Z still from 'subs w2' above */
	adds	w2, w2, #16			/* w2 = size of partial tail */
	beq	10f
	mov	w8, w2				/* tail size becomes new *macp */
7:	ldrb	w7, [x1], #1			/* xor tail into mac[], bytewise */
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
	/*
	 * Input ran out while topping up a partial block. w8 holds minus the
	 * number of bytes still missing. If it is zero, the last input byte
	 * also completed the block: v1 is already aligned and *macp must
	 * become 0, so skip the realignment. (Without this check the loop at
	 * 9 would start from w7 == 0 and spin until w7 wraps around, and 16
	 * would be stored to *macp.)
	 */
8:	cbz	w8, 91f
	mov	w7, w8
	add	w8, w8, #16			/* new *macp = bytes now in block */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate bytes into position */
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]
	ret
ENDPROC(ce_aes_ccm_auth_data)

/*
 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
 *			 u32 rounds);
 *
 * Encrypt both the MAC block and the counter block at ctr[] with the same
 * key, and xor the two results to produce the final (en-/decrypted) MAC,
 * which is written back to mac[].
 *
 *   x0  mac     16-byte MAC, updated in place
 *   x1  ctr     16-byte counter block (read only)
 *   x2  rk      AES round keys, 16 bytes each
 *   w3  rounds  number of AES rounds; compared against 12 to pick one of
 *               three entry points into the 3-rounds-per-iteration loop
 *
 * The two encryptions are interleaved, with each aese immediately followed
 * by its aesmc. The last round key is never applied: it would be xor'ed
 * into both blocks and therefore cancels in the final xor.
 *
 * All 16-byte quantities are accessed as .16b so that the byte order in the
 * vector registers matches memory on both little and big endian builds.
 *
 * Clobbers: x2, w3, v0, v1, v3-v5, condition flags.
 */
ENTRY(ce_aes_ccm_final)
	ld1	{v3.16b}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
ENDPROC(ce_aes_ccm_final)

/*
 * aes_ccm_do_crypt enc
 *
 * Body shared by ce_aes_ccm_encrypt (enc == 1) and ce_aes_ccm_decrypt
 * (enc == 0). For every 16 bytes of input it increments the counter,
 * encrypts the MAC and the counter block in parallel, xors the encrypted
 * counter with the input to produce the output, and xors the plaintext
 * into the MAC.
 *
 * Register use (arguments as set up by the two entry points):
 *   x0   out pointer, advanced as output is written
 *   x1   in pointer, advanced as input is read
 *   w2   bytes left to process
 *   x3   AES round keys, 16 bytes each
 *   w4   number of AES rounds (compared against 12 to select the entry
 *        point into the round loop)
 *   x5   mac[]: loaded on entry, stored on exit
 *   x6   ctr[]: bytes 0-7 are reloaded every block, bytes 8-15 are kept
 *        in x8 as an integer and incremented once per block
 *   w7   round counter, x9 scratch, x10 round key pointer
 *   v0   MAC, v1 counter block / key stream, v2 input block,
 *   v3-v5 rotating round keys
 *
 * The counter in memory is a big endian byte string. It is moved through
 * x8 with ldr/str, so x8 only needs byte swapping on little endian builds;
 * on big endian builds the load already yields the integer value. The
 * 'rev x9, x8' feeding 'ins v1.d[1]' is needed on both, because lane
 * contents do not depend on memory endianness. Vector loads/stores use
 * byte elements (.16b/.8b) for the same reason.
 *
 * A trailing partial block (w2 not a multiple of 16) is handled bytewise
 * at label 6; that path returns without writing the counter back to ctr[].
 *
 * Clobbers: x0-x2, x5-x10, v0-v5, condition flags.
 */
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
#ifndef __AARCH64EB__
	rev	x8, x8				/* keep swabbed ctr in reg */
#endif
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.16b}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.16b}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* Z still from 'subs w2' above */
#ifndef __AARCH64EB__
	rev	x8, x8
#endif
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9
	eor	w9, w9, w6
	.else
	eor	w9, w9, w6
	eor	w7, w7, w9
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

/*
 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 *			   u8 const rk[], u32 rounds, u8 mac[],
 *			   u8 ctr[]);
 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 *			   u8 const rk[], u32 rounds, u8 mac[],
 *			   u8 ctr[]);
 *
 * Both entry points expand aes_ccm_do_crypt, which ends in its own 'ret'.
 * The arguments arrive in x0-x6 in declaration order:
 *   x0 out, x1 in, w2 cbytes, x3 rk, w4 rounds, x5 mac, x6 ctr
 *
 * The macro argument selects the direction: 1 folds the input (plaintext)
 * into the MAC, 0 folds the output (recovered plaintext) into the MAC.
 */
ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
ENDPROC(ce_aes_ccm_encrypt)

ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
ENDPROC(ce_aes_ccm_decrypt)