/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto		/* the code below uses aese/aesmc */

17 /*
18 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
19 * u32 *macp, u8 const rk[], u32 rounds);
20 */
21ENTRY(ce_aes_ccm_auth_data)
22 ldr w8, [x3] /* leftover from prev round? */
Ard Biesheuveld018dc92016-10-11 19:15:17 +010023 ld1 {v0.16b}, [x0] /* load mac */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010024 cbz w8, 1f
25 sub w8, w8, #16
26 eor v1.16b, v1.16b, v1.16b
270: ldrb w7, [x1], #1 /* get 1 byte of input */
28 subs w2, w2, #1
29 add w8, w8, #1
30 ins v1.b[0], w7
31 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
32 beq 8f /* out of input? */
33 cbnz w8, 0b
34 eor v0.16b, v0.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100351: ld1 {v3.16b}, [x4] /* load first round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010036 prfm pldl1strm, [x1]
37 cmp w5, #12 /* which key size? */
38 add x6, x4, #16
39 sub w7, w5, #2 /* modified # of rounds */
40 bmi 2f
41 bne 5f
42 mov v5.16b, v3.16b
43 b 4f
442: mov v4.16b, v3.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +010045 ld1 {v5.16b}, [x6], #16 /* load 2nd round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100463: aese v0.16b, v4.16b
47 aesmc v0.16b, v0.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100484: ld1 {v3.16b}, [x6], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010049 aese v0.16b, v5.16b
50 aesmc v0.16b, v0.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100515: ld1 {v4.16b}, [x6], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010052 subs w7, w7, #3
53 aese v0.16b, v3.16b
54 aesmc v0.16b, v0.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +010055 ld1 {v5.16b}, [x6], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010056 bpl 3b
57 aese v0.16b, v4.16b
58 subs w2, w2, #16 /* last data? */
59 eor v0.16b, v0.16b, v5.16b /* final round */
60 bmi 6f
61 ld1 {v1.16b}, [x1], #16 /* load next input block */
62 eor v0.16b, v0.16b, v1.16b /* xor with mac */
63 bne 1b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100646: st1 {v0.16b}, [x0] /* store mac */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010065 beq 10f
66 adds w2, w2, #16
67 beq 10f
68 mov w8, w2
697: ldrb w7, [x1], #1
70 umov w6, v0.b[0]
71 eor w6, w6, w7
72 strb w6, [x0], #1
73 subs w2, w2, #1
74 beq 10f
75 ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
76 b 7b
Ard Biesheuvel9a0eec82019-01-24 17:33:45 +0100778: cbz w8, 91f
78 mov w7, w8
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010079 add w8, w8, #16
809: ext v1.16b, v1.16b, v1.16b, #1
81 adds w7, w7, #1
82 bne 9b
Ard Biesheuvel9a0eec82019-01-24 17:33:45 +01008391: eor v0.16b, v0.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +010084 st1 {v0.16b}, [x0]
Ard Biesheuvela3fd8212014-02-10 11:26:29 +01008510: str w8, [x3]
86 ret
87ENDPROC(ce_aes_ccm_auth_data)
88
89 /*
90 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
91 * u32 rounds);
92 */
93ENTRY(ce_aes_ccm_final)
Ard Biesheuveld018dc92016-10-11 19:15:17 +010094 ld1 {v3.16b}, [x2], #16 /* load first round key */
95 ld1 {v0.16b}, [x0] /* load mac */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010096 cmp w3, #12 /* which key size? */
97 sub w3, w3, #2 /* modified # of rounds */
Ard Biesheuveld018dc92016-10-11 19:15:17 +010098 ld1 {v1.16b}, [x1] /* load 1st ctriv */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +010099 bmi 0f
100 bne 3f
101 mov v5.16b, v3.16b
102 b 2f
1030: mov v4.16b, v3.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +01001041: ld1 {v5.16b}, [x2], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100105 aese v0.16b, v4.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100106 aesmc v0.16b, v0.16b
Ard Biesheuvel4a97abd2015-03-17 18:05:13 +0000107 aese v1.16b, v4.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100108 aesmc v1.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +01001092: ld1 {v3.16b}, [x2], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100110 aese v0.16b, v5.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100111 aesmc v0.16b, v0.16b
Ard Biesheuvel4a97abd2015-03-17 18:05:13 +0000112 aese v1.16b, v5.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100113 aesmc v1.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +01001143: ld1 {v4.16b}, [x2], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100115 subs w3, w3, #3
116 aese v0.16b, v3.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100117 aesmc v0.16b, v0.16b
Ard Biesheuvel4a97abd2015-03-17 18:05:13 +0000118 aese v1.16b, v3.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100119 aesmc v1.16b, v1.16b
120 bpl 1b
121 aese v0.16b, v4.16b
122 aese v1.16b, v4.16b
123 /* final round key cancels out */
124 eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100125 st1 {v0.16b}, [x0] /* store result */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100126 ret
127ENDPROC(ce_aes_ccm_final)
128
129 .macro aes_ccm_do_crypt,enc
130 ldr x8, [x6, #8] /* load lower ctr */
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100131 ld1 {v0.16b}, [x5] /* load mac */
132CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +01001330: /* outer loop */
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100134 ld1 {v1.8b}, [x6] /* load upper ctr */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100135 prfm pldl1strm, [x1]
136 add x8, x8, #1
137 rev x9, x8
138 cmp w4, #12 /* which key size? */
139 sub w7, w4, #2 /* get modified # of rounds */
140 ins v1.d[1], x9 /* no carry in lower ctr */
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100141 ld1 {v3.16b}, [x3] /* load first round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100142 add x10, x3, #16
143 bmi 1f
144 bne 4f
145 mov v5.16b, v3.16b
146 b 3f
1471: mov v4.16b, v3.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100148 ld1 {v5.16b}, [x10], #16 /* load 2nd round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +01001492: /* inner loop: 3 rounds, 2x interleaved */
150 aese v0.16b, v4.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100151 aesmc v0.16b, v0.16b
Ard Biesheuvel4a97abd2015-03-17 18:05:13 +0000152 aese v1.16b, v4.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100153 aesmc v1.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +01001543: ld1 {v3.16b}, [x10], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100155 aese v0.16b, v5.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100156 aesmc v0.16b, v0.16b
Ard Biesheuvel4a97abd2015-03-17 18:05:13 +0000157 aese v1.16b, v5.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100158 aesmc v1.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +01001594: ld1 {v4.16b}, [x10], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100160 subs w7, w7, #3
161 aese v0.16b, v3.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100162 aesmc v0.16b, v0.16b
Ard Biesheuvel4a97abd2015-03-17 18:05:13 +0000163 aese v1.16b, v3.16b
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100164 aesmc v1.16b, v1.16b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100165 ld1 {v5.16b}, [x10], #16 /* load next round key */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100166 bpl 2b
167 aese v0.16b, v4.16b
168 aese v1.16b, v4.16b
169 subs w2, w2, #16
170 bmi 6f /* partial block? */
171 ld1 {v2.16b}, [x1], #16 /* load next input block */
172 .if \enc == 1
173 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
174 eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
175 .else
176 eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
177 eor v1.16b, v2.16b, v5.16b /* final round enc */
178 .endif
179 eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
180 st1 {v1.16b}, [x0], #16 /* write output block */
181 bne 0b
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100182CPU_LE( rev x8, x8 )
183 st1 {v0.16b}, [x5] /* store mac */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100184 str x8, [x6, #8] /* store lsb end of ctr (BE) */
1855: ret
186
1876: eor v0.16b, v0.16b, v5.16b /* final round mac */
188 eor v1.16b, v1.16b, v5.16b /* final round enc */
Ard Biesheuveld018dc92016-10-11 19:15:17 +0100189 st1 {v0.16b}, [x5] /* store mac */
Ard Biesheuvela3fd8212014-02-10 11:26:29 +0100190 add w2, w2, #16 /* process partial tail block */
1917: ldrb w9, [x1], #1 /* get 1 byte of input */
192 umov w6, v1.b[0] /* get top crypted ctr byte */
193 umov w7, v0.b[0] /* get top mac byte */
194 .if \enc == 1
195 eor w7, w7, w9
196 eor w9, w9, w6
197 .else
198 eor w9, w9, w6
199 eor w7, w7, w9
200 .endif
201 strb w9, [x0], #1 /* store out byte */
202 strb w7, [x5], #1 /* store mac byte */
203 subs w2, w2, #1
204 beq 5b
205 ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
206 ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
207 b 7b
208 .endm
209
210 /*
211 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
212 * u8 const rk[], u32 rounds, u8 mac[],
213 * u8 ctr[]);
214 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
215 * u8 const rk[], u32 rounds, u8 mac[],
216 * u8 ctr[]);
217 */
218ENTRY(ce_aes_ccm_encrypt)
219 aes_ccm_do_crypt 1
220ENDPROC(ce_aes_ccm_encrypt)
221
222ENTRY(ce_aes_ccm_decrypt)
223 aes_ccm_do_crypt 0
224ENDPROC(ce_aes_ccm_decrypt)