blob: f2d67c095e596edc67cacae01e2d9329125cb629 [file] [log] [blame]
Ard Biesheuvelb7912e02017-01-11 16:41:53 +00001/*
2 * Scalar AES core transform
3 *
4 * Copyright (C) 2017 Linaro Ltd.
5 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/linkage.h>
Eric Biggers910bbfc2018-10-17 21:37:59 -070013#include <asm/assembler.h>
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010014#include <asm/cache.h>
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000015
16	.text
17	.align	5
18
// Register roles for the whole file (arguments arrive per AAPCS in r0-r3):
//   rk     = r0: pointer to the expanded round key schedule (advanced via ldm rk!)
//   rounds = r1: number of rounds
//   in     = r2: pointer to the 16-byte input block
//   out    = r3: pointer to the 16-byte output block
//   ttab   = ip: base address of the active lookup table
19	rk	.req	r0
20	rounds	.req	r1
21	in	.req	r2
22	out	.req	r3
Ard Biesheuvelf1b4d6b2017-01-13 08:33:26 +000023	ttab	.req	ip
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000024
// Scratch registers. NOTE: t1/t2 deliberately alias in/out (r2/r3) — by the
// time they are clobbered in __hround, the input has been fully loaded, and
// do_crypt re-reads 'out' from the stack before storing the result.
25	t0	.req	lr
26	t1	.req	r2
27	t2	.req	r3
28
28
// __select: isolate byte \idx (0 = least significant) of word \in into \out.
// On pre-v7 there is no ubfx, so the byte is only masked and stays at its
// original bit position (bits 8*idx..8*idx+7); __load compensates for that
// with a right-shifted table index.  On v7+ the byte lands in bits [7:0].
29	.macro	__select, out, in, idx
30	.if	__LINUX_ARM_ARCH__ < 7
31	and	\out, \in, #0xff << (8 * \idx)
32	.else
33	ubfx	\out, \in, #(8 * \idx), #8
34	.endif
35	.endm
36
// __load: load a table entry for a byte produced by __select.
//   \out/\in : destination / selected-byte index
//   \idx     : which byte lane __select extracted (needed on pre-v7 only)
//   \sz      : log2 of the table entry size (2 = 32-bit words, 0 = bytes)
//   \op      : ldr suffix ('' for word loads, 'b' for the final byte lookups)
// Pre-v7, __select leaves the byte at bit position 8*idx, so shift it down
// and scale in one operand: (value >> (8*idx)) << sz == value >> (8*idx - sz).
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010037	.macro	__load, out, in, idx, sz, op
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000038	.if	__LINUX_ARM_ARCH__ < 7 && \idx > 0
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010039	ldr\op	\out, [ttab, \in, lsr #(8 * \idx) - \sz]
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000040	.else
// v7+ (or idx == 0): byte is already in bits [7:0]; just scale by entry size.
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010041	ldr\op	\out, [ttab, \in, lsl #\sz]
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000042	.endif
43	.endm
44
// __hround: compute two output columns (\out0, \out1) of one AES round from
// the four input columns \in0..\in3, via four table lookups per column that
// are combined with ror #24/#16/#8 rotations, then XORed with the next two
// round-key words (fetched with ldm rk!).
//   \t3, \t4   : caller-provided scratch (the other two output registers)
//   \enc       : 1 = forward (ShiftRows) byte order, 0 = inverse byte order
//   \sz, \op   : table entry size / ldr suffix, passed through to __load
//   \oldcpsr   : if non-blank, saved CPSR to restore once the last
//                data-dependent lookup has been issued (final round only)
// NOTE: t1/t2 alias in/out (r2/r3); the statement order below is what keeps
// the live input columns intact until they have been consumed — do not reorder.
Eric Biggers910bbfc2018-10-17 21:37:59 -070045	.macro	__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000046	__select	\out0, \in0, 0
47	__select	t0, \in1, 1
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010048	__load		\out0, \out0, 0, \sz, \op
49	__load		t0, t0, 1, \sz, \op
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000050
// Byte-source selection differs between encryption and decryption because
// ShiftRows rotates rows in opposite directions for the two transforms.
51	.if	\enc
52	__select	\out1, \in1, 0
53	__select	t1, \in2, 1
54	.else
55	__select	\out1, \in3, 0
56	__select	t1, \in0, 1
57	.endif
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010058	__load		\out1, \out1, 0, \sz, \op
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000059	__select	t2, \in2, 2
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010060	__load		t1, t1, 1, \sz, \op
61	__load		t2, t2, 2, \sz, \op
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000062
63	eor		\out0, \out0, t0, ror #24
64
65	__select	t0, \in3, 3
66	.if	\enc
67	__select	\t3, \in3, 2
68	__select	\t4, \in0, 3
69	.else
70	__select	\t3, \in1, 2
71	__select	\t4, \in2, 3
72	.endif
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010073	__load		\t3, \t3, 2, \sz, \op
74	__load		t0, t0, 3, \sz, \op
75	__load		\t4, \t4, 3, \sz, \op
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000076
Eric Biggers910bbfc2018-10-17 21:37:59 -070077	.ifnb		\oldcpsr
78	/*
79	 * This is the final round and we're done with all data-dependent table
80	 * lookups, so we can safely re-enable interrupts.
81	 */
82	restore_irqs	\oldcpsr
83	.endif
84
// Fold the rotated table values together, then whiten with the round key.
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000085	eor		\out1, \out1, t1, ror #24
86	eor		\out0, \out0, t2, ror #16
87	ldm		rk!, {t1, t2}
88	eor		\out1, \out1, \t3, ror #16
89	eor		\out0, \out0, t0, ror #8
90	eor		\out1, \out1, \t4, ror #8
91	eor		\out0, \out0, t1
92	eor		\out1, \out1, t2
93	.endm
94
// fround: one full forward (encryption) AES round — two __hround halves
// producing all four output columns.  \oldcpsr is only passed to the second
// half, so interrupts are re-enabled after the very last table lookup.
Eric Biggers910bbfc2018-10-17 21:37:59 -070095	.macro	fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +010096	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
Eric Biggers910bbfc2018-10-17 21:37:59 -070097	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
Ard Biesheuvelb7912e02017-01-11 16:41:53 +000098	.endm
99
// iround: one full inverse (decryption) AES round.  Same structure as fround
// but with enc=0 and the input columns permuted for the inverse ShiftRows.
Eric Biggers910bbfc2018-10-17 21:37:59 -0700100	.macro	iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +0100101	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
Eric Biggers910bbfc2018-10-17 21:37:59 -0700102	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000103	.endm
104
// __rev: byte-swap a 32-bit word (for big-endian hosts).  Pre-v6 has no
// 'rev' instruction, so build the swap from shifts, masks and ORs
// (clobbers t0, t1, t2); v6+ uses the single rev instruction.
105	.macro	__rev, out, in
106	.if	__LINUX_ARM_ARCH__ < 6
107	lsl	t0, \in, #24
108	and	t1, \in, #0xff00
109	and	t2, \in, #0xff0000
110	orr	\out, t0, \in, lsr #24
111	orr	\out, \out, t1, lsl #8
112	orr	\out, \out, t2, lsr #8
113	.else
114	rev	\out, \in
115	.endif
116	.endm
117
// __adrl: load the (possibly conditional, via \c) address of symbol \sym
// into \out.  Pre-v7 uses a literal-pool load (see the .ltorg in do_crypt);
// v7+ synthesizes the 32-bit address with a movw/movt pair instead.
118	.macro	__adrl, out, sym, c
119	.if	__LINUX_ARM_ARCH__ < 7
120	ldr\c	\out, =\sym
121	.else
122	movw\c	\out, #:lower16:\sym
123	movt\c	\out, #:upper16:\sym
124	.endif
125	.endm
126
// do_crypt: body shared by the encrypt and decrypt entry points.
//   \round : per-round macro to expand (fround or iround)
//   \ttab  : 1024-byte main lookup table (crypto_ft_tab / crypto_it_tab)
//   \ltab  : table for the final round; if blank, \ttab+1 is reused so the
//            byte loads pick the S-box byte lane of the word entries
//   \bsz   : log2 entry size passed to the final round (2 = words, 0 = bytes)
// Flow: save regs -> load key + input -> (BE byte swap) -> initial key
// whitening -> disable IRQs and prefetch the table -> round loop -> final
// byte-lookup round (re-enables IRQs) -> (BE byte swap) -> store output.
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +0100127	.macro	do_crypt, round, ttab, ltab, bsz
// r3 ('out') is saved on the stack because t2 aliases it and clobbers it.
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000128	push	{r3-r11, lr}
129
Eric Biggers910bbfc2018-10-17 21:37:59 -0700130	// Load keys first, to reduce latency in case they're not cached yet.
131	ldm	rk!, {r8-r11}
132
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000133	ldr	r4, [in]
134	ldr	r5, [in, #4]
135	ldr	r6, [in, #8]
136	ldr	r7, [in, #12]
137
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000138#ifdef CONFIG_CPU_BIG_ENDIAN
139	__rev	r4, r4
140	__rev	r5, r5
141	__rev	r6, r6
142	__rev	r7, r7
143#endif
144
// Initial AddRoundKey with the first four key words loaded above.
145	eor	r4, r4, r8
146	eor	r5, r5, r9
147	eor	r6, r6, r10
148	eor	r7, r7, r11
149
Ard Biesheuvelf1b4d6b2017-01-13 08:33:26 +0000150	__adrl	ttab, \ttab
Eric Biggers910bbfc2018-10-17 21:37:59 -0700151	/*
152	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
153	 * L1 cache, assuming cacheline size >= 32. This is a hardening measure
154	 * intended to make cache-timing attacks more difficult. They may not
155	 * be fully prevented, however; see the paper
156	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
157	 * ("Cache-timing attacks on AES") for a discussion of the many
158	 * difficulties involved in writing truly constant-time AES software.
159	 */
160	save_and_disable_irqs	t0
161	.set	i, 0
162	.rept	1024 / 128
// One load per assumed 32-byte cacheline, four lines per iteration.
163	ldr	r8, [ttab, #i + 0]
164	ldr	r9, [ttab, #i + 32]
165	ldr	r10, [ttab, #i + 64]
166	ldr	r11, [ttab, #i + 96]
167	.set	i, i + 128
168	.endr
169	push	{t0}		// oldcpsr
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000170
// Odd number of round *pairs* remaining (rounds & 2)? Then enter the loop
// at the half-iteration point so the final round lands on label 2.
171	tst	rounds, #2
172	bne	1f
173
1740:	\round	r8, r9, r10, r11, r4, r5, r6, r7
175	\round	r4, r5, r6, r7, r8, r9, r10, r11
176
1771:	subs	rounds, rounds, #4
178	\round	r8, r9, r10, r11, r4, r5, r6, r7
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +0100179	bls	2f
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000180	\round	r4, r5, r6, r7, r8, r9, r10, r11
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +0100181	b	0b
182
// Set up the final-round table, then run the last round with byte loads.
Eric Biggers910bbfc2018-10-17 21:37:59 -07001832:	.ifb	\ltab
184	add	ttab, ttab, #1
185	.else
186	__adrl	ttab, \ltab
187	// Prefetch inverse S-box for final round; see explanation above
188	.set	i, 0
189	.rept	256 / 64
190	ldr	t0, [ttab, #i + 0]
191	ldr	t1, [ttab, #i + 32]
192	.set	i, i + 64
193	.endr
194	.endif
195
// 'rounds' is dead now; reuse it to hold the saved CPSR for the final round,
// which re-enables interrupts once its last table lookup has issued.
196	pop	{rounds}	// oldcpsr
197	\round	r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
Ard Biesheuvelb7912e02017-01-11 16:41:53 +0000198
199#ifdef CONFIG_CPU_BIG_ENDIAN
200	__rev	r4, r4
201	__rev	r5, r5
202	__rev	r6, r6
203	__rev	r7, r7
204#endif
205
// Recover the output pointer pushed as r3 in the prologue.
206	ldr	out, [sp]
207
208	str	r4, [out]
209	str	r5, [out, #4]
210	str	r6, [out, #8]
211	str	r7, [out, #12]
212
213	pop	{r3-r11, pc}
214
// Literal pool for the pre-v7 __adrl loads above.
215	.align	3
216	.ltorg
217	.endm
218
// __aes_arm_encrypt(rk=r0, rounds=r1, in=r2, out=r3)
// Encrypt one 16-byte block.  Uses the forward table crypto_ft_tab; ltab is
// left blank, so the final round indexes crypto_ft_tab+1 with byte loads
// (bsz=2), picking the byte lane of each word entry that holds the plain
// S-box value — presumably relying on the ft_tab entry layout; see do_crypt.
Jinbum Parkabed8f92018-02-12 22:52:37 +0900219ENTRY(__aes_arm_encrypt)
Eric Biggers910bbfc2018-10-17 21:37:59 -0700220	do_crypt	fround, crypto_ft_tab,, 2
Jinbum Parkabed8f92018-02-12 22:52:37 +0900221ENDPROC(__aes_arm_encrypt)
222
// __aes_arm_decrypt(rk=r0, rounds=r1, in=r2, out=r3)
// Decrypt one 16-byte block.  Uses the inverse table crypto_it_tab for the
// main rounds and the dedicated 256-byte __aes_arm_inverse_sbox (bsz=0,
// plain byte indexing) for the final round.
223	.align	5
224ENTRY(__aes_arm_decrypt)
225	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
226ENDPROC(__aes_arm_decrypt)
227
// __aes_arm_inverse_sbox: the 256-byte AES inverse S-box, used by
// __aes_arm_decrypt's final round (passed as \ltab to do_crypt).  Placed in
// .rodata and aligned to a full L1 cacheline so the whole-table prefetch in
// do_crypt covers it with the minimum number of lines.
228	.section	".rodata", "a"
Ard Biesheuvelb99d62f2017-07-24 11:28:19 +0100229	.align		L1_CACHE_SHIFT
230	.type		__aes_arm_inverse_sbox, %object
231__aes_arm_inverse_sbox:
232	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
233	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
234	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
235	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
236	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
237	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
238	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
239	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
240	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
241	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
242	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
243	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
244	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
245	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
246	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
247	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
248	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
249	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
250	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
251	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
252	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
253	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
254	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
255	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
256	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
257	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
258	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
259	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
260	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
261	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
262	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
263	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
264	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox