blob: 5f28909d401245b7e169bcbc3df84149769f7e1c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Cryptographic API.
3 *
4 * Support for VIA PadLock hardware crypto engine.
5 *
6 * Copyright (c) 2004 Michal Ludvig <michal@logix.cz>
7 *
8 * Key expansion routine taken from crypto/aes.c
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * ---------------------------------------------------------------------------
16 * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
17 * All rights reserved.
18 *
19 * LICENSE TERMS
20 *
21 * The free distribution and use of this software in both source and binary
22 * form is allowed (with or without changes) provided that:
23 *
24 * 1. distributions of this source code include the above copyright
25 * notice, this list of conditions and the following disclaimer;
26 *
27 * 2. distributions in binary form include the above copyright
28 * notice, this list of conditions and the following disclaimer
29 * in the documentation and/or other associated materials;
30 *
31 * 3. the copyright holder's name is not used to endorse products
32 * built using this software without specific written permission.
33 *
34 * ALTERNATIVELY, provided that this notice is retained in full, this product
35 * may be distributed under the terms of the GNU General Public License (GPL),
36 * in which case the provisions of the GPL apply INSTEAD OF those given above.
37 *
38 * DISCLAIMER
39 *
40 * This software is provided 'as is' with no explicit or implied warranties
41 * in respect of its properties, including, but not limited to, correctness
42 * and/or fitness for purpose.
43 * ---------------------------------------------------------------------------
44 */
45
46#include <linux/module.h>
47#include <linux/init.h>
48#include <linux/types.h>
49#include <linux/errno.h>
50#include <linux/crypto.h>
51#include <linux/interrupt.h>
Herbert Xu6789b2d2005-07-06 13:52:27 -070052#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <asm/byteorder.h>
54#include "padlock.h"
55
/* AES geometry used by this driver. */
#define AES_MIN_KEY_SIZE		16	/* bytes */
#define AES_MAX_KEY_SIZE		32	/* bytes */
#define AES_BLOCK_SIZE			16	/* bytes */
#define AES_EXTENDED_KEY_SIZE		64	/* 32-bit words */
/* The expanded-key buffer size again, this time expressed in bytes. */
#define AES_EXTENDED_KEY_SIZE_B		(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))
61
62struct aes_ctx {
Herbert Xu6789b2d2005-07-06 13:52:27 -070063 uint32_t e_data[AES_EXTENDED_KEY_SIZE];
64 uint32_t d_data[AES_EXTENDED_KEY_SIZE];
65 struct {
66 struct cword encrypt;
67 struct cword decrypt;
68 } cword;
Linus Torvalds1da177e2005-04-16 15:20:36 -070069 uint32_t *E;
70 uint32_t *D;
71 int key_length;
72};
73
74/* ====== Key management routines ====== */
75
/*
 * Rotate the 32-bit value x right by 'bits' positions.
 *
 * The count is reduced modulo 32, so any value is accepted; a count that is
 * a multiple of 32 returns x unchanged.
 */
static inline uint32_t
generic_rotr32 (const uint32_t x, const unsigned bits)
{
	const unsigned n = bits % 32;

	/*
	 * Reduce the complementary shift as well: for n == 0 a plain
	 * (32 - n) would shift a 32-bit value by its full width, which is
	 * undefined behaviour in C.
	 */
	return (x >> n) | (x << ((32 - n) % 32));
}
82
/*
 * Rotate the 32-bit value x left by 'bits' positions.
 *
 * The count is reduced modulo 32, so any value is accepted; a count that is
 * a multiple of 32 returns x unchanged.
 */
static inline uint32_t
generic_rotl32 (const uint32_t x, const unsigned bits)
{
	const unsigned n = bits % 32;

	/*
	 * Reduce the complementary shift as well: for n == 0 a plain
	 * (32 - n) would shift a 32-bit value by its full width, which is
	 * undefined behaviour in C.
	 */
	return (x << n) | (x >> ((32 - n) % 32));
}

/* Short names used by the table generator and the key schedule macros. */
#define rotl generic_rotl32
#define rotr generic_rotr32
92
/*
 * Return byte number n of x, counting from the least significant byte
 * (n == 0).  The shifted value is truncated to 8 bits by the return type.
 */
static inline uint8_t
byte(const uint32_t x, const unsigned n)
{
	const unsigned shift = n * 8;

	return (uint8_t)(x >> shift);
}
101
/* Read a little-endian 32-bit word at src and convert it to CPU order. */
#define uint32_t_in(src)		le32_to_cpu(*(const uint32_t *)(src))
/* Store val at dst as a little-endian 32-bit word. */
#define uint32_t_out(dst, val)		(*(uint32_t *)(dst) = cpu_to_le32(val))

/* Shorthands for the two key pointers; a local named 'ctx' must be in scope. */
#define E_KEY	ctx->E
#define D_KEY	ctx->D
107
/* Lookup tables; all of them are filled in once by gen_tabs(). */

/* GF(2^8) antilog / log tables for the generator 0x03. */
static uint8_t pow_tab[256], log_tab[256];
/* Byte substitution table and its inverse. */
static uint8_t sbx_tab[256], isb_tab[256];
/* Round constants consumed by the key schedule. */
static uint32_t rco_tab[10];
/* Combined substitution-and-mix tables: forward, then inverse. */
static uint32_t ft_tab[4][256];
static uint32_t it_tab[4][256];

/* Substitution-only tables (byte rotated into each lane): forward, inverse. */
static uint32_t fl_tab[4][256];
static uint32_t il_tab[4][256];
118
119static inline uint8_t
120f_mult (uint8_t a, uint8_t b)
121{
122 uint8_t aa = log_tab[a], cc = aa + log_tab[b];
123
124 return pow_tab[cc + (cc < aa ? 1 : 0)];
125}
126
127#define ff_mult(a,b) (a && b ? f_mult(a, b) : 0)
128
/*
 * Table-driven round helpers.  Each *_r* macro assigns one output word:
 * the XOR of four table lookups, indexed by bytes 0..3 taken from four
 * (rotating) input words, and of the key word rk[col].
 *
 * NOTE(review): only ls_box() has a user in the visible part of this
 * file; the other four look like leftovers from the software cipher.
 */
#define f_rn(out, in, col, rk)					\
	out[col] = ft_tab[0][byte(in[col], 0)] ^		\
		   ft_tab[1][byte(in[(col + 1) & 3], 1)] ^	\
		   ft_tab[2][byte(in[(col + 2) & 3], 2)] ^	\
		   ft_tab[3][byte(in[(col + 3) & 3], 3)] ^	\
		   *(rk + col)

#define i_rn(out, in, col, rk)					\
	out[col] = it_tab[0][byte(in[col], 0)] ^		\
		   it_tab[1][byte(in[(col + 3) & 3], 1)] ^	\
		   it_tab[2][byte(in[(col + 2) & 3], 2)] ^	\
		   it_tab[3][byte(in[(col + 1) & 3], 3)] ^	\
		   *(rk + col)

/* Substitute every byte of the word through the fl_tab lanes. */
#define ls_box(word)				\
	( fl_tab[0][byte(word, 0)] ^		\
	  fl_tab[1][byte(word, 1)] ^		\
	  fl_tab[2][byte(word, 2)] ^		\
	  fl_tab[3][byte(word, 3)] )

#define f_rl(out, in, col, rk)					\
	out[col] = fl_tab[0][byte(in[col], 0)] ^		\
		   fl_tab[1][byte(in[(col + 1) & 3], 1)] ^	\
		   fl_tab[2][byte(in[(col + 2) & 3], 2)] ^	\
		   fl_tab[3][byte(in[(col + 3) & 3], 3)] ^	\
		   *(rk + col)

#define i_rl(out, in, col, rk)					\
	out[col] = il_tab[0][byte(in[col], 0)] ^		\
		   il_tab[1][byte(in[(col + 3) & 3], 1)] ^	\
		   il_tab[2][byte(in[(col + 2) & 3], 2)] ^	\
		   il_tab[3][byte(in[(col + 1) & 3], 3)] ^	\
		   *(rk + col)
158
159static void
160gen_tabs (void)
161{
162 uint32_t i, t;
163 uint8_t p, q;
164
165 /* log and power tables for GF(2**8) finite field with
166 0x011b as modular polynomial - the simplest prmitive
167 root is 0x03, used here to generate the tables */
168
169 for (i = 0, p = 1; i < 256; ++i) {
170 pow_tab[i] = (uint8_t) p;
171 log_tab[p] = (uint8_t) i;
172
173 p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
174 }
175
176 log_tab[1] = 0;
177
178 for (i = 0, p = 1; i < 10; ++i) {
179 rco_tab[i] = p;
180
181 p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
182 }
183
184 for (i = 0; i < 256; ++i) {
185 p = (i ? pow_tab[255 - log_tab[i]] : 0);
186 q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
187 p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
188 sbx_tab[i] = p;
189 isb_tab[p] = (uint8_t) i;
190 }
191
192 for (i = 0; i < 256; ++i) {
193 p = sbx_tab[i];
194
195 t = p;
196 fl_tab[0][i] = t;
197 fl_tab[1][i] = rotl (t, 8);
198 fl_tab[2][i] = rotl (t, 16);
199 fl_tab[3][i] = rotl (t, 24);
200
201 t = ((uint32_t) ff_mult (2, p)) |
202 ((uint32_t) p << 8) |
203 ((uint32_t) p << 16) | ((uint32_t) ff_mult (3, p) << 24);
204
205 ft_tab[0][i] = t;
206 ft_tab[1][i] = rotl (t, 8);
207 ft_tab[2][i] = rotl (t, 16);
208 ft_tab[3][i] = rotl (t, 24);
209
210 p = isb_tab[i];
211
212 t = p;
213 il_tab[0][i] = t;
214 il_tab[1][i] = rotl (t, 8);
215 il_tab[2][i] = rotl (t, 16);
216 il_tab[3][i] = rotl (t, 24);
217
218 t = ((uint32_t) ff_mult (14, p)) |
219 ((uint32_t) ff_mult (9, p) << 8) |
220 ((uint32_t) ff_mult (13, p) << 16) |
221 ((uint32_t) ff_mult (11, p) << 24);
222
223 it_tab[0][i] = t;
224 it_tab[1][i] = rotl (t, 8);
225 it_tab[2][i] = rotl (t, 16);
226 it_tab[3][i] = rotl (t, 24);
227 }
228}
229
/*
 * star_x: double each of the four bytes packed in x in GF(2^8).  The low
 * seven bits of every byte are shifted up, and each byte whose top bit
 * was set is reduced by XORing in 0x1b.
 *
 * The whole expansion is parenthesised so that it stays one operand when
 * used next to higher-precedence operators such as '&'.
 */
#define star_x(x) \
	((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))

/*
 * imix_col: store into y the inverse-mix-column transform of the word x
 * (byte 0 is multiplied by 14, with 9, 13 and 11 going to the other lanes).
 *
 * Uses the caller's uint32_t locals u, v, w and t as scratch and evaluates
 * x more than once.  Wrapped in do/while (0) so that it behaves as a single
 * statement, e.g. as the body of an unbraced if or for.
 */
#define imix_col(y,x) \
	do { \
		u = star_x(x); \
		v = star_x(u); \
		w = star_x(v); \
		t = w ^ (x); \
		(y) = u ^ v ^ w; \
		(y) ^= rotr(u ^ t, 8) ^ \
		       rotr(v ^ t, 16) ^ \
		       rotr(t, 24); \
	} while (0)
241
/*
 * Key schedule expansion steps: initialise the key schedule from the user
 * supplied key.
 *
 * loopN(i) derives the i-th group of N words of the encryption schedule
 * (N = 4, 6 or 8 words of user key) from the previous group.  Each macro
 * expects a uint32_t local 't' carrying the last word produced so far and
 * a local 'ctx' (through E_KEY), and leaves the newest word in 't'.
 *
 * The argument is parenthesised and each body is wrapped in do/while (0)
 * so that the macros expand to exactly one statement for any argument.
 */

#define loop4(i) \
do { \
	t = rotr(t, 8); t = ls_box(t) ^ rco_tab[(i)]; \
	t ^= E_KEY[4 * (i)];     E_KEY[4 * (i) + 4] = t; \
	t ^= E_KEY[4 * (i) + 1]; E_KEY[4 * (i) + 5] = t; \
	t ^= E_KEY[4 * (i) + 2]; E_KEY[4 * (i) + 6] = t; \
	t ^= E_KEY[4 * (i) + 3]; E_KEY[4 * (i) + 7] = t; \
} while (0)

#define loop6(i) \
do { \
	t = rotr(t, 8); t = ls_box(t) ^ rco_tab[(i)]; \
	t ^= E_KEY[6 * (i)];     E_KEY[6 * (i) + 6] = t; \
	t ^= E_KEY[6 * (i) + 1]; E_KEY[6 * (i) + 7] = t; \
	t ^= E_KEY[6 * (i) + 2]; E_KEY[6 * (i) + 8] = t; \
	t ^= E_KEY[6 * (i) + 3]; E_KEY[6 * (i) + 9] = t; \
	t ^= E_KEY[6 * (i) + 4]; E_KEY[6 * (i) + 10] = t; \
	t ^= E_KEY[6 * (i) + 5]; E_KEY[6 * (i) + 11] = t; \
} while (0)

#define loop8(i) \
do { \
	t = rotr(t, 8); t = ls_box(t) ^ rco_tab[(i)]; \
	t ^= E_KEY[8 * (i)];     E_KEY[8 * (i) + 8] = t; \
	t ^= E_KEY[8 * (i) + 1]; E_KEY[8 * (i) + 9] = t; \
	t ^= E_KEY[8 * (i) + 2]; E_KEY[8 * (i) + 10] = t; \
	t ^= E_KEY[8 * (i) + 3]; E_KEY[8 * (i) + 11] = t; \
	t  = E_KEY[8 * (i) + 4] ^ ls_box(t); \
	E_KEY[8 * (i) + 12] = t; \
	t ^= E_KEY[8 * (i) + 5]; E_KEY[8 * (i) + 13] = t; \
	t ^= E_KEY[8 * (i) + 6]; E_KEY[8 * (i) + 14] = t; \
	t ^= E_KEY[8 * (i) + 7]; E_KEY[8 * (i) + 15] = t; \
} while (0)
274
/*
 * Report whether the ACE can generate the extended key by itself for a
 * key of key_len bytes.  Returns 1 if so, 0 otherwise.
 */
static inline int
aes_hw_extkey_available(uint8_t key_len)
{
	/*
	 * TODO: We should check the actual CPU model/stepping, as the
	 * capability may be added for more key sizes in later CPU
	 * revisions.  For now only 16-byte keys qualify.
	 */
	return key_len == 16;
}
287
Herbert Xu6789b2d2005-07-06 13:52:27 -0700288static inline struct aes_ctx *aes_ctx(void *ctx)
289{
290 return (struct aes_ctx *)ALIGN((unsigned long)ctx, PADLOCK_ALIGNMENT);
291}
292
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293static int
294aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t *flags)
295{
Herbert Xu6789b2d2005-07-06 13:52:27 -0700296 struct aes_ctx *ctx = aes_ctx(ctx_arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 uint32_t i, t, u, v, w;
298 uint32_t P[AES_EXTENDED_KEY_SIZE];
299 uint32_t rounds;
300
301 if (key_len != 16 && key_len != 24 && key_len != 32) {
302 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
303 return -EINVAL;
304 }
305
306 ctx->key_length = key_len;
307
Herbert Xu6789b2d2005-07-06 13:52:27 -0700308 /*
309 * If the hardware is capable of generating the extended key
310 * itself we must supply the plain key for both encryption
311 * and decryption.
312 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 ctx->E = ctx->e_data;
Herbert Xu6789b2d2005-07-06 13:52:27 -0700314 ctx->D = ctx->e_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
316 E_KEY[0] = uint32_t_in (in_key);
317 E_KEY[1] = uint32_t_in (in_key + 4);
318 E_KEY[2] = uint32_t_in (in_key + 8);
319 E_KEY[3] = uint32_t_in (in_key + 12);
320
Herbert Xu6789b2d2005-07-06 13:52:27 -0700321 /* Prepare control words. */
322 memset(&ctx->cword, 0, sizeof(ctx->cword));
323
324 ctx->cword.decrypt.encdec = 1;
325 ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4;
326 ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds;
327 ctx->cword.encrypt.ksize = (key_len - 16) / 8;
328 ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize;
329
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 /* Don't generate extended keys if the hardware can do it. */
331 if (aes_hw_extkey_available(key_len))
332 return 0;
333
Herbert Xu6789b2d2005-07-06 13:52:27 -0700334 ctx->D = ctx->d_data;
335 ctx->cword.encrypt.keygen = 1;
336 ctx->cword.decrypt.keygen = 1;
337
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 switch (key_len) {
339 case 16:
340 t = E_KEY[3];
341 for (i = 0; i < 10; ++i)
342 loop4 (i);
343 break;
344
345 case 24:
346 E_KEY[4] = uint32_t_in (in_key + 16);
347 t = E_KEY[5] = uint32_t_in (in_key + 20);
348 for (i = 0; i < 8; ++i)
349 loop6 (i);
350 break;
351
352 case 32:
353 E_KEY[4] = uint32_t_in (in_key + 16);
354 E_KEY[5] = uint32_t_in (in_key + 20);
355 E_KEY[6] = uint32_t_in (in_key + 24);
356 t = E_KEY[7] = uint32_t_in (in_key + 28);
357 for (i = 0; i < 7; ++i)
358 loop8 (i);
359 break;
360 }
361
362 D_KEY[0] = E_KEY[0];
363 D_KEY[1] = E_KEY[1];
364 D_KEY[2] = E_KEY[2];
365 D_KEY[3] = E_KEY[3];
366
367 for (i = 4; i < key_len + 24; ++i) {
368 imix_col (D_KEY[i], E_KEY[i]);
369 }
370
371 /* PadLock needs a different format of the decryption key. */
372 rounds = 10 + (key_len - 16) / 4;
373
374 for (i = 0; i < rounds; i++) {
375 P[((i + 1) * 4) + 0] = D_KEY[((rounds - i - 1) * 4) + 0];
376 P[((i + 1) * 4) + 1] = D_KEY[((rounds - i - 1) * 4) + 1];
377 P[((i + 1) * 4) + 2] = D_KEY[((rounds - i - 1) * 4) + 2];
378 P[((i + 1) * 4) + 3] = D_KEY[((rounds - i - 1) * 4) + 3];
379 }
380
381 P[0] = E_KEY[(rounds * 4) + 0];
382 P[1] = E_KEY[(rounds * 4) + 1];
383 P[2] = E_KEY[(rounds * 4) + 2];
384 P[3] = E_KEY[(rounds * 4) + 3];
385
386 memcpy(D_KEY, P, AES_EXTENDED_KEY_SIZE_B);
387
388 return 0;
389}
390
391/* ====== Encryption/decryption routines ====== */
392
393/* This is the real call to PadLock. */
Herbert Xu6789b2d2005-07-06 13:52:27 -0700394static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
395 void *control_word, u32 count)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396{
397 asm volatile ("pushfl; popfl"); /* enforce key reload. */
398 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
399 : "+S"(input), "+D"(output)
400 : "d"(control_word), "b"(key), "c"(count));
401}
402
403static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
405{
Herbert Xu6789b2d2005-07-06 13:52:27 -0700406 struct aes_ctx *ctx = aes_ctx(ctx_arg);
407 padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408}
409
410static void
411aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
412{
Herbert Xu6789b2d2005-07-06 13:52:27 -0700413 struct aes_ctx *ctx = aes_ctx(ctx_arg);
414 padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415}
416
417static struct crypto_alg aes_alg = {
418 .cra_name = "aes",
419 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
420 .cra_blocksize = AES_BLOCK_SIZE,
Herbert Xu6789b2d2005-07-06 13:52:27 -0700421 .cra_ctxsize = sizeof(struct aes_ctx) +
422 PADLOCK_ALIGNMENT,
423 .cra_alignmask = PADLOCK_ALIGNMENT - 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 .cra_module = THIS_MODULE,
425 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
426 .cra_u = {
427 .cipher = {
428 .cia_min_keysize = AES_MIN_KEY_SIZE,
429 .cia_max_keysize = AES_MAX_KEY_SIZE,
430 .cia_setkey = aes_set_key,
431 .cia_encrypt = aes_encrypt,
432 .cia_decrypt = aes_decrypt
433 }
434 }
435};
436
437int __init padlock_init_aes(void)
438{
439 printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
440
441 gen_tabs();
442 return crypto_register_alg(&aes_alg);
443}
444
445void __exit padlock_fini_aes(void)
446{
447 crypto_unregister_alg(&aes_alg);
448}