blob: 445aab06387b7293ccd9b495901cb74141c9a050 [file] [log] [blame]
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
23
24#include <linux/module.h>
25#include <linux/hardirq.h>
26#include <linux/types.h>
27#include <linux/crypto.h>
28#include <linux/err.h>
29#include <crypto/algapi.h>
30#include <crypto/cast5.h>
31#include <crypto/cryptd.h>
32#include <crypto/ctr.h>
33#include <asm/xcr.h>
34#include <asm/xsave.h>
35#include <asm/crypto/ablk_helper.h>
36#include <asm/crypto/glue_helper.h>
37
38#define CAST5_PARALLEL_BLOCKS 16
39
40asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
41 const u8 *src, bool xor);
42asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
43 const u8 *src);
44
45static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
46 const u8 *src)
47{
48 __cast5_enc_blk_16way(ctx, dst, src, false);
49}
50
51static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
52 const u8 *src)
53{
54 __cast5_enc_blk_16way(ctx, dst, src, true);
55}
56
57static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
58 const u8 *src)
59{
60 cast5_dec_blk_16way(ctx, dst, src);
61}
62
63
64static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
65{
66 return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
67 NULL, fpu_enabled, nbytes);
68}
69
/*
 * Counterpart of cast5_fpu_begin(): forwards the tracked state to
 * glue_fpu_end().
 *
 * @fpu_enabled: state last returned by cast5_fpu_begin()
 *
 * This function returns void, so the helper is invoked as a plain
 * statement; a "return <expression>;" is not permitted in a void function
 * by ISO C.
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
74
75static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
76 bool enc)
77{
78 bool fpu_enabled = false;
79 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
80 const unsigned int bsize = CAST5_BLOCK_SIZE;
81 unsigned int nbytes;
82 int err;
83
84 err = blkcipher_walk_virt(desc, walk);
85 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
86
87 while ((nbytes = walk->nbytes)) {
88 u8 *wsrc = walk->src.virt.addr;
89 u8 *wdst = walk->dst.virt.addr;
90
91 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
92
93 /* Process multi-block batch */
94 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
95 do {
96 if (enc)
97 cast5_enc_blk_xway(ctx, wdst, wsrc);
98 else
99 cast5_dec_blk_xway(ctx, wdst, wsrc);
100
101 wsrc += bsize * CAST5_PARALLEL_BLOCKS;
102 wdst += bsize * CAST5_PARALLEL_BLOCKS;
103 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
104 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
105
106 if (nbytes < bsize)
107 goto done;
108 }
109
110 /* Handle leftovers */
111 do {
112 if (enc)
113 __cast5_encrypt(ctx, wdst, wsrc);
114 else
115 __cast5_decrypt(ctx, wdst, wsrc);
116
117 wsrc += bsize;
118 wdst += bsize;
119 nbytes -= bsize;
120 } while (nbytes >= bsize);
121
122done:
123 err = blkcipher_walk_done(desc, walk, nbytes);
124 }
125
126 cast5_fpu_end(fpu_enabled);
127 return err;
128}
129
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes)
132{
133 struct blkcipher_walk walk;
134
135 blkcipher_walk_init(&walk, dst, src, nbytes);
136 return ecb_crypt(desc, &walk, true);
137}
138
139static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
140 struct scatterlist *src, unsigned int nbytes)
141{
142 struct blkcipher_walk walk;
143
144 blkcipher_walk_init(&walk, dst, src, nbytes);
145 return ecb_crypt(desc, &walk, false);
146}
147
148static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
149 struct blkcipher_walk *walk)
150{
151 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
152 const unsigned int bsize = CAST5_BLOCK_SIZE;
153 unsigned int nbytes = walk->nbytes;
154 u64 *src = (u64 *)walk->src.virt.addr;
155 u64 *dst = (u64 *)walk->dst.virt.addr;
156 u64 *iv = (u64 *)walk->iv;
157
158 do {
159 *dst = *src ^ *iv;
160 __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
161 iv = dst;
162
163 src += 1;
164 dst += 1;
165 nbytes -= bsize;
166 } while (nbytes >= bsize);
167
168 *(u64 *)walk->iv ^= *iv;
169 return nbytes;
170}
171
172static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
173 struct scatterlist *src, unsigned int nbytes)
174{
175 struct blkcipher_walk walk;
176 int err;
177
178 blkcipher_walk_init(&walk, dst, src, nbytes);
179 err = blkcipher_walk_virt(desc, &walk);
180
181 while ((nbytes = walk.nbytes)) {
182 nbytes = __cbc_encrypt(desc, &walk);
183 err = blkcipher_walk_done(desc, &walk, nbytes);
184 }
185
186 return err;
187}
188
189static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
190 struct blkcipher_walk *walk)
191{
192 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
193 const unsigned int bsize = CAST5_BLOCK_SIZE;
194 unsigned int nbytes = walk->nbytes;
195 u64 *src = (u64 *)walk->src.virt.addr;
196 u64 *dst = (u64 *)walk->dst.virt.addr;
197 u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
198 u64 last_iv;
199 int i;
200
201 /* Start of the last block. */
202 src += nbytes / bsize - 1;
203 dst += nbytes / bsize - 1;
204
205 last_iv = *src;
206
207 /* Process multi-block batch */
208 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
209 do {
210 nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
211 src -= CAST5_PARALLEL_BLOCKS - 1;
212 dst -= CAST5_PARALLEL_BLOCKS - 1;
213
214 for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
215 ivs[i] = src[i];
216
217 cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
218
219 for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
220 *(dst + (i + 1)) ^= *(ivs + i);
221
222 nbytes -= bsize;
223 if (nbytes < bsize)
224 goto done;
225
226 *dst ^= *(src - 1);
227 src -= 1;
228 dst -= 1;
229 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
230
231 if (nbytes < bsize)
232 goto done;
233 }
234
235 /* Handle leftovers */
236 for (;;) {
237 __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
238
239 nbytes -= bsize;
240 if (nbytes < bsize)
241 break;
242
243 *dst ^= *(src - 1);
244 src -= 1;
245 dst -= 1;
246 }
247
248done:
249 *dst ^= *(u64 *)walk->iv;
250 *(u64 *)walk->iv = last_iv;
251
252 return nbytes;
253}
254
255static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
256 struct scatterlist *src, unsigned int nbytes)
257{
258 bool fpu_enabled = false;
259 struct blkcipher_walk walk;
260 int err;
261
262 blkcipher_walk_init(&walk, dst, src, nbytes);
263 err = blkcipher_walk_virt(desc, &walk);
264 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
265
266 while ((nbytes = walk.nbytes)) {
267 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
268 nbytes = __cbc_decrypt(desc, &walk);
269 err = blkcipher_walk_done(desc, &walk, nbytes);
270 }
271
272 cast5_fpu_end(fpu_enabled);
273 return err;
274}
275
276static void ctr_crypt_final(struct blkcipher_desc *desc,
277 struct blkcipher_walk *walk)
278{
279 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
280 u8 *ctrblk = walk->iv;
281 u8 keystream[CAST5_BLOCK_SIZE];
282 u8 *src = walk->src.virt.addr;
283 u8 *dst = walk->dst.virt.addr;
284 unsigned int nbytes = walk->nbytes;
285
286 __cast5_encrypt(ctx, keystream, ctrblk);
287 crypto_xor(keystream, src, nbytes);
288 memcpy(dst, keystream, nbytes);
289
290 crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
291}
292
293static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
294 struct blkcipher_walk *walk)
295{
296 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
297 const unsigned int bsize = CAST5_BLOCK_SIZE;
298 unsigned int nbytes = walk->nbytes;
299 u64 *src = (u64 *)walk->src.virt.addr;
300 u64 *dst = (u64 *)walk->dst.virt.addr;
301 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
302 __be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
303 int i;
304
305 /* Process multi-block batch */
306 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
307 do {
308 /* create ctrblks for parallel encrypt */
309 for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
310 if (dst != src)
311 dst[i] = src[i];
312
313 ctrblocks[i] = cpu_to_be64(ctrblk++);
314 }
315
316 cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
317 (u8 *)ctrblocks);
318
319 src += CAST5_PARALLEL_BLOCKS;
320 dst += CAST5_PARALLEL_BLOCKS;
321 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
322 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
323
324 if (nbytes < bsize)
325 goto done;
326 }
327
328 /* Handle leftovers */
329 do {
330 if (dst != src)
331 *dst = *src;
332
333 ctrblocks[0] = cpu_to_be64(ctrblk++);
334
335 __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
336 *dst ^= ctrblocks[0];
337
338 src += 1;
339 dst += 1;
340 nbytes -= bsize;
341 } while (nbytes >= bsize);
342
343done:
344 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
345 return nbytes;
346}
347
348static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
349 struct scatterlist *src, unsigned int nbytes)
350{
351 bool fpu_enabled = false;
352 struct blkcipher_walk walk;
353 int err;
354
355 blkcipher_walk_init(&walk, dst, src, nbytes);
356 err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
357 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
358
359 while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
360 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
361 nbytes = __ctr_crypt(desc, &walk);
362 err = blkcipher_walk_done(desc, &walk, nbytes);
363 }
364
365 cast5_fpu_end(fpu_enabled);
366
367 if (walk.nbytes) {
368 ctr_crypt_final(desc, &walk);
369 err = blkcipher_walk_done(desc, &walk, 0);
370 }
371
372 return err;
373}
374
375
376static struct crypto_alg cast5_algs[6] = { {
377 .cra_name = "__ecb-cast5-avx",
378 .cra_driver_name = "__driver-ecb-cast5-avx",
379 .cra_priority = 0,
380 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
381 .cra_blocksize = CAST5_BLOCK_SIZE,
382 .cra_ctxsize = sizeof(struct cast5_ctx),
383 .cra_alignmask = 0,
384 .cra_type = &crypto_blkcipher_type,
385 .cra_module = THIS_MODULE,
386 .cra_u = {
387 .blkcipher = {
388 .min_keysize = CAST5_MIN_KEY_SIZE,
389 .max_keysize = CAST5_MAX_KEY_SIZE,
390 .setkey = cast5_setkey,
391 .encrypt = ecb_encrypt,
392 .decrypt = ecb_decrypt,
393 },
394 },
395}, {
396 .cra_name = "__cbc-cast5-avx",
397 .cra_driver_name = "__driver-cbc-cast5-avx",
398 .cra_priority = 0,
399 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
400 .cra_blocksize = CAST5_BLOCK_SIZE,
401 .cra_ctxsize = sizeof(struct cast5_ctx),
402 .cra_alignmask = 0,
403 .cra_type = &crypto_blkcipher_type,
404 .cra_module = THIS_MODULE,
405 .cra_u = {
406 .blkcipher = {
407 .min_keysize = CAST5_MIN_KEY_SIZE,
408 .max_keysize = CAST5_MAX_KEY_SIZE,
409 .setkey = cast5_setkey,
410 .encrypt = cbc_encrypt,
411 .decrypt = cbc_decrypt,
412 },
413 },
414}, {
415 .cra_name = "__ctr-cast5-avx",
416 .cra_driver_name = "__driver-ctr-cast5-avx",
417 .cra_priority = 0,
418 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
419 .cra_blocksize = 1,
420 .cra_ctxsize = sizeof(struct cast5_ctx),
421 .cra_alignmask = 0,
422 .cra_type = &crypto_blkcipher_type,
423 .cra_module = THIS_MODULE,
424 .cra_u = {
425 .blkcipher = {
426 .min_keysize = CAST5_MIN_KEY_SIZE,
427 .max_keysize = CAST5_MAX_KEY_SIZE,
428 .ivsize = CAST5_BLOCK_SIZE,
429 .setkey = cast5_setkey,
430 .encrypt = ctr_crypt,
431 .decrypt = ctr_crypt,
432 },
433 },
434}, {
435 .cra_name = "ecb(cast5)",
436 .cra_driver_name = "ecb-cast5-avx",
437 .cra_priority = 200,
438 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
439 .cra_blocksize = CAST5_BLOCK_SIZE,
440 .cra_ctxsize = sizeof(struct async_helper_ctx),
441 .cra_alignmask = 0,
442 .cra_type = &crypto_ablkcipher_type,
443 .cra_module = THIS_MODULE,
444 .cra_init = ablk_init,
445 .cra_exit = ablk_exit,
446 .cra_u = {
447 .ablkcipher = {
448 .min_keysize = CAST5_MIN_KEY_SIZE,
449 .max_keysize = CAST5_MAX_KEY_SIZE,
450 .setkey = ablk_set_key,
451 .encrypt = ablk_encrypt,
452 .decrypt = ablk_decrypt,
453 },
454 },
455}, {
456 .cra_name = "cbc(cast5)",
457 .cra_driver_name = "cbc-cast5-avx",
458 .cra_priority = 200,
459 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
460 .cra_blocksize = CAST5_BLOCK_SIZE,
461 .cra_ctxsize = sizeof(struct async_helper_ctx),
462 .cra_alignmask = 0,
463 .cra_type = &crypto_ablkcipher_type,
464 .cra_module = THIS_MODULE,
465 .cra_init = ablk_init,
466 .cra_exit = ablk_exit,
467 .cra_u = {
468 .ablkcipher = {
469 .min_keysize = CAST5_MIN_KEY_SIZE,
470 .max_keysize = CAST5_MAX_KEY_SIZE,
471 .ivsize = CAST5_BLOCK_SIZE,
472 .setkey = ablk_set_key,
473 .encrypt = __ablk_encrypt,
474 .decrypt = ablk_decrypt,
475 },
476 },
477}, {
478 .cra_name = "ctr(cast5)",
479 .cra_driver_name = "ctr-cast5-avx",
480 .cra_priority = 200,
481 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
482 .cra_blocksize = 1,
483 .cra_ctxsize = sizeof(struct async_helper_ctx),
484 .cra_alignmask = 0,
485 .cra_type = &crypto_ablkcipher_type,
486 .cra_module = THIS_MODULE,
487 .cra_init = ablk_init,
488 .cra_exit = ablk_exit,
489 .cra_u = {
490 .ablkcipher = {
491 .min_keysize = CAST5_MIN_KEY_SIZE,
492 .max_keysize = CAST5_MAX_KEY_SIZE,
493 .ivsize = CAST5_BLOCK_SIZE,
494 .setkey = ablk_set_key,
495 .encrypt = ablk_encrypt,
496 .decrypt = ablk_encrypt,
497 .geniv = "chainiv",
498 },
499 },
500} };
501
502static int __init cast5_init(void)
503{
504 u64 xcr0;
505
506 if (!cpu_has_avx || !cpu_has_osxsave) {
507 pr_info("AVX instructions are not detected.\n");
508 return -ENODEV;
509 }
510
511 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
512 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
513 pr_info("AVX detected but unusable.\n");
514 return -ENODEV;
515 }
516
517 return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
518}
519
520static void __exit cast5_exit(void)
521{
522 crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
523}
524
525module_init(cast5_init);
526module_exit(cast5_exit);
527
528MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
529MODULE_LICENSE("GPL");
530MODULE_ALIAS("cast5");