blob: e57e20ab5e0bc49ffa098ec9e86ab396ee2b71ea [file] [log] [blame]
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
23
24#include <linux/module.h>
25#include <linux/hardirq.h>
26#include <linux/types.h>
27#include <linux/crypto.h>
28#include <linux/err.h>
Ard Biesheuvel801201a2013-09-20 09:55:41 +020029#include <crypto/ablk_helper.h>
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020030#include <crypto/algapi.h>
31#include <crypto/cast5.h>
32#include <crypto/cryptd.h>
33#include <crypto/ctr.h>
34#include <asm/xcr.h>
35#include <asm/xsave.h>
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020036#include <asm/crypto/glue_helper.h>
37
/* Number of cipher blocks consumed by one call to a *_16way routine below. */
#define CAST5_PARALLEL_BLOCKS 16

/*
 * Multi-block primitives that are only declared in this file.
 * NOTE(review): presumably implemented in the accompanying AVX assembler
 * source -- confirm.  Every caller in this file advances its src/dst
 * pointers by CAST5_PARALLEL_BLOCKS blocks after each call.
 */
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
/*
 * The CTR variant additionally receives the counter block.  The caller does
 * not touch walk->iv itself on this path, so the routine is presumably
 * responsible for advancing it -- TODO confirm against the assembler.
 */
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020048
49static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
50{
51 return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
52 NULL, fpu_enabled, nbytes);
53}
54
/*
 * Counterpart of cast5_fpu_begin(): passes the flag obtained from the last
 * cast5_fpu_begin() call on to glue_fpu_end().
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	/*
	 * Plain call instead of "return glue_fpu_end(...)": ISO C does not
	 * permit a return statement with an expression in a void function.
	 */
	glue_fpu_end(fpu_enabled);
}
59
60static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
61 bool enc)
62{
63 bool fpu_enabled = false;
64 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
65 const unsigned int bsize = CAST5_BLOCK_SIZE;
66 unsigned int nbytes;
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030067 void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020068 int err;
69
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030070 fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
71
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020072 err = blkcipher_walk_virt(desc, walk);
73 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
74
75 while ((nbytes = walk->nbytes)) {
76 u8 *wsrc = walk->src.virt.addr;
77 u8 *wdst = walk->dst.virt.addr;
78
79 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
80
81 /* Process multi-block batch */
82 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
83 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030084 fn(ctx, wdst, wsrc);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020085
86 wsrc += bsize * CAST5_PARALLEL_BLOCKS;
87 wdst += bsize * CAST5_PARALLEL_BLOCKS;
88 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
89 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
90
91 if (nbytes < bsize)
92 goto done;
93 }
94
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030095 fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
96
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020097 /* Handle leftovers */
98 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030099 fn(ctx, wdst, wsrc);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200100
101 wsrc += bsize;
102 wdst += bsize;
103 nbytes -= bsize;
104 } while (nbytes >= bsize);
105
106done:
107 err = blkcipher_walk_done(desc, walk, nbytes);
108 }
109
110 cast5_fpu_end(fpu_enabled);
111 return err;
112}
113
114static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
115 struct scatterlist *src, unsigned int nbytes)
116{
117 struct blkcipher_walk walk;
118
119 blkcipher_walk_init(&walk, dst, src, nbytes);
120 return ecb_crypt(desc, &walk, true);
121}
122
123static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
124 struct scatterlist *src, unsigned int nbytes)
125{
126 struct blkcipher_walk walk;
127
128 blkcipher_walk_init(&walk, dst, src, nbytes);
129 return ecb_crypt(desc, &walk, false);
130}
131
132static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
133 struct blkcipher_walk *walk)
134{
135 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
136 const unsigned int bsize = CAST5_BLOCK_SIZE;
137 unsigned int nbytes = walk->nbytes;
138 u64 *src = (u64 *)walk->src.virt.addr;
139 u64 *dst = (u64 *)walk->dst.virt.addr;
140 u64 *iv = (u64 *)walk->iv;
141
142 do {
143 *dst = *src ^ *iv;
144 __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
145 iv = dst;
146
147 src += 1;
148 dst += 1;
149 nbytes -= bsize;
150 } while (nbytes >= bsize);
151
Jussi Kivilinna200429c2012-09-19 14:24:57 +0300152 *(u64 *)walk->iv = *iv;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200153 return nbytes;
154}
155
156static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
157 struct scatterlist *src, unsigned int nbytes)
158{
159 struct blkcipher_walk walk;
160 int err;
161
162 blkcipher_walk_init(&walk, dst, src, nbytes);
163 err = blkcipher_walk_virt(desc, &walk);
164
165 while ((nbytes = walk.nbytes)) {
166 nbytes = __cbc_encrypt(desc, &walk);
167 err = blkcipher_walk_done(desc, &walk, nbytes);
168 }
169
170 return err;
171}
172
173static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
174 struct blkcipher_walk *walk)
175{
176 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
177 const unsigned int bsize = CAST5_BLOCK_SIZE;
178 unsigned int nbytes = walk->nbytes;
179 u64 *src = (u64 *)walk->src.virt.addr;
180 u64 *dst = (u64 *)walk->dst.virt.addr;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200181 u64 last_iv;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200182
183 /* Start of the last block. */
184 src += nbytes / bsize - 1;
185 dst += nbytes / bsize - 1;
186
187 last_iv = *src;
188
189 /* Process multi-block batch */
190 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
191 do {
192 nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
193 src -= CAST5_PARALLEL_BLOCKS - 1;
194 dst -= CAST5_PARALLEL_BLOCKS - 1;
195
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300196 cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200197
198 nbytes -= bsize;
199 if (nbytes < bsize)
200 goto done;
201
202 *dst ^= *(src - 1);
203 src -= 1;
204 dst -= 1;
205 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200206 }
207
208 /* Handle leftovers */
209 for (;;) {
210 __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
211
212 nbytes -= bsize;
213 if (nbytes < bsize)
214 break;
215
216 *dst ^= *(src - 1);
217 src -= 1;
218 dst -= 1;
219 }
220
221done:
222 *dst ^= *(u64 *)walk->iv;
223 *(u64 *)walk->iv = last_iv;
224
225 return nbytes;
226}
227
228static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
229 struct scatterlist *src, unsigned int nbytes)
230{
231 bool fpu_enabled = false;
232 struct blkcipher_walk walk;
233 int err;
234
235 blkcipher_walk_init(&walk, dst, src, nbytes);
236 err = blkcipher_walk_virt(desc, &walk);
237 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
238
239 while ((nbytes = walk.nbytes)) {
240 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
241 nbytes = __cbc_decrypt(desc, &walk);
242 err = blkcipher_walk_done(desc, &walk, nbytes);
243 }
244
245 cast5_fpu_end(fpu_enabled);
246 return err;
247}
248
249static void ctr_crypt_final(struct blkcipher_desc *desc,
250 struct blkcipher_walk *walk)
251{
252 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
253 u8 *ctrblk = walk->iv;
254 u8 keystream[CAST5_BLOCK_SIZE];
255 u8 *src = walk->src.virt.addr;
256 u8 *dst = walk->dst.virt.addr;
257 unsigned int nbytes = walk->nbytes;
258
259 __cast5_encrypt(ctx, keystream, ctrblk);
260 crypto_xor(keystream, src, nbytes);
261 memcpy(dst, keystream, nbytes);
262
263 crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
264}
265
266static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
267 struct blkcipher_walk *walk)
268{
269 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
270 const unsigned int bsize = CAST5_BLOCK_SIZE;
271 unsigned int nbytes = walk->nbytes;
272 u64 *src = (u64 *)walk->src.virt.addr;
273 u64 *dst = (u64 *)walk->dst.virt.addr;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200274
275 /* Process multi-block batch */
276 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
277 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300278 cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
279 (__be64 *)walk->iv);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200280
281 src += CAST5_PARALLEL_BLOCKS;
282 dst += CAST5_PARALLEL_BLOCKS;
283 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
284 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
285
286 if (nbytes < bsize)
287 goto done;
288 }
289
290 /* Handle leftovers */
291 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300292 u64 ctrblk;
293
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200294 if (dst != src)
295 *dst = *src;
296
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300297 ctrblk = *(u64 *)walk->iv;
298 be64_add_cpu((__be64 *)walk->iv, 1);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200299
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300300 __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
301 *dst ^= ctrblk;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200302
303 src += 1;
304 dst += 1;
305 nbytes -= bsize;
306 } while (nbytes >= bsize);
307
308done:
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200309 return nbytes;
310}
311
312static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
313 struct scatterlist *src, unsigned int nbytes)
314{
315 bool fpu_enabled = false;
316 struct blkcipher_walk walk;
317 int err;
318
319 blkcipher_walk_init(&walk, dst, src, nbytes);
320 err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
321 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
322
323 while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
324 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
325 nbytes = __ctr_crypt(desc, &walk);
326 err = blkcipher_walk_done(desc, &walk, nbytes);
327 }
328
329 cast5_fpu_end(fpu_enabled);
330
331 if (walk.nbytes) {
332 ctr_crypt_final(desc, &walk);
333 err = blkcipher_walk_done(desc, &walk, 0);
334 }
335
336 return err;
337}
338
339
340static struct crypto_alg cast5_algs[6] = { {
341 .cra_name = "__ecb-cast5-avx",
342 .cra_driver_name = "__driver-ecb-cast5-avx",
343 .cra_priority = 0,
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
345 .cra_blocksize = CAST5_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct cast5_ctx),
347 .cra_alignmask = 0,
348 .cra_type = &crypto_blkcipher_type,
349 .cra_module = THIS_MODULE,
350 .cra_u = {
351 .blkcipher = {
352 .min_keysize = CAST5_MIN_KEY_SIZE,
353 .max_keysize = CAST5_MAX_KEY_SIZE,
354 .setkey = cast5_setkey,
355 .encrypt = ecb_encrypt,
356 .decrypt = ecb_decrypt,
357 },
358 },
359}, {
360 .cra_name = "__cbc-cast5-avx",
361 .cra_driver_name = "__driver-cbc-cast5-avx",
362 .cra_priority = 0,
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
364 .cra_blocksize = CAST5_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct cast5_ctx),
366 .cra_alignmask = 0,
367 .cra_type = &crypto_blkcipher_type,
368 .cra_module = THIS_MODULE,
369 .cra_u = {
370 .blkcipher = {
371 .min_keysize = CAST5_MIN_KEY_SIZE,
372 .max_keysize = CAST5_MAX_KEY_SIZE,
373 .setkey = cast5_setkey,
374 .encrypt = cbc_encrypt,
375 .decrypt = cbc_decrypt,
376 },
377 },
378}, {
379 .cra_name = "__ctr-cast5-avx",
380 .cra_driver_name = "__driver-ctr-cast5-avx",
381 .cra_priority = 0,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
383 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct cast5_ctx),
385 .cra_alignmask = 0,
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
388 .cra_u = {
389 .blkcipher = {
390 .min_keysize = CAST5_MIN_KEY_SIZE,
391 .max_keysize = CAST5_MAX_KEY_SIZE,
392 .ivsize = CAST5_BLOCK_SIZE,
393 .setkey = cast5_setkey,
394 .encrypt = ctr_crypt,
395 .decrypt = ctr_crypt,
396 },
397 },
398}, {
399 .cra_name = "ecb(cast5)",
400 .cra_driver_name = "ecb-cast5-avx",
401 .cra_priority = 200,
402 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
403 .cra_blocksize = CAST5_BLOCK_SIZE,
404 .cra_ctxsize = sizeof(struct async_helper_ctx),
405 .cra_alignmask = 0,
406 .cra_type = &crypto_ablkcipher_type,
407 .cra_module = THIS_MODULE,
408 .cra_init = ablk_init,
409 .cra_exit = ablk_exit,
410 .cra_u = {
411 .ablkcipher = {
412 .min_keysize = CAST5_MIN_KEY_SIZE,
413 .max_keysize = CAST5_MAX_KEY_SIZE,
414 .setkey = ablk_set_key,
415 .encrypt = ablk_encrypt,
416 .decrypt = ablk_decrypt,
417 },
418 },
419}, {
420 .cra_name = "cbc(cast5)",
421 .cra_driver_name = "cbc-cast5-avx",
422 .cra_priority = 200,
423 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
424 .cra_blocksize = CAST5_BLOCK_SIZE,
425 .cra_ctxsize = sizeof(struct async_helper_ctx),
426 .cra_alignmask = 0,
427 .cra_type = &crypto_ablkcipher_type,
428 .cra_module = THIS_MODULE,
429 .cra_init = ablk_init,
430 .cra_exit = ablk_exit,
431 .cra_u = {
432 .ablkcipher = {
433 .min_keysize = CAST5_MIN_KEY_SIZE,
434 .max_keysize = CAST5_MAX_KEY_SIZE,
435 .ivsize = CAST5_BLOCK_SIZE,
436 .setkey = ablk_set_key,
437 .encrypt = __ablk_encrypt,
438 .decrypt = ablk_decrypt,
439 },
440 },
441}, {
442 .cra_name = "ctr(cast5)",
443 .cra_driver_name = "ctr-cast5-avx",
444 .cra_priority = 200,
445 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
446 .cra_blocksize = 1,
447 .cra_ctxsize = sizeof(struct async_helper_ctx),
448 .cra_alignmask = 0,
449 .cra_type = &crypto_ablkcipher_type,
450 .cra_module = THIS_MODULE,
451 .cra_init = ablk_init,
452 .cra_exit = ablk_exit,
453 .cra_u = {
454 .ablkcipher = {
455 .min_keysize = CAST5_MIN_KEY_SIZE,
456 .max_keysize = CAST5_MAX_KEY_SIZE,
457 .ivsize = CAST5_BLOCK_SIZE,
458 .setkey = ablk_set_key,
459 .encrypt = ablk_encrypt,
460 .decrypt = ablk_encrypt,
461 .geniv = "chainiv",
462 },
463 },
464} };
465
466static int __init cast5_init(void)
467{
468 u64 xcr0;
469
470 if (!cpu_has_avx || !cpu_has_osxsave) {
471 pr_info("AVX instructions are not detected.\n");
472 return -ENODEV;
473 }
474
475 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
476 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
477 pr_info("AVX detected but unusable.\n");
478 return -ENODEV;
479 }
480
481 return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
482}
483
484static void __exit cast5_exit(void)
485{
486 crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
487}
488
/* Hook cast5_init()/cast5_exit() up as the module's entry and exit points. */
module_init(cast5_init);
module_exit(cast5_exit);

/* Module metadata; "cast5" is declared as an alias name for this module. */
MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");
494MODULE_ALIAS("cast5");