blob: 8648158f39166f1cf1603fc7f139af629e163bde [file] [log] [blame]
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
23
24#include <linux/module.h>
25#include <linux/hardirq.h>
26#include <linux/types.h>
27#include <linux/crypto.h>
28#include <linux/err.h>
Ard Biesheuvel801201a2013-09-20 09:55:41 +020029#include <crypto/ablk_helper.h>
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020030#include <crypto/algapi.h>
31#include <crypto/cast5.h>
32#include <crypto/cryptd.h>
33#include <crypto/ctr.h>
Ingo Molnard5d34d92015-04-28 10:11:24 +020034#include <asm/fpu/api.h>
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020035#include <asm/crypto/glue_helper.h>
36
37#define CAST5_PARALLEL_BLOCKS 16
38
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030039asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020040 const u8 *src);
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030041asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
42 const u8 *src);
43asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
44 const u8 *src);
45asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
46 __be64 *iv);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020047
48static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
49{
50 return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
51 NULL, fpu_enabled, nbytes);
52}
53
54static inline void cast5_fpu_end(bool fpu_enabled)
55{
56 return glue_fpu_end(fpu_enabled);
57}
58
59static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
60 bool enc)
61{
62 bool fpu_enabled = false;
63 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
64 const unsigned int bsize = CAST5_BLOCK_SIZE;
65 unsigned int nbytes;
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030066 void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020067 int err;
68
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030069 fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
70
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020071 err = blkcipher_walk_virt(desc, walk);
72 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
73
74 while ((nbytes = walk->nbytes)) {
75 u8 *wsrc = walk->src.virt.addr;
76 u8 *wdst = walk->dst.virt.addr;
77
78 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
79
80 /* Process multi-block batch */
81 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
82 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030083 fn(ctx, wdst, wsrc);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020084
85 wsrc += bsize * CAST5_PARALLEL_BLOCKS;
86 wdst += bsize * CAST5_PARALLEL_BLOCKS;
87 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
88 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
89
90 if (nbytes < bsize)
91 goto done;
92 }
93
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030094 fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
95
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020096 /* Handle leftovers */
97 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +030098 fn(ctx, wdst, wsrc);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +020099
100 wsrc += bsize;
101 wdst += bsize;
102 nbytes -= bsize;
103 } while (nbytes >= bsize);
104
105done:
106 err = blkcipher_walk_done(desc, walk, nbytes);
107 }
108
109 cast5_fpu_end(fpu_enabled);
110 return err;
111}
112
113static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
114 struct scatterlist *src, unsigned int nbytes)
115{
116 struct blkcipher_walk walk;
117
118 blkcipher_walk_init(&walk, dst, src, nbytes);
119 return ecb_crypt(desc, &walk, true);
120}
121
122static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
123 struct scatterlist *src, unsigned int nbytes)
124{
125 struct blkcipher_walk walk;
126
127 blkcipher_walk_init(&walk, dst, src, nbytes);
128 return ecb_crypt(desc, &walk, false);
129}
130
131static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
132 struct blkcipher_walk *walk)
133{
134 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
135 const unsigned int bsize = CAST5_BLOCK_SIZE;
136 unsigned int nbytes = walk->nbytes;
137 u64 *src = (u64 *)walk->src.virt.addr;
138 u64 *dst = (u64 *)walk->dst.virt.addr;
139 u64 *iv = (u64 *)walk->iv;
140
141 do {
142 *dst = *src ^ *iv;
143 __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
144 iv = dst;
145
146 src += 1;
147 dst += 1;
148 nbytes -= bsize;
149 } while (nbytes >= bsize);
150
Jussi Kivilinna200429c2012-09-19 14:24:57 +0300151 *(u64 *)walk->iv = *iv;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200152 return nbytes;
153}
154
155static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
156 struct scatterlist *src, unsigned int nbytes)
157{
158 struct blkcipher_walk walk;
159 int err;
160
161 blkcipher_walk_init(&walk, dst, src, nbytes);
162 err = blkcipher_walk_virt(desc, &walk);
163
164 while ((nbytes = walk.nbytes)) {
165 nbytes = __cbc_encrypt(desc, &walk);
166 err = blkcipher_walk_done(desc, &walk, nbytes);
167 }
168
169 return err;
170}
171
172static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
173 struct blkcipher_walk *walk)
174{
175 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
176 const unsigned int bsize = CAST5_BLOCK_SIZE;
177 unsigned int nbytes = walk->nbytes;
178 u64 *src = (u64 *)walk->src.virt.addr;
179 u64 *dst = (u64 *)walk->dst.virt.addr;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200180 u64 last_iv;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200181
182 /* Start of the last block. */
183 src += nbytes / bsize - 1;
184 dst += nbytes / bsize - 1;
185
186 last_iv = *src;
187
188 /* Process multi-block batch */
189 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
190 do {
191 nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
192 src -= CAST5_PARALLEL_BLOCKS - 1;
193 dst -= CAST5_PARALLEL_BLOCKS - 1;
194
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300195 cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200196
197 nbytes -= bsize;
198 if (nbytes < bsize)
199 goto done;
200
201 *dst ^= *(src - 1);
202 src -= 1;
203 dst -= 1;
204 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200205 }
206
207 /* Handle leftovers */
208 for (;;) {
209 __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
210
211 nbytes -= bsize;
212 if (nbytes < bsize)
213 break;
214
215 *dst ^= *(src - 1);
216 src -= 1;
217 dst -= 1;
218 }
219
220done:
221 *dst ^= *(u64 *)walk->iv;
222 *(u64 *)walk->iv = last_iv;
223
224 return nbytes;
225}
226
227static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
228 struct scatterlist *src, unsigned int nbytes)
229{
230 bool fpu_enabled = false;
231 struct blkcipher_walk walk;
232 int err;
233
234 blkcipher_walk_init(&walk, dst, src, nbytes);
235 err = blkcipher_walk_virt(desc, &walk);
236 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
237
238 while ((nbytes = walk.nbytes)) {
239 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
240 nbytes = __cbc_decrypt(desc, &walk);
241 err = blkcipher_walk_done(desc, &walk, nbytes);
242 }
243
244 cast5_fpu_end(fpu_enabled);
245 return err;
246}
247
248static void ctr_crypt_final(struct blkcipher_desc *desc,
249 struct blkcipher_walk *walk)
250{
251 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
252 u8 *ctrblk = walk->iv;
253 u8 keystream[CAST5_BLOCK_SIZE];
254 u8 *src = walk->src.virt.addr;
255 u8 *dst = walk->dst.virt.addr;
256 unsigned int nbytes = walk->nbytes;
257
258 __cast5_encrypt(ctx, keystream, ctrblk);
259 crypto_xor(keystream, src, nbytes);
260 memcpy(dst, keystream, nbytes);
261
262 crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
263}
264
265static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
266 struct blkcipher_walk *walk)
267{
268 struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
269 const unsigned int bsize = CAST5_BLOCK_SIZE;
270 unsigned int nbytes = walk->nbytes;
271 u64 *src = (u64 *)walk->src.virt.addr;
272 u64 *dst = (u64 *)walk->dst.virt.addr;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200273
274 /* Process multi-block batch */
275 if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
276 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300277 cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
278 (__be64 *)walk->iv);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200279
280 src += CAST5_PARALLEL_BLOCKS;
281 dst += CAST5_PARALLEL_BLOCKS;
282 nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
283 } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
284
285 if (nbytes < bsize)
286 goto done;
287 }
288
289 /* Handle leftovers */
290 do {
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300291 u64 ctrblk;
292
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200293 if (dst != src)
294 *dst = *src;
295
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300296 ctrblk = *(u64 *)walk->iv;
297 be64_add_cpu((__be64 *)walk->iv, 1);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200298
Jussi Kivilinnac12ab202012-10-20 15:06:56 +0300299 __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
300 *dst ^= ctrblk;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200301
302 src += 1;
303 dst += 1;
304 nbytes -= bsize;
305 } while (nbytes >= bsize);
306
307done:
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200308 return nbytes;
309}
310
311static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
312 struct scatterlist *src, unsigned int nbytes)
313{
314 bool fpu_enabled = false;
315 struct blkcipher_walk walk;
316 int err;
317
318 blkcipher_walk_init(&walk, dst, src, nbytes);
319 err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
320 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
321
322 while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
323 fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
324 nbytes = __ctr_crypt(desc, &walk);
325 err = blkcipher_walk_done(desc, &walk, nbytes);
326 }
327
328 cast5_fpu_end(fpu_enabled);
329
330 if (walk.nbytes) {
331 ctr_crypt_final(desc, &walk);
332 err = blkcipher_walk_done(desc, &walk, 0);
333 }
334
335 return err;
336}
337
338
339static struct crypto_alg cast5_algs[6] = { {
340 .cra_name = "__ecb-cast5-avx",
341 .cra_driver_name = "__driver-ecb-cast5-avx",
342 .cra_priority = 0,
Stephan Mueller680574e2015-03-30 22:03:57 +0200343 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
344 CRYPTO_ALG_INTERNAL,
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200345 .cra_blocksize = CAST5_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct cast5_ctx),
347 .cra_alignmask = 0,
348 .cra_type = &crypto_blkcipher_type,
349 .cra_module = THIS_MODULE,
350 .cra_u = {
351 .blkcipher = {
352 .min_keysize = CAST5_MIN_KEY_SIZE,
353 .max_keysize = CAST5_MAX_KEY_SIZE,
354 .setkey = cast5_setkey,
355 .encrypt = ecb_encrypt,
356 .decrypt = ecb_decrypt,
357 },
358 },
359}, {
360 .cra_name = "__cbc-cast5-avx",
361 .cra_driver_name = "__driver-cbc-cast5-avx",
362 .cra_priority = 0,
Stephan Mueller680574e2015-03-30 22:03:57 +0200363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
364 CRYPTO_ALG_INTERNAL,
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200365 .cra_blocksize = CAST5_BLOCK_SIZE,
366 .cra_ctxsize = sizeof(struct cast5_ctx),
367 .cra_alignmask = 0,
368 .cra_type = &crypto_blkcipher_type,
369 .cra_module = THIS_MODULE,
370 .cra_u = {
371 .blkcipher = {
372 .min_keysize = CAST5_MIN_KEY_SIZE,
373 .max_keysize = CAST5_MAX_KEY_SIZE,
374 .setkey = cast5_setkey,
375 .encrypt = cbc_encrypt,
376 .decrypt = cbc_decrypt,
377 },
378 },
379}, {
380 .cra_name = "__ctr-cast5-avx",
381 .cra_driver_name = "__driver-ctr-cast5-avx",
382 .cra_priority = 0,
Stephan Mueller680574e2015-03-30 22:03:57 +0200383 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
384 CRYPTO_ALG_INTERNAL,
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200385 .cra_blocksize = 1,
386 .cra_ctxsize = sizeof(struct cast5_ctx),
387 .cra_alignmask = 0,
388 .cra_type = &crypto_blkcipher_type,
389 .cra_module = THIS_MODULE,
390 .cra_u = {
391 .blkcipher = {
392 .min_keysize = CAST5_MIN_KEY_SIZE,
393 .max_keysize = CAST5_MAX_KEY_SIZE,
394 .ivsize = CAST5_BLOCK_SIZE,
395 .setkey = cast5_setkey,
396 .encrypt = ctr_crypt,
397 .decrypt = ctr_crypt,
398 },
399 },
400}, {
401 .cra_name = "ecb(cast5)",
402 .cra_driver_name = "ecb-cast5-avx",
403 .cra_priority = 200,
404 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
405 .cra_blocksize = CAST5_BLOCK_SIZE,
406 .cra_ctxsize = sizeof(struct async_helper_ctx),
407 .cra_alignmask = 0,
408 .cra_type = &crypto_ablkcipher_type,
409 .cra_module = THIS_MODULE,
410 .cra_init = ablk_init,
411 .cra_exit = ablk_exit,
412 .cra_u = {
413 .ablkcipher = {
414 .min_keysize = CAST5_MIN_KEY_SIZE,
415 .max_keysize = CAST5_MAX_KEY_SIZE,
416 .setkey = ablk_set_key,
417 .encrypt = ablk_encrypt,
418 .decrypt = ablk_decrypt,
419 },
420 },
421}, {
422 .cra_name = "cbc(cast5)",
423 .cra_driver_name = "cbc-cast5-avx",
424 .cra_priority = 200,
425 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
426 .cra_blocksize = CAST5_BLOCK_SIZE,
427 .cra_ctxsize = sizeof(struct async_helper_ctx),
428 .cra_alignmask = 0,
429 .cra_type = &crypto_ablkcipher_type,
430 .cra_module = THIS_MODULE,
431 .cra_init = ablk_init,
432 .cra_exit = ablk_exit,
433 .cra_u = {
434 .ablkcipher = {
435 .min_keysize = CAST5_MIN_KEY_SIZE,
436 .max_keysize = CAST5_MAX_KEY_SIZE,
437 .ivsize = CAST5_BLOCK_SIZE,
438 .setkey = ablk_set_key,
439 .encrypt = __ablk_encrypt,
440 .decrypt = ablk_decrypt,
441 },
442 },
443}, {
444 .cra_name = "ctr(cast5)",
445 .cra_driver_name = "ctr-cast5-avx",
446 .cra_priority = 200,
447 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
448 .cra_blocksize = 1,
449 .cra_ctxsize = sizeof(struct async_helper_ctx),
450 .cra_alignmask = 0,
451 .cra_type = &crypto_ablkcipher_type,
452 .cra_module = THIS_MODULE,
453 .cra_init = ablk_init,
454 .cra_exit = ablk_exit,
455 .cra_u = {
456 .ablkcipher = {
457 .min_keysize = CAST5_MIN_KEY_SIZE,
458 .max_keysize = CAST5_MAX_KEY_SIZE,
459 .ivsize = CAST5_BLOCK_SIZE,
460 .setkey = ablk_set_key,
461 .encrypt = ablk_encrypt,
462 .decrypt = ablk_encrypt,
463 .geniv = "chainiv",
464 },
465 },
466} };
467
468static int __init cast5_init(void)
469{
Ingo Molnard5d34d92015-04-28 10:11:24 +0200470 const char *feature_name;
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200471
Dave Hansend91cab72015-09-02 16:31:26 -0700472 if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
473 &feature_name)) {
Ingo Molnard5d34d92015-04-28 10:11:24 +0200474 pr_info("CPU feature '%s' is not supported.\n", feature_name);
Johannes Goetzfried4d6d6a22012-07-11 19:37:37 +0200475 return -ENODEV;
476 }
477
478 return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
479}
480
481static void __exit cast5_exit(void)
482{
483 crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
484}
485
486module_init(cast5_init);
487module_exit(cast5_exit);
488
489MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
490MODULE_LICENSE("GPL");
Kees Cook5d26a102014-11-20 17:05:53 -0800491MODULE_ALIAS_CRYPTO("cast5");