Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 1 | /* |
| 2 | * Salsa20: Salsa20 stream cipher algorithm |
| 3 | * |
| 4 | * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com> |
| 5 | * |
| 6 | * Derived from: |
| 7 | * - salsa20.c: Public domain C code by Daniel J. Bernstein <djb@cr.yp.to> |
| 8 | * |
| 9 | * Salsa20 is a stream cipher candidate in eSTREAM, the ECRYPT Stream |
| 10 | * Cipher Project. It is designed by Daniel J. Bernstein <djb@cr.yp.to>. |
| 11 | * More information about eSTREAM and Salsa20 can be found here: |
| 12 | * http://www.ecrypt.eu.org/stream/ |
| 13 | * http://cr.yp.to/snuffle.html |
| 14 | * |
| 15 | * This program is free software; you can redistribute it and/or modify it |
| 16 | * under the terms of the GNU General Public License as published by the Free |
| 17 | * Software Foundation; either version 2 of the License, or (at your option) |
| 18 | * any later version. |
| 19 | * |
| 20 | */ |
| 21 | |
| 22 | #include <linux/init.h> |
| 23 | #include <linux/module.h> |
| 24 | #include <linux/errno.h> |
| 25 | #include <linux/crypto.h> |
| 26 | #include <linux/types.h> |
| 27 | #include <crypto/algapi.h> |
| 28 | #include <asm/byteorder.h> |
| 29 | |
/* Length of the per-request IV (nonce), in bytes. */
#define SALSA20_IV_SIZE		8U

/* Shortest and longest key lengths advertised to the crypto API, in bytes. */
#define SALSA20_MIN_KEY_SIZE	16U
#define SALSA20_MAX_KEY_SIZE	32U
| 33 | |
| 34 | /* |
| 35 | * Start of code taken from D. J. Bernstein's reference implementation. |
| 36 | * With some modifications and optimizations made to suit our needs. |
| 37 | */ |
| 38 | |
| 39 | /* |
| 40 | salsa20-ref.c version 20051118 |
| 41 | D. J. Bernstein |
| 42 | Public domain. |
| 43 | */ |
| 44 | |
/*
 * Word-level helpers for the Salsa20 core.  All of them are meant to be
 * used on 32-bit unsigned (u32) operands.
 */

/* Rotate the 32-bit value v left by n bits; n must be in the range 1..31. */
#define ROTATE(v,n) (((v) << (n)) | ((v) >> (32 - (n))))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) ((v) + (w))
#define PLUSONE(v) (PLUS((v),1))

/*
 * Store the 32-bit value v at byte pointer p, least significant byte first.
 *
 * v is parenthesized so that compound arguments (e.g. "a | b") are shifted
 * as a whole, and the body is wrapped in do { } while (0) so the macro
 * behaves like a single statement (safe in an unbraced if/else).
 * Note that both p and v are evaluated more than once.
 */
#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = ((v) >>  0) & 0xff; \
		(p)[1] = ((v) >>  8) & 0xff; \
		(p)[2] = ((v) >> 16) & 0xff; \
		(p)[3] = ((v) >> 24) & 0xff; \
	} while (0)

/* Load a 32-bit value from the four bytes at p, least significant first. */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])      ) | ((u32)((p)[1]) <<  8) | \
	 ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24))
| 55 | |
| 56 | struct salsa20_ctx |
| 57 | { |
| 58 | u32 input[16]; |
| 59 | }; |
| 60 | |
| 61 | static void salsa20_wordtobyte(u8 output[64], const u32 input[16]) |
| 62 | { |
| 63 | u32 x[16]; |
| 64 | int i; |
| 65 | |
| 66 | memcpy(x, input, sizeof(x)); |
| 67 | for (i = 20; i > 0; i -= 2) { |
| 68 | x[ 4] = XOR(x[ 4],ROTATE(PLUS(x[ 0],x[12]), 7)); |
| 69 | x[ 8] = XOR(x[ 8],ROTATE(PLUS(x[ 4],x[ 0]), 9)); |
| 70 | x[12] = XOR(x[12],ROTATE(PLUS(x[ 8],x[ 4]),13)); |
| 71 | x[ 0] = XOR(x[ 0],ROTATE(PLUS(x[12],x[ 8]),18)); |
| 72 | x[ 9] = XOR(x[ 9],ROTATE(PLUS(x[ 5],x[ 1]), 7)); |
| 73 | x[13] = XOR(x[13],ROTATE(PLUS(x[ 9],x[ 5]), 9)); |
| 74 | x[ 1] = XOR(x[ 1],ROTATE(PLUS(x[13],x[ 9]),13)); |
| 75 | x[ 5] = XOR(x[ 5],ROTATE(PLUS(x[ 1],x[13]),18)); |
| 76 | x[14] = XOR(x[14],ROTATE(PLUS(x[10],x[ 6]), 7)); |
| 77 | x[ 2] = XOR(x[ 2],ROTATE(PLUS(x[14],x[10]), 9)); |
| 78 | x[ 6] = XOR(x[ 6],ROTATE(PLUS(x[ 2],x[14]),13)); |
| 79 | x[10] = XOR(x[10],ROTATE(PLUS(x[ 6],x[ 2]),18)); |
| 80 | x[ 3] = XOR(x[ 3],ROTATE(PLUS(x[15],x[11]), 7)); |
| 81 | x[ 7] = XOR(x[ 7],ROTATE(PLUS(x[ 3],x[15]), 9)); |
| 82 | x[11] = XOR(x[11],ROTATE(PLUS(x[ 7],x[ 3]),13)); |
| 83 | x[15] = XOR(x[15],ROTATE(PLUS(x[11],x[ 7]),18)); |
| 84 | x[ 1] = XOR(x[ 1],ROTATE(PLUS(x[ 0],x[ 3]), 7)); |
| 85 | x[ 2] = XOR(x[ 2],ROTATE(PLUS(x[ 1],x[ 0]), 9)); |
| 86 | x[ 3] = XOR(x[ 3],ROTATE(PLUS(x[ 2],x[ 1]),13)); |
| 87 | x[ 0] = XOR(x[ 0],ROTATE(PLUS(x[ 3],x[ 2]),18)); |
| 88 | x[ 6] = XOR(x[ 6],ROTATE(PLUS(x[ 5],x[ 4]), 7)); |
| 89 | x[ 7] = XOR(x[ 7],ROTATE(PLUS(x[ 6],x[ 5]), 9)); |
| 90 | x[ 4] = XOR(x[ 4],ROTATE(PLUS(x[ 7],x[ 6]),13)); |
| 91 | x[ 5] = XOR(x[ 5],ROTATE(PLUS(x[ 4],x[ 7]),18)); |
| 92 | x[11] = XOR(x[11],ROTATE(PLUS(x[10],x[ 9]), 7)); |
| 93 | x[ 8] = XOR(x[ 8],ROTATE(PLUS(x[11],x[10]), 9)); |
| 94 | x[ 9] = XOR(x[ 9],ROTATE(PLUS(x[ 8],x[11]),13)); |
| 95 | x[10] = XOR(x[10],ROTATE(PLUS(x[ 9],x[ 8]),18)); |
| 96 | x[12] = XOR(x[12],ROTATE(PLUS(x[15],x[14]), 7)); |
| 97 | x[13] = XOR(x[13],ROTATE(PLUS(x[12],x[15]), 9)); |
| 98 | x[14] = XOR(x[14],ROTATE(PLUS(x[13],x[12]),13)); |
| 99 | x[15] = XOR(x[15],ROTATE(PLUS(x[14],x[13]),18)); |
| 100 | } |
| 101 | for (i = 0; i < 16; ++i) |
| 102 | x[i] = PLUS(x[i],input[i]); |
| 103 | for (i = 0; i < 16; ++i) |
| 104 | U32TO8_LITTLE(output + 4 * i,x[i]); |
| 105 | } |
| 106 | |
| 107 | static const char sigma[16] = "expand 32-byte k"; |
| 108 | static const char tau[16] = "expand 16-byte k"; |
| 109 | |
| 110 | static void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, u32 kbytes) |
| 111 | { |
| 112 | const char *constants; |
| 113 | |
| 114 | ctx->input[1] = U8TO32_LITTLE(k + 0); |
| 115 | ctx->input[2] = U8TO32_LITTLE(k + 4); |
| 116 | ctx->input[3] = U8TO32_LITTLE(k + 8); |
| 117 | ctx->input[4] = U8TO32_LITTLE(k + 12); |
| 118 | if (kbytes == 32) { /* recommended */ |
| 119 | k += 16; |
| 120 | constants = sigma; |
| 121 | } else { /* kbytes == 16 */ |
| 122 | constants = tau; |
| 123 | } |
| 124 | ctx->input[11] = U8TO32_LITTLE(k + 0); |
| 125 | ctx->input[12] = U8TO32_LITTLE(k + 4); |
| 126 | ctx->input[13] = U8TO32_LITTLE(k + 8); |
| 127 | ctx->input[14] = U8TO32_LITTLE(k + 12); |
| 128 | ctx->input[0] = U8TO32_LITTLE(constants + 0); |
| 129 | ctx->input[5] = U8TO32_LITTLE(constants + 4); |
| 130 | ctx->input[10] = U8TO32_LITTLE(constants + 8); |
| 131 | ctx->input[15] = U8TO32_LITTLE(constants + 12); |
| 132 | } |
| 133 | |
| 134 | static void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv) |
| 135 | { |
| 136 | ctx->input[6] = U8TO32_LITTLE(iv + 0); |
| 137 | ctx->input[7] = U8TO32_LITTLE(iv + 4); |
| 138 | ctx->input[8] = 0; |
| 139 | ctx->input[9] = 0; |
| 140 | } |
| 141 | |
| 142 | static void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, u8 *dst, |
| 143 | const u8 *src, unsigned int bytes) |
| 144 | { |
| 145 | u8 buf[64]; |
Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 146 | |
| 147 | if (dst != src) |
| 148 | memcpy(dst, src, bytes); |
| 149 | |
| 150 | while (bytes) { |
| 151 | salsa20_wordtobyte(buf, ctx->input); |
| 152 | |
| 153 | ctx->input[8] = PLUSONE(ctx->input[8]); |
| 154 | if (!ctx->input[8]) |
| 155 | ctx->input[9] = PLUSONE(ctx->input[9]); |
| 156 | |
| 157 | if (bytes <= 64) { |
Tan Swee Heng | eb6f13e | 2007-12-07 16:38:45 +0800 | [diff] [blame] | 158 | crypto_xor(dst, buf, bytes); |
Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 159 | return; |
| 160 | } |
| 161 | |
Tan Swee Heng | eb6f13e | 2007-12-07 16:38:45 +0800 | [diff] [blame] | 162 | crypto_xor(dst, buf, 64); |
Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 163 | bytes -= 64; |
| 164 | dst += 64; |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | /* |
| 169 | * End of code taken from D. J. Bernstein's reference implementation. |
| 170 | */ |
| 171 | |
| 172 | static int setkey(struct crypto_tfm *tfm, const u8 *key, |
| 173 | unsigned int keysize) |
| 174 | { |
| 175 | struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); |
| 176 | salsa20_keysetup(ctx, key, keysize); |
| 177 | return 0; |
| 178 | } |
| 179 | |
| 180 | static int encrypt(struct blkcipher_desc *desc, |
| 181 | struct scatterlist *dst, struct scatterlist *src, |
| 182 | unsigned int nbytes) |
| 183 | { |
| 184 | struct blkcipher_walk walk; |
| 185 | struct crypto_blkcipher *tfm = desc->tfm; |
| 186 | struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); |
| 187 | int err; |
| 188 | |
| 189 | blkcipher_walk_init(&walk, dst, src, nbytes); |
Tan Swee Heng | eb6f13e | 2007-12-07 16:38:45 +0800 | [diff] [blame] | 190 | err = blkcipher_walk_virt_block(desc, &walk, 64); |
Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 191 | |
| 192 | salsa20_ivsetup(ctx, walk.iv); |
Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 193 | |
Tan Swee Heng | eb6f13e | 2007-12-07 16:38:45 +0800 | [diff] [blame] | 194 | if (likely(walk.nbytes == nbytes)) |
| 195 | { |
| 196 | salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, |
| 197 | walk.src.virt.addr, nbytes); |
| 198 | return blkcipher_walk_done(desc, &walk, 0); |
| 199 | } |
| 200 | |
| 201 | while (walk.nbytes >= 64) { |
| 202 | salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, |
| 203 | walk.src.virt.addr, |
| 204 | walk.nbytes - (walk.nbytes % 64)); |
| 205 | err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); |
| 206 | } |
| 207 | |
| 208 | if (walk.nbytes) { |
| 209 | salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, |
| 210 | walk.src.virt.addr, walk.nbytes); |
| 211 | err = blkcipher_walk_done(desc, &walk, 0); |
| 212 | } |
| 213 | |
Tan Swee Heng | 2407d60 | 2007-11-23 19:45:00 +0800 | [diff] [blame] | 214 | return err; |
| 215 | } |
| 216 | |
| 217 | static struct crypto_alg alg = { |
| 218 | .cra_name = "salsa20", |
| 219 | .cra_driver_name = "salsa20-generic", |
| 220 | .cra_priority = 100, |
| 221 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, |
| 222 | .cra_type = &crypto_blkcipher_type, |
| 223 | .cra_blocksize = 1, |
| 224 | .cra_ctxsize = sizeof(struct salsa20_ctx), |
| 225 | .cra_alignmask = 3, |
| 226 | .cra_module = THIS_MODULE, |
| 227 | .cra_list = LIST_HEAD_INIT(alg.cra_list), |
| 228 | .cra_u = { |
| 229 | .blkcipher = { |
| 230 | .setkey = setkey, |
| 231 | .encrypt = encrypt, |
| 232 | .decrypt = encrypt, |
| 233 | .min_keysize = SALSA20_MIN_KEY_SIZE, |
| 234 | .max_keysize = SALSA20_MAX_KEY_SIZE, |
| 235 | .ivsize = SALSA20_IV_SIZE, |
| 236 | } |
| 237 | } |
| 238 | }; |
| 239 | |
| 240 | static int __init init(void) |
| 241 | { |
| 242 | return crypto_register_alg(&alg); |
| 243 | } |
| 244 | |
| 245 | static void __exit fini(void) |
| 246 | { |
| 247 | crypto_unregister_alg(&alg); |
| 248 | } |
| 249 | |
| 250 | module_init(init); |
| 251 | module_exit(fini); |
| 252 | |
| 253 | MODULE_LICENSE("GPL"); |
| 254 | MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm"); |
| 255 | MODULE_ALIAS("salsa20"); |