Rik Snel | c494e07 | 2006-11-29 18:59:44 +1100 | [diff] [blame] | 1 | /* gf128mul.h - GF(2^128) multiplication functions |
| 2 | * |
| 3 | * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. |
| 4 | * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> |
| 5 | * |
| 6 | * Based on Dr Brian Gladman's (GPL'd) work published at |
| 7 | * http://fp.gladman.plus.com/cryptography_technology/index.htm |
| 8 | * See the original copyright notice below. |
| 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify it |
| 11 | * under the terms of the GNU General Public License as published by the Free |
| 12 | * Software Foundation; either version 2 of the License, or (at your option) |
| 13 | * any later version. |
| 14 | */ |
| 15 | /* |
| 16 | --------------------------------------------------------------------------- |
| 17 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. |
| 18 | |
| 19 | LICENSE TERMS |
| 20 | |
| 21 | The free distribution and use of this software in both source and binary |
| 22 | form is allowed (with or without changes) provided that: |
| 23 | |
| 24 | 1. distributions of this source code include the above copyright |
| 25 | notice, this list of conditions and the following disclaimer; |
| 26 | |
| 27 | 2. distributions in binary form include the above copyright |
| 28 | notice, this list of conditions and the following disclaimer |
| 29 | in the documentation and/or other associated materials; |
| 30 | |
| 31 | 3. the copyright holder's name is not used to endorse products |
| 32 | built using this software without specific written permission. |
| 33 | |
| 34 | ALTERNATIVELY, provided that this notice is retained in full, this product |
| 35 | may be distributed under the terms of the GNU General Public License (GPL), |
| 36 | in which case the provisions of the GPL apply INSTEAD OF those given above. |
| 37 | |
| 38 | DISCLAIMER |
| 39 | |
| 40 | This software is provided 'as is' with no explicit or implied warranties |
| 41 | in respect of its properties, including, but not limited to, correctness |
| 42 | and/or fitness for purpose. |
| 43 | --------------------------------------------------------------------------- |
| 44 | Issue Date: 31/01/2006 |
| 45 | |
| 46 | An implementation of field multiplication in Galois Field GF(2^128) |
| 47 | */ |
| 48 | |
| 49 | #ifndef _CRYPTO_GF128MUL_H |
| 50 | #define _CRYPTO_GF128MUL_H |
| 51 | |
| 52 | #include <crypto/b128ops.h> |
| 53 | #include <linux/slab.h> |
| 54 | |
| 55 | /* Comment by Rik: |
| 56 | * |
| 57 | * For some background on GF(2^128) see for example: |
| 58 | * http://csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf |
| 59 | * |
| 60 | * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can |
| 61 | * be mapped to computer memory in a variety of ways. Let's examine |
| 62 | * three common cases. |
| 63 | * |
| 64 | * Take a look at the 16 binary octets below in memory order. The msb's |
| 65 | * are left and the lsb's are right. char b[16] is an array and b[0] is |
| 66 | * the first octet. |
| 67 | * |
| 68 | * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 |
| 69 | * b[0] b[1] b[2] b[3] b[13] b[14] b[15] |
| 70 | * |
| 71 | * Every bit is a coefficient of some power of X. We can store the bits |
| 72 | * in every byte in little-endian order and the bytes themselves also in |
| 73 | * little endian order. I will call this lle (little-little-endian). |
| 74 | * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks |
| 75 | * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. |
| 76 | * This format was originally implemented in gf128mul and is used |
| 77 | * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). |
| 78 | * |
| 79 | * Another convention says: store the bits in bigendian order and the |
| 80 | * bytes also. This is bbe (big-big-endian). Now the buffer above |
| 81 | * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, |
| 82 | * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe |
| 83 | * is partly implemented. |
| 84 | * |
| 85 | * Both of the above formats are easy to implement on big-endian |
| 86 | * machines. |
| 87 | * |
| 88 | * EME (which is patent encumbered) uses the ble format (bits are stored |
| 89 | * in big endian order and the bytes in little endian). The above buffer |
| 90 | * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. |
| 91 | * |
| 92 | * The common machine word-size is smaller than 128 bits, so to make |
| 93 | * an efficient implementation we must split into machine word sizes. |
| 94 | * This file uses one 32bit for the moment. Machine endianness comes into |
| 95 | * play. The lle format in relation to machine endianness is discussed |
| 96 | * below by the original author of gf128mul Dr Brian Gladman. |
| 97 | * |
| 98 | * Let's look at the bbe and ble format on a little endian machine. |
| 99 | * |
| 100 | * bbe on a little endian machine u32 x[4]: |
| 101 | * |
| 102 | * MS x[0] LS MS x[1] LS |
| 103 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 104 | * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 |
| 105 | * |
| 106 | * MS x[2] LS MS x[3] LS |
| 107 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 108 | * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 |
| 109 | * |
| 110 | * ble on a little endian machine |
| 111 | * |
| 112 | * MS x[0] LS MS x[1] LS |
| 113 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 114 | * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 |
| 115 | * |
| 116 | * MS x[2] LS MS x[3] LS |
| 117 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 118 | * 95...88 87...80 79...72 71...64 127.120 119.112 111.104 103..96 |
| 119 | * |
| 120 | * Multiplications in GF(2^128) are mostly bit-shifts, so you see why |
| 121 | * ble (and lbe also) are easier to implement on a little-endian |
| 122 | * machine than on a big-endian machine. The converse holds for bbe |
| 123 | * and lle. |
| 124 | * |
| 125 | * Note: to have good alignment, it seems to me that it is sufficient |
| 126 | * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize |
| 127 | * machines this will automatically be aligned to wordsize and on a 64-bit |
| 128 | * machine also. |
| 129 | */ |
| 130 | /* Multiply a GF128 field element by x. Field elements are held in arrays |
| 131 | of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower |
| 132 | indexed bits placed in the more numerically significant bit positions |
| 133 | within bytes. |
| 134 | |
| 135 | On little endian machines the bit indexes translate into the bit |
| 136 | positions within four 32-bit words in the following way |
| 137 | |
| 138 | MS x[0] LS MS x[1] LS |
| 139 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 140 | 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 |
| 141 | |
| 142 | MS x[2] LS MS x[3] LS |
| 143 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 144 | 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 |
| 145 | |
| 146 | On big endian machines the bit indexes translate into the bit |
| 147 | positions within four 32-bit words in the following way |
| 148 | |
| 149 | MS x[0] LS MS x[1] LS |
| 150 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 151 | 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 |
| 152 | |
| 153 | MS x[2] LS MS x[3] LS |
| 154 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
| 155 | 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 |
| 156 | */ |
| 157 | |
| 158 | /* A slow generic version of gf_mul, implemented for lle and bbe |
| 159 | * It multiplies a and b and puts the result in a */ |
| 160 | void gf128mul_lle(be128 *a, const be128 *b); |
| 161 | |
| 162 | void gf128mul_bbe(be128 *a, const be128 *b); |
| 163 | |
Rik Snel | f19f511 | 2007-09-19 20:23:13 +0800 | [diff] [blame] | 164 | /* multiply by x in ble format, needed by XTS */ |
| 165 | void gf128mul_x_ble(be128 *a, const be128 *b); |
Rik Snel | c494e07 | 2006-11-29 18:59:44 +1100 | [diff] [blame] | 166 | |
| 167 | /* 4k table optimization */ |
| 168 | |
| 169 | struct gf128mul_4k { |
| 170 | be128 t[256]; |
| 171 | }; |
| 172 | |
| 173 | struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); |
| 174 | struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); |
| 175 | void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); |
| 176 | void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); |
| 177 | |
/*
 * Release a 4k table by handing it straight to kfree().
 * NOTE(review): intended for tables returned by gf128mul_init_4k_lle()
 * or gf128mul_init_4k_bbe() -- confirm they allocate with the kmalloc
 * family.
 */
static inline void gf128mul_free_4k(struct gf128mul_4k *t)
{
	kfree(t);
}
| 182 | |
| 183 | |
/*
 * 64k table optimization, implemented for lle and bbe.
 *
 * The structure is an array of 16 pointers, each referring to one
 * 4k table (16 x 4k = 64k).
 */
struct gf128mul_64k {
	struct gf128mul_4k *t[16];
};
| 189 | |
| 190 | /* first initialize with the constant factor with which you |
| 191 | * want to multiply and then call gf128_64k_lle with the other |
| 192 | * factor in the first argument, the table in the second and a |
| 193 | * scratch register in the third. Afterwards *a = *r. */ |
| 194 | struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); |
| 195 | struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); |
| 196 | void gf128mul_free_64k(struct gf128mul_64k *t); |
| 197 | void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); |
| 198 | void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); |
| 199 | |
| 200 | #endif /* _CRYPTO_GF128MUL_H */ |