Blame - crypto/gf128mul.c - kernel/msm-4.19

blob: 24e601954c7a405c12d30fbfdb683430edff1f07 [file] [log] [blame]

Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	1	/* gf128mul.c - GF(2^128) multiplication functions
				2	*
				3	* Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
				4	* Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
				5	*
				6	* Based on Dr Brian Gladman's (GPL'd) work published at
Adrian-Ken Rueegsegger	8c882f6	2009-03-04 14:43:52 +0800	[diff] [blame]	7	* http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	8	* See the original copyright notice below.
				9	*
				10	* This program is free software; you can redistribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the Free
				12	* Software Foundation; either version 2 of the License, or (at your option)
				13	* any later version.
				14	*/
				15
				16	/*
				17	---------------------------------------------------------------------------
				18	Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
				19
				20	LICENSE TERMS
				21
				22	The free distribution and use of this software in both source and binary
				23	form is allowed (with or without changes) provided that:
				24
				25	1. distributions of this source code include the above copyright
				26	notice, this list of conditions and the following disclaimer;
				27
				28	2. distributions in binary form include the above copyright
				29	notice, this list of conditions and the following disclaimer
				30	in the documentation and/or other associated materials;
				31
				32	3. the copyright holder's name is not used to endorse products
				33	built using this software without specific written permission.
				34
				35	ALTERNATIVELY, provided that this notice is retained in full, this product
				36	may be distributed under the terms of the GNU General Public License (GPL),
				37	in which case the provisions of the GPL apply INSTEAD OF those given above.
				38
				39	DISCLAIMER
				40
				41	This software is provided 'as is' with no explicit or implied warranties
				42	in respect of its properties, including, but not limited to, correctness
				43	and/or fitness for purpose.
				44	---------------------------------------------------------------------------
				45	Issue 31/01/2006
				46
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	47	This file provides fast multiplication in GF(2^128) as required by several
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	48	cryptographic authentication modes
				49	*/
				50
				51	#include <crypto/gf128mul.h>
				52	#include <linux/kernel.h>
				53	#include <linux/module.h>
				54	#include <linux/slab.h>
				55
				56	#define gf128mul_dat(q) { \
				57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
				58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
				59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
				60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
				61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
				62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
				63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
				64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
				65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
				66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
				67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
				68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
				69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
				70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
				71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
				72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
				73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
				74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
				75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
				76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
				77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
				78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
				79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
				80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
				81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
				82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
				83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
				84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
				85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
				86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
				87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
				88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
				89	}
				90
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	91	/*
				92	* Given a value i in 0..255 as the byte overflow when a field element
				93	* in GF(2^128) is multiplied by x^8, the following macro returns the
				94	* 16-bit value that must be XOR-ed into the low-degree end of the
				95	* product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
				96	*
				97	* There are two versions of the macro, and hence two tables: one for
				98	* the "be" convention where the highest-order bit is the coefficient of
				99	* the highest-degree polynomial term, and one for the "le" convention
				100	* where the highest-order bit is the coefficient of the lowest-degree
				101	* polynomial term. In both cases the values are stored in CPU byte
				102	* endianness such that the coefficients are ordered consistently across
				103	* bytes, i.e. in the "be" table bits 15..0 of the stored value
				104	* correspond to the coefficients of x^15..x^0, and in the "le" table
				105	* bits 15..0 correspond to the coefficients of x^0..x^15.
				106	*
				107	* Therefore, provided that the appropriate byte endianness conversions
				108	* are done by the multiplication functions (and these must be in place
				109	* anyway to support both little endian and big endian CPUs), the "be"
				110	* table can be used for multiplications of both "bbe" and "ble"
				111	* elements, and the "le" table can be used for multiplications of both
				112	* "lle" and "lbe" elements.
				113	*/
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	114
/*
 * xda_be(i) - 16-bit reduction value for overflow byte i, "be" convention.
 *
 * Every set bit k (0..7) of i contributes 0x0087 << k, where 0x87 encodes
 * the x^7 + x^2 + x + 1 tail of the field polynomial; the contributions
 * are combined with XOR.
 *
 * The argument is parenthesized so that compound expressions such as
 * xda_be(a | b) expand correctly.  It is evaluated several times, so it
 * must be free of side effects.
 */
#define xda_be(i) ( \
	((i) & 0x80 ? 0x4380 : 0) ^ ((i) & 0x40 ? 0x21c0 : 0) ^ \
	((i) & 0x20 ? 0x10e0 : 0) ^ ((i) & 0x10 ? 0x0870 : 0) ^ \
	((i) & 0x08 ? 0x0438 : 0) ^ ((i) & 0x04 ? 0x021c : 0) ^ \
	((i) & 0x02 ? 0x010e : 0) ^ ((i) & 0x01 ? 0x0087 : 0) \
)
				121
/*
 * xda_le(i) - 16-bit reduction value for overflow byte i, "le" convention.
 *
 * This is the bit-reflected counterpart of the "be" variant: the bit with
 * mask (0x80 >> s) of i contributes 0xe100 >> s, and the contributions are
 * combined with XOR.
 *
 * The argument is parenthesized so that compound expressions such as
 * xda_le(a | b) expand correctly.  It is evaluated several times, so it
 * must be free of side effects.
 */
#define xda_le(i) ( \
	((i) & 0x80 ? 0xe100 : 0) ^ ((i) & 0x40 ? 0x7080 : 0) ^ \
	((i) & 0x20 ? 0x3840 : 0) ^ ((i) & 0x10 ? 0x1c20 : 0) ^ \
	((i) & 0x08 ? 0x0e10 : 0) ^ ((i) & 0x04 ? 0x0708 : 0) ^ \
	((i) & 0x02 ? 0x0384 : 0) ^ ((i) & 0x01 ? 0x01c2 : 0) \
)
				128
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	129	static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
				130	static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	131
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	132	/*
Ondrej Mosnáček	acb9b15	2017-04-02 21:19:13 +0200	[diff] [blame]	133	* The following functions multiply a field element by x^8 in
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	134	* the polynomial field representation. They use 64-bit word operations
				135	* to gain speed but compensate for machine endianness and hence work
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	136	* correctly on both styles of machine.
				137	*/
				138
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	139	static void gf128mul_x8_lle(be128 *x)
				140	{
				141	u64 a = be64_to_cpu(x->a);
				142	u64 b = be64_to_cpu(x->b);
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	143	u64 _tt = gf128mul_table_le[b & 0xff];
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	144
				145	x->b = cpu_to_be64((b >> 8) \| (a << 56));
				146	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
				147	}
				148
				149	static void gf128mul_x8_bbe(be128 *x)
				150	{
				151	u64 a = be64_to_cpu(x->a);
				152	u64 b = be64_to_cpu(x->b);
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	153	u64 _tt = gf128mul_table_be[a >> 56];
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	154
				155	x->a = cpu_to_be64((a << 8) \| (b >> 56));
				156	x->b = cpu_to_be64((b << 8) ^ _tt);
				157	}
				158
Harsh Jain	acfc587	2017-10-08 13:37:20 +0530	[diff] [blame]	159	void gf128mul_x8_ble(le128 r, const le128 x)
				160	{
				161	u64 a = le64_to_cpu(x->a);
				162	u64 b = le64_to_cpu(x->b);
				163
				164	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
				165	u64 _tt = gf128mul_table_be[a >> 56];
				166
				167	r->a = cpu_to_le64((a << 8) \| (b >> 56));
				168	r->b = cpu_to_le64((b << 8) ^ _tt);
				169	}
				170	EXPORT_SYMBOL(gf128mul_x8_ble);
				171
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	172	void gf128mul_lle(be128 r, const be128 b)
				173	{
				174	be128 p[8];
				175	int i;
				176
				177	p[0] = *r;
				178	for (i = 0; i < 7; ++i)
				179	gf128mul_x_lle(&p[i + 1], &p[i]);
				180
Mathias Krause	6254266	2011-07-08 17:21:21 +0800	[diff] [blame]	181	memset(r, 0, sizeof(*r));
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	182	for (i = 0;;) {
				183	u8 ch = ((u8 *)b)[15 - i];
				184
				185	if (ch & 0x80)
				186	be128_xor(r, r, &p[0]);
				187	if (ch & 0x40)
				188	be128_xor(r, r, &p[1]);
				189	if (ch & 0x20)
				190	be128_xor(r, r, &p[2]);
				191	if (ch & 0x10)
				192	be128_xor(r, r, &p[3]);
				193	if (ch & 0x08)
				194	be128_xor(r, r, &p[4]);
				195	if (ch & 0x04)
				196	be128_xor(r, r, &p[5]);
				197	if (ch & 0x02)
				198	be128_xor(r, r, &p[6]);
				199	if (ch & 0x01)
				200	be128_xor(r, r, &p[7]);
				201
				202	if (++i >= 16)
				203	break;
				204
				205	gf128mul_x8_lle(r);
				206	}
				207	}
				208	EXPORT_SYMBOL(gf128mul_lle);
				209
				210	void gf128mul_bbe(be128 r, const be128 b)
				211	{
				212	be128 p[8];
				213	int i;
				214
				215	p[0] = *r;
				216	for (i = 0; i < 7; ++i)
				217	gf128mul_x_bbe(&p[i + 1], &p[i]);
				218
Mathias Krause	6254266	2011-07-08 17:21:21 +0800	[diff] [blame]	219	memset(r, 0, sizeof(*r));
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	220	for (i = 0;;) {
				221	u8 ch = ((u8 *)b)[i];
				222
				223	if (ch & 0x80)
				224	be128_xor(r, r, &p[7]);
				225	if (ch & 0x40)
				226	be128_xor(r, r, &p[6]);
				227	if (ch & 0x20)
				228	be128_xor(r, r, &p[5]);
				229	if (ch & 0x10)
				230	be128_xor(r, r, &p[4]);
				231	if (ch & 0x08)
				232	be128_xor(r, r, &p[3]);
				233	if (ch & 0x04)
				234	be128_xor(r, r, &p[2]);
				235	if (ch & 0x02)
				236	be128_xor(r, r, &p[1]);
				237	if (ch & 0x01)
				238	be128_xor(r, r, &p[0]);
				239
				240	if (++i >= 16)
				241	break;
				242
				243	gf128mul_x8_bbe(r);
				244	}
				245	}
				246	EXPORT_SYMBOL(gf128mul_bbe);
				247
				248	/* This version uses 64k bytes of table space.
				249	A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	250	value in GF(2^128). If we consider a GF(2^128) value in
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	251	the buffer's lowest byte, we can construct a table of
				252	the 256 16 byte values that result from the 256 values
				253	of this byte. This requires 4096 bytes. But we also
				254	need tables for each of the 16 higher bytes in the
				255	buffer as well, which makes 64 kbytes in total.
				256	*/
/* additional explanation
 * t[0][BYTE] contains g*BYTE
 * t[1][BYTE] contains g*x^8*BYTE
 *  ..
 * t[15][BYTE] contains g*x^120*BYTE */
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	262	struct gf128mul_64k gf128mul_init_64k_bbe(const be128 g)
				263	{
				264	struct gf128mul_64k *t;
				265	int i, j, k;
				266
				267	t = kzalloc(sizeof(*t), GFP_KERNEL);
				268	if (!t)
				269	goto out;
				270
				271	for (i = 0; i < 16; i++) {
				272	t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
				273	if (!t->t[i]) {
				274	gf128mul_free_64k(t);
				275	t = NULL;
				276	goto out;
				277	}
				278	}
				279
				280	t->t[0]->t[1] = *g;
				281	for (j = 1; j <= 64; j <<= 1)
				282	gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
				283
				284	for (i = 0;;) {
				285	for (j = 2; j < 256; j += j)
				286	for (k = 1; k < j; ++k)
				287	be128_xor(&t->t[i]->t[j + k],
				288	&t->t[i]->t[j], &t->t[i]->t[k]);
				289
				290	if (++i >= 16)
				291	break;
				292
				293	for (j = 128; j > 0; j >>= 1) {
				294	t->t[i]->t[j] = t->t[i - 1]->t[j];
				295	gf128mul_x8_bbe(&t->t[i]->t[j]);
				296	}
				297	}
				298
				299	out:
				300	return t;
				301	}
				302	EXPORT_SYMBOL(gf128mul_init_64k_bbe);
				303
				304	void gf128mul_free_64k(struct gf128mul_64k *t)
				305	{
				306	int i;
				307
				308	for (i = 0; i < 16; i++)
Alex Cope	75aa0a7	2016-11-14 11:02:54 -0800	[diff] [blame]	309	kzfree(t->t[i]);
				310	kzfree(t);
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	311	}
				312	EXPORT_SYMBOL(gf128mul_free_64k);
				313
Eric Biggers	3ea996d	2017-02-14 13:43:30 -0800	[diff] [blame]	314	void gf128mul_64k_bbe(be128 a, const struct gf128mul_64k t)
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	315	{
				316	u8 ap = (u8 )a;
				317	be128 r[1];
				318	int i;
				319
				320	*r = t->t[0]->t[ap[15]];
				321	for (i = 1; i < 16; ++i)
				322	be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
				323	a = r;
				324	}
				325	EXPORT_SYMBOL(gf128mul_64k_bbe);
				326
				327	/* This version uses 4k bytes of table space.
				328	A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	329	value in GF(2^128). If we consider a GF(2^128) value in a
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	330	single byte, we can construct a table of the 256 16 byte
				331	values that result from the 256 values of this byte.
				332	This requires 4096 bytes. If we take the highest byte in
				333	the buffer and use this table to get the result, we then
				334	have to multiply by x^120 to get the final value. For the
				335	next highest byte the result has to be multiplied by x^112
				336	and so on. But we can do this by accumulating the result
				337	in an accumulator starting with the result for the top
				338	byte. We repeatedly multiply the accumulator value by
				339	x^8 and then add in (i.e. xor) the 16 bytes of the next
				340	lower byte in the buffer, stopping when we reach the
				341	lowest byte. This requires a 4096 byte table.
				342	*/
				343	struct gf128mul_4k gf128mul_init_4k_lle(const be128 g)
				344	{
				345	struct gf128mul_4k *t;
				346	int j, k;
				347
				348	t = kzalloc(sizeof(*t), GFP_KERNEL);
				349	if (!t)
				350	goto out;
				351
				352	t->t[128] = *g;
				353	for (j = 64; j > 0; j >>= 1)
				354	gf128mul_x_lle(&t->t[j], &t->t[j+j]);
				355
				356	for (j = 2; j < 256; j += j)
				357	for (k = 1; k < j; ++k)
				358	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
				359
				360	out:
				361	return t;
				362	}
				363	EXPORT_SYMBOL(gf128mul_init_4k_lle);
				364
				365	struct gf128mul_4k gf128mul_init_4k_bbe(const be128 g)
				366	{
				367	struct gf128mul_4k *t;
				368	int j, k;
				369
				370	t = kzalloc(sizeof(*t), GFP_KERNEL);
				371	if (!t)
				372	goto out;
				373
				374	t->t[1] = *g;
				375	for (j = 1; j <= 64; j <<= 1)
				376	gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
				377
				378	for (j = 2; j < 256; j += j)
				379	for (k = 1; k < j; ++k)
				380	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
				381
				382	out:
				383	return t;
				384	}
				385	EXPORT_SYMBOL(gf128mul_init_4k_bbe);
				386
Eric Biggers	3ea996d	2017-02-14 13:43:30 -0800	[diff] [blame]	387	void gf128mul_4k_lle(be128 a, const struct gf128mul_4k t)
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	388	{
				389	u8 ap = (u8 )a;
				390	be128 r[1];
				391	int i = 15;
				392
				393	*r = t->t[ap[15]];
				394	while (i--) {
				395	gf128mul_x8_lle(r);
				396	be128_xor(r, r, &t->t[ap[i]]);
				397	}
				398	a = r;
				399	}
				400	EXPORT_SYMBOL(gf128mul_4k_lle);
				401
Eric Biggers	3ea996d	2017-02-14 13:43:30 -0800	[diff] [blame]	402	void gf128mul_4k_bbe(be128 a, const struct gf128mul_4k t)
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	403	{
				404	u8 ap = (u8 )a;
				405	be128 r[1];
				406	int i = 0;
				407
				408	*r = t->t[ap[0]];
				409	while (++i < 16) {
				410	gf128mul_x8_bbe(r);
				411	be128_xor(r, r, &t->t[ap[i]]);
				412	}
				413	a = r;
				414	}
				415	EXPORT_SYMBOL(gf128mul_4k_bbe);
				416
				417	MODULE_LICENSE("GPL");
				418	MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");