Blame - crypto/gf128mul.c - kernel/msm-4.19

blob: dc012129c0631ab11a9d8afa69d8c57b06dc3ead [file] [log] [blame]

Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	1	/* gf128mul.c - GF(2^128) multiplication functions
				2	*
				3	* Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
				4	* Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
				5	*
				6	* Based on Dr Brian Gladman's (GPL'd) work published at
Adrian-Ken Rueegsegger	8c882f6	2009-03-04 14:43:52 +0800	[diff] [blame]	7	* http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	8	* See the original copyright notice below.
				9	*
				10	* This program is free software; you can redistribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the Free
				12	* Software Foundation; either version 2 of the License, or (at your option)
				13	* any later version.
				14	*/
				15
				16	/*
				17	---------------------------------------------------------------------------
				18	Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
				19
				20	LICENSE TERMS
				21
				22	The free distribution and use of this software in both source and binary
				23	form is allowed (with or without changes) provided that:
				24
				25	1. distributions of this source code include the above copyright
				26	notice, this list of conditions and the following disclaimer;
				27
				28	2. distributions in binary form include the above copyright
				29	notice, this list of conditions and the following disclaimer
				30	in the documentation and/or other associated materials;
				31
				32	3. the copyright holder's name is not used to endorse products
				33	built using this software without specific written permission.
				34
				35	ALTERNATIVELY, provided that this notice is retained in full, this product
				36	may be distributed under the terms of the GNU General Public License (GPL),
				37	in which case the provisions of the GPL apply INSTEAD OF those given above.
				38
				39	DISCLAIMER
				40
				41	This software is provided 'as is' with no explicit or implied warranties
				42	in respect of its properties, including, but not limited to, correctness
				43	and/or fitness for purpose.
				44	---------------------------------------------------------------------------
				45	Issue 31/01/2006
				46
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	47	This file provides fast multiplication in GF(2^128) as required by several
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	48	cryptographic authentication modes
				49	*/
				50
				51	#include <crypto/gf128mul.h>
				52	#include <linux/kernel.h>
				53	#include <linux/module.h>
				54	#include <linux/slab.h>
				55
				56	#define gf128mul_dat(q) { \
				57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
				58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
				59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
				60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
				61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
				62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
				63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
				64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
				65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
				66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
				67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
				68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
				69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
				70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
				71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
				72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
				73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
				74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
				75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
				76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
				77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
				78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
				79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
				80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
				81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
				82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
				83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
				84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
				85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
				86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
				87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
				88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
				89	}
				90
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	91	/*
				92	* Given a value i in 0..255 as the byte overflow when a field element
				93	* in GF(2^128) is multiplied by x^8, the following macro returns the
				94	* 16-bit value that must be XOR-ed into the low-degree end of the
				95	* product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
				96	*
				97	* There are two versions of the macro, and hence two tables: one for
				98	* the "be" convention where the highest-order bit is the coefficient of
				99	* the highest-degree polynomial term, and one for the "le" convention
				100	* where the highest-order bit is the coefficient of the lowest-degree
				101	* polynomial term. In both cases the values are stored in CPU byte
				102	* endianness such that the coefficients are ordered consistently across
				103	* bytes, i.e. in the "be" table bits 15..0 of the stored value
				104	* correspond to the coefficients of x^15..x^0, and in the "le" table
				105	* bits 15..0 correspond to the coefficients of x^0..x^15.
				106	*
				107	* Therefore, provided that the appropriate byte endianness conversions
				108	* are done by the multiplication functions (and these must be in place
				109	* anyway to support both little endian and big endian CPUs), the "be"
				110	* table can be used for multiplications of both "bbe" and "ble"
				111	* elements, and the "le" table can be used for multiplications of both
				112	* "lle" and "lbe" elements.
				113	*/
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	114
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	115	#define xda_be(i) ( \
Eric Biggers	2416e4f	2017-02-14 13:43:28 -0800	[diff] [blame]	116	(i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
				117	(i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
				118	(i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
				119	(i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	120	)
				121
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	122	#define xda_le(i) ( \
Eric Biggers	2416e4f	2017-02-14 13:43:28 -0800	[diff] [blame]	123	(i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
				124	(i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
				125	(i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
				126	(i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	127	)
				128
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	129	static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
				130	static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	131
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	132	/*
Ondrej Mosnáček	acb9b15	2017-04-02 21:19:13 +0200	[diff] [blame]	133	* The following functions multiply a field element by x^8 in
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	134	* the polynomial field representation. They use 64-bit word operations
				135	* to gain speed but compensate for machine endianness and hence work
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	136	* correctly on both styles of machine.
				137	*/
				138
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	139	static void gf128mul_x8_lle(be128 *x)
				140	{
				141	u64 a = be64_to_cpu(x->a);
				142	u64 b = be64_to_cpu(x->b);
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	143	u64 _tt = gf128mul_table_le[b & 0xff];
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	144
				145	x->b = cpu_to_be64((b >> 8) \| (a << 56));
				146	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
				147	}
				148
				149	static void gf128mul_x8_bbe(be128 *x)
				150	{
				151	u64 a = be64_to_cpu(x->a);
				152	u64 b = be64_to_cpu(x->b);
Eric Biggers	f33fd64	2017-02-14 13:43:29 -0800	[diff] [blame]	153	u64 _tt = gf128mul_table_be[a >> 56];
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	154
				155	x->a = cpu_to_be64((a << 8) \| (b >> 56));
				156	x->b = cpu_to_be64((b << 8) ^ _tt);
				157	}
				158
				159	void gf128mul_lle(be128 r, const be128 b)
				160	{
				161	be128 p[8];
				162	int i;
				163
				164	p[0] = *r;
				165	for (i = 0; i < 7; ++i)
				166	gf128mul_x_lle(&p[i + 1], &p[i]);
				167
Mathias Krause	6254266	2011-07-08 17:21:21 +0800	[diff] [blame]	168	memset(r, 0, sizeof(*r));
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	169	for (i = 0;;) {
				170	u8 ch = ((u8 *)b)[15 - i];
				171
				172	if (ch & 0x80)
				173	be128_xor(r, r, &p[0]);
				174	if (ch & 0x40)
				175	be128_xor(r, r, &p[1]);
				176	if (ch & 0x20)
				177	be128_xor(r, r, &p[2]);
				178	if (ch & 0x10)
				179	be128_xor(r, r, &p[3]);
				180	if (ch & 0x08)
				181	be128_xor(r, r, &p[4]);
				182	if (ch & 0x04)
				183	be128_xor(r, r, &p[5]);
				184	if (ch & 0x02)
				185	be128_xor(r, r, &p[6]);
				186	if (ch & 0x01)
				187	be128_xor(r, r, &p[7]);
				188
				189	if (++i >= 16)
				190	break;
				191
				192	gf128mul_x8_lle(r);
				193	}
				194	}
				195	EXPORT_SYMBOL(gf128mul_lle);
				196
				197	void gf128mul_bbe(be128 r, const be128 b)
				198	{
				199	be128 p[8];
				200	int i;
				201
				202	p[0] = *r;
				203	for (i = 0; i < 7; ++i)
				204	gf128mul_x_bbe(&p[i + 1], &p[i]);
				205
Mathias Krause	6254266	2011-07-08 17:21:21 +0800	[diff] [blame]	206	memset(r, 0, sizeof(*r));
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	207	for (i = 0;;) {
				208	u8 ch = ((u8 *)b)[i];
				209
				210	if (ch & 0x80)
				211	be128_xor(r, r, &p[7]);
				212	if (ch & 0x40)
				213	be128_xor(r, r, &p[6]);
				214	if (ch & 0x20)
				215	be128_xor(r, r, &p[5]);
				216	if (ch & 0x10)
				217	be128_xor(r, r, &p[4]);
				218	if (ch & 0x08)
				219	be128_xor(r, r, &p[3]);
				220	if (ch & 0x04)
				221	be128_xor(r, r, &p[2]);
				222	if (ch & 0x02)
				223	be128_xor(r, r, &p[1]);
				224	if (ch & 0x01)
				225	be128_xor(r, r, &p[0]);
				226
				227	if (++i >= 16)
				228	break;
				229
				230	gf128mul_x8_bbe(r);
				231	}
				232	}
				233	EXPORT_SYMBOL(gf128mul_bbe);
				234
				235	/* This version uses 64k bytes of table space.
				236	A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	237	value in GF(2^128). If we consider a GF(2^128) value in
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	238	the buffer's lowest byte, we can construct a table of
				239	the 256 16 byte values that result from the 256 values
				240	of this byte. This requires 4096 bytes. But we also
				241	need tables for each of the 16 higher bytes in the
				242	buffer as well, which makes 64 kbytes in total.
				243	*/
				244	/* additional explanation
				245	* t[0][BYTE] contains g*BYTE
				246	* t[1][BYTE] contains gx^8BYTE
				247	* ..
				248	* t[15][BYTE] contains gx^120BYTE */
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	249	struct gf128mul_64k gf128mul_init_64k_bbe(const be128 g)
				250	{
				251	struct gf128mul_64k *t;
				252	int i, j, k;
				253
				254	t = kzalloc(sizeof(*t), GFP_KERNEL);
				255	if (!t)
				256	goto out;
				257
				258	for (i = 0; i < 16; i++) {
				259	t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
				260	if (!t->t[i]) {
				261	gf128mul_free_64k(t);
				262	t = NULL;
				263	goto out;
				264	}
				265	}
				266
				267	t->t[0]->t[1] = *g;
				268	for (j = 1; j <= 64; j <<= 1)
				269	gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
				270
				271	for (i = 0;;) {
				272	for (j = 2; j < 256; j += j)
				273	for (k = 1; k < j; ++k)
				274	be128_xor(&t->t[i]->t[j + k],
				275	&t->t[i]->t[j], &t->t[i]->t[k]);
				276
				277	if (++i >= 16)
				278	break;
				279
				280	for (j = 128; j > 0; j >>= 1) {
				281	t->t[i]->t[j] = t->t[i - 1]->t[j];
				282	gf128mul_x8_bbe(&t->t[i]->t[j]);
				283	}
				284	}
				285
				286	out:
				287	return t;
				288	}
				289	EXPORT_SYMBOL(gf128mul_init_64k_bbe);
				290
				291	void gf128mul_free_64k(struct gf128mul_64k *t)
				292	{
				293	int i;
				294
				295	for (i = 0; i < 16; i++)
Alex Cope	75aa0a7	2016-11-14 11:02:54 -0800	[diff] [blame]	296	kzfree(t->t[i]);
				297	kzfree(t);
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	298	}
				299	EXPORT_SYMBOL(gf128mul_free_64k);
				300
Eric Biggers	3ea996d	2017-02-14 13:43:30 -0800	[diff] [blame]	301	void gf128mul_64k_bbe(be128 a, const struct gf128mul_64k t)
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	302	{
				303	u8 ap = (u8 )a;
				304	be128 r[1];
				305	int i;
				306
				307	*r = t->t[0]->t[ap[15]];
				308	for (i = 1; i < 16; ++i)
				309	be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
				310	a = r;
				311	}
				312	EXPORT_SYMBOL(gf128mul_64k_bbe);
				313
				314	/* This version uses 4k bytes of table space.
				315	A 16 byte buffer has to be multiplied by a 16 byte key
Eric Biggers	63be5b5	2017-02-14 13:43:27 -0800	[diff] [blame]	316	value in GF(2^128). If we consider a GF(2^128) value in a
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	317	single byte, we can construct a table of the 256 16 byte
				318	values that result from the 256 values of this byte.
				319	This requires 4096 bytes. If we take the highest byte in
				320	the buffer and use this table to get the result, we then
				321	have to multiply by x^120 to get the final value. For the
				322	next highest byte the result has to be multiplied by x^112
				323	and so on. But we can do this by accumulating the result
				324	in an accumulator starting with the result for the top
				325	byte. We repeatedly multiply the accumulator value by
				326	x^8 and then add in (i.e. xor) the 16 bytes of the next
				327	lower byte in the buffer, stopping when we reach the
				328	lowest byte. This requires a 4096 byte table.
				329	*/
				330	struct gf128mul_4k gf128mul_init_4k_lle(const be128 g)
				331	{
				332	struct gf128mul_4k *t;
				333	int j, k;
				334
				335	t = kzalloc(sizeof(*t), GFP_KERNEL);
				336	if (!t)
				337	goto out;
				338
				339	t->t[128] = *g;
				340	for (j = 64; j > 0; j >>= 1)
				341	gf128mul_x_lle(&t->t[j], &t->t[j+j]);
				342
				343	for (j = 2; j < 256; j += j)
				344	for (k = 1; k < j; ++k)
				345	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
				346
				347	out:
				348	return t;
				349	}
				350	EXPORT_SYMBOL(gf128mul_init_4k_lle);
				351
				352	struct gf128mul_4k gf128mul_init_4k_bbe(const be128 g)
				353	{
				354	struct gf128mul_4k *t;
				355	int j, k;
				356
				357	t = kzalloc(sizeof(*t), GFP_KERNEL);
				358	if (!t)
				359	goto out;
				360
				361	t->t[1] = *g;
				362	for (j = 1; j <= 64; j <<= 1)
				363	gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
				364
				365	for (j = 2; j < 256; j += j)
				366	for (k = 1; k < j; ++k)
				367	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
				368
				369	out:
				370	return t;
				371	}
				372	EXPORT_SYMBOL(gf128mul_init_4k_bbe);
				373
Eric Biggers	3ea996d	2017-02-14 13:43:30 -0800	[diff] [blame]	374	void gf128mul_4k_lle(be128 a, const struct gf128mul_4k t)
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	375	{
				376	u8 ap = (u8 )a;
				377	be128 r[1];
				378	int i = 15;
				379
				380	*r = t->t[ap[15]];
				381	while (i--) {
				382	gf128mul_x8_lle(r);
				383	be128_xor(r, r, &t->t[ap[i]]);
				384	}
				385	a = r;
				386	}
				387	EXPORT_SYMBOL(gf128mul_4k_lle);
				388
Eric Biggers	3ea996d	2017-02-14 13:43:30 -0800	[diff] [blame]	389	void gf128mul_4k_bbe(be128 a, const struct gf128mul_4k t)
Rik Snel	c494e07	2006-11-29 18:59:44 +1100	[diff] [blame]	390	{
				391	u8 ap = (u8 )a;
				392	be128 r[1];
				393	int i = 0;
				394
				395	*r = t->t[ap[0]];
				396	while (++i < 16) {
				397	gf128mul_x8_bbe(r);
				398	be128_xor(r, r, &t->t[ap[i]]);
				399	}
				400	a = r;
				401	}
				402	EXPORT_SYMBOL(gf128mul_4k_bbe);
				403
				404	MODULE_LICENSE("GPL");
				405	MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");