blob: f19b18b36ac4fe356ed0609590c3be60a1d546a3 [file] [log] [blame]
Christian Heimes4a0270d2012-10-06 02:23:36 +02001/*
2The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
4questions, please refer to our website: http://keccak.noekeon.org/
5
6Implementation by the designers,
7hereby denoted as "the implementer".
8
9To the extent possible under law, the implementer has waived all copyright
10and related or neighboring rights to the source code in this file.
11http://creativecommons.org/publicdomain/zero/1.0/
12*/
13
14#include <string.h>
Christian Heimes743e0cd2012-10-17 23:52:17 +020015/* #include "brg_endian.h" */
Christian Heimes4a0270d2012-10-06 02:23:36 +020016#include "KeccakF-1600-opt64-settings.h"
17#include "KeccakF-1600-interface.h"
18
/* Byte type used by the byte/word conversion helpers further down. */
typedef unsigned char UINT8;
/* UINT64 is deliberately not defined here; presumably it is supplied by one
   of the headers included above or by the including file -- confirm. */
/* typedef unsigned long long int UINT64; */

/* ALIGN requests 32-byte alignment for the object it qualifies, using the
   compiler-specific spelling; on unknown compilers it expands to nothing,
   so no particular alignment is requested there. */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
29
30#if defined(UseSSE)
31 #include <x86intrin.h>
32 typedef __m128i V64;
33 typedef __m128i V128;
34 typedef union {
35 V128 v128;
36 UINT64 v64[2];
37 } V6464;
38
39 #define ANDnu64(a, b) _mm_andnot_si128(a, b)
40 #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
41 #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
42 #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
43 #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
44 #define XOR64(a, b) _mm_xor_si128(a, b)
45 #define XOReq64(a, b) a = _mm_xor_si128(a, b)
46 #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
47
48 #define ANDnu128(a, b) _mm_andnot_si128(a, b)
49 #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
50 #define CONST128(a) _mm_load_si128((const V128 *)&(a))
51 #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
52 #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
53 #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
54 #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
55 #define XOR128(a, b) _mm_xor_si128(a, b)
56 #define XOReq128(a, b) a = _mm_xor_si128(a, b)
57 #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
58 #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
59 #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
60 #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
61 #define ZERO128() _mm_setzero_si128()
62
63 #ifdef UseOnlySIMD64
64 #include "KeccakF-1600-simd64.macros"
65 #else
66ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
67 #include "KeccakF-1600-simd128.macros"
68 #endif
69
70 #ifdef UseBebigokimisa
71 #error "UseBebigokimisa cannot be used in combination with UseSSE"
72 #endif
73#elif defined(UseXOP)
74 #include <x86intrin.h>
75 typedef __m128i V64;
76 typedef __m128i V128;
77
78 #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
79 #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
80 #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
81 #define XOR64(a, b) _mm_xor_si128(a, b)
82 #define XOReq64(a, b) a = _mm_xor_si128(a, b)
83
84 #define ANDnu128(a, b) _mm_andnot_si128(a, b)
85 #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
86 #define CONST128(a) _mm_load_si128((const V128 *)&(a))
87 #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
88 #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
89 #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
90 #define XOR128(a, b) _mm_xor_si128(a, b)
91 #define XOReq128(a, b) a = _mm_xor_si128(a, b)
92 #define ZERO128() _mm_setzero_si128()
93
94 #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
95 #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
96 #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
97 #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
98 #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
99 #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
100 #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
101
102 #define ROL6464same(a, o) _mm_roti_epi64(a, o)
103 #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
104ALIGN const UINT64 rot_0_20[2] = { 0, 20};
105ALIGN const UINT64 rot_44_3[2] = {44, 3};
106ALIGN const UINT64 rot_43_45[2] = {43, 45};
107ALIGN const UINT64 rot_21_61[2] = {21, 61};
108ALIGN const UINT64 rot_14_28[2] = {14, 28};
109ALIGN const UINT64 rot_1_36[2] = { 1, 36};
110ALIGN const UINT64 rot_6_10[2] = { 6, 10};
111ALIGN const UINT64 rot_25_15[2] = {25, 15};
112ALIGN const UINT64 rot_8_56[2] = { 8, 56};
113ALIGN const UINT64 rot_18_27[2] = {18, 27};
114ALIGN const UINT64 rot_62_55[2] = {62, 55};
115ALIGN const UINT64 rot_39_41[2] = {39, 41};
116
117#if defined(UseSimulatedXOP)
118 /* For debugging purposes, when XOP is not available */
119 #undef ROL6464
120 #undef ROL6464same
121 #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
122 V128 ROL6464(V128 a, int r0, int r1)
123 {
124 V128 a0 = ROL64(a, r0);
125 V128 a1 = COPY64HI2LO(ROL64(a, r1));
126 return GET64LOLO(a0, a1);
127 }
128#endif
129
130 #include "KeccakF-1600-xop.macros"
131
132 #ifdef UseBebigokimisa
133 #error "UseBebigokimisa cannot be used in combination with UseXOP"
134 #endif
135#elif defined(UseMMX)
136 #include <mmintrin.h>
137 typedef __m64 V64;
138 #define ANDnu64(a, b) _mm_andnot_si64(a, b)
139
140 #if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
141 #define LOAD64(a) *(V64*)&(a)
142 #define CONST64(a) *(V64*)&(a)
143 #define STORE64(a, b) *(V64*)&(a) = b
144 #else
145 #define LOAD64(a) (V64)a
146 #define CONST64(a) (V64)a
147 #define STORE64(a, b) a = (UINT64)b
148 #endif
149 #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
150 #define XOR64(a, b) _mm_xor_si64(a, b)
151 #define XOReq64(a, b) a = _mm_xor_si64(a, b)
152
153 #include "KeccakF-1600-simd64.macros"
154
155 #ifdef UseBebigokimisa
156 #error "UseBebigokimisa cannot be used in combination with UseMMX"
157 #endif
158#else
159 #if defined(_MSC_VER)
160 #define ROL64(a, offset) _rotl64(a, offset)
161 #elif defined(UseSHLD)
162 #define ROL64(x,N) ({ \
163 register UINT64 __out; \
164 register UINT64 __in = x; \
165 __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
166 __out; \
167 })
168 #else
169 #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset)))
170 #endif
171
172 #include "KeccakF-1600-64.macros"
173#endif
174
175#include "KeccakF-1600-unrolling.macros"
176
/*
 * Applies the permutation to a state given as an array of 64-bit words.
 *
 * The body is assembled from macros defined in the KeccakF-1600-*.macros
 * files included above, which are not visible here:
 *   - declareABCDE presumably declares the working lane variables;
 *   - copyFromState(A, state) presumably loads them from state;
 *   - rounds presumably runs the rounds and writes the result back to state.
 * The local names `state` and `i` must stay as they are because those macros
 * may refer to them by name.
 */
static void KeccakPermutationOnWords(UINT64 *state)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably the loop counter
       used by `rounds` when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromState(A, state)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
190
/*
 * XORs the first laneCount 64-bit words of input into state, then applies
 * the permutation to state.
 *
 * state     - the state as 64-bit words; at least laneCount words are
 *             written by the XOR loop.
 * input     - at least laneCount 64-bit words to absorb.
 * laneCount - number of words to XOR; not range-checked here.
 *
 * declareABCDE, copyFromState and rounds are macros from the included
 * KeccakF-1600-*.macros files (not visible here); they may refer to the
 * locals `state` and `i` by name.
 */
static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif
    unsigned int j;

    /* Absorb the input directly into the stored state ... */
    for(j=0; j<laneCount; j++)
        state[j] ^= input[j];
    /* ... then load the updated state and permute it. */
    copyFromState(A, state)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
207
#ifdef ProvideFast576
/*
 * Fixed-size variant for 576 bits of input (9 64-bit words), compiled only
 * when ProvideFast576 is defined.
 *
 * copyFromStateAndXor576bits(A, state, input) is a macro from the included
 * .macros files (not visible here); presumably it loads the state while
 * XORing in the input words, after which `rounds` permutes and stores the
 * result. The macros may refer to the locals `state`, `input` and `i` by
 * name.
 */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
#endif
223
#ifdef ProvideFast832
/*
 * Fixed-size variant for 832 bits of input (13 64-bit words), compiled only
 * when ProvideFast832 is defined.
 *
 * copyFromStateAndXor832bits(A, state, input) is a macro from the included
 * .macros files (not visible here); presumably it loads the state while
 * XORing in the input words, after which `rounds` permutes and stores the
 * result. The macros may refer to the locals `state`, `input` and `i` by
 * name.
 */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
#endif
239
#ifdef ProvideFast1024
/*
 * Fixed-size variant for 1024 bits of input (16 64-bit words), compiled only
 * when ProvideFast1024 is defined.
 *
 * copyFromStateAndXor1024bits(A, state, input) is a macro from the included
 * .macros files (not visible here); presumably it loads the state while
 * XORing in the input words, after which `rounds` permutes and stores the
 * result. The macros may refer to the locals `state`, `input` and `i` by
 * name.
 */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
#endif
255
#ifdef ProvideFast1088
/*
 * Fixed-size variant for 1088 bits of input (17 64-bit words), compiled only
 * when ProvideFast1088 is defined.
 *
 * copyFromStateAndXor1088bits(A, state, input) is a macro from the included
 * .macros files (not visible here); presumably it loads the state while
 * XORing in the input words, after which `rounds` permutes and stores the
 * result. The macros may refer to the locals `state`, `input` and `i` by
 * name.
 */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
#endif
271
#ifdef ProvideFast1152
/*
 * Fixed-size variant for 1152 bits of input (18 64-bit words), compiled only
 * when ProvideFast1152 is defined.
 *
 * copyFromStateAndXor1152bits(A, state, input) is a macro from the included
 * .macros files (not visible here); presumably it loads the state while
 * XORing in the input words, after which `rounds` permutes and stores the
 * result. The macros may refer to the locals `state`, `input` and `i` by
 * name.
 */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
#endif
287
#ifdef ProvideFast1344
/*
 * Fixed-size variant for 1344 bits of input (21 64-bit words), compiled only
 * when ProvideFast1344 is defined.
 *
 * copyFromStateAndXor1344bits(A, state, input) is a macro from the included
 * .macros files (not visible here); presumably it loads the state while
 * XORing in the input words, after which `rounds` permutes and stores the
 * result. The macros may refer to the locals `state`, `input` and `i` by
 * name.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when Unrolling != 24; presumably used by `rounds`. */
    unsigned int i;
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX mode so that later x87 floating-point code works. */
    _mm_empty();
#endif
}
#endif
303
/*
 * One-time initialisation hook. This implementation needs no set-up, so the
 * body is empty.
 *
 * Declared with (void) so the definition is a real prototype: with an empty
 * parameter list, calls passing arguments would not be diagnosed before C23.
 */
static void KeccakInitialize(void)
{
}
307
/*
 * Resets a 200-byte state buffer to its initial value.
 *
 * All 200 bytes are cleared. When UseBebigokimisa is defined, lanes 1, 2, 8,
 * 12, 17 and 20 (8 bytes each) are then set to all-ones.
 *
 * state - buffer of at least 200 bytes.
 */
static void KeccakInitializeState(unsigned char *state)
{
    enum { stateSizeInBytes = 200, laneSizeInBytes = 8 };
#ifdef UseBebigokimisa
    static const unsigned int complementedLanes[] = {1, 2, 8, 12, 17, 20};
    unsigned int k;
#endif

    memset(state, 0, stateSizeInBytes);
#ifdef UseBebigokimisa
    /* An all-ones lane is eight 0xFF bytes regardless of byte order. */
    for(k=0; k<sizeof(complementedLanes)/sizeof(complementedLanes[0]); k++)
        memset(state + complementedLanes[k]*laneSizeInBytes, 0xFF, laneSizeInBytes);
#endif
}
320
321static void KeccakPermutation(unsigned char *state)
322{
323 /* We assume the state is always stored as words */
324 KeccakPermutationOnWords((UINT64*)state);
325}
326
Christian Heimesba3e4a02012-10-14 00:59:29 +0200327#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
Christian Heimes4a0270d2012-10-06 02:23:36 +0200328static void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
329{
330 unsigned int i;
331
332 *word = 0;
333 for(i=0; i<(64/8); i++)
334 *word |= (UINT64)(bytes[i]) << (8*i);
335}
Christian Heimesba3e4a02012-10-14 00:59:29 +0200336#endif
337
Christian Heimes4a0270d2012-10-06 02:23:36 +0200338
#ifdef ProvideFast576
/*
 * Absorbs 576 bits (9 lanes of 8 bytes) from data into state and permutes.
 *
 * On little-endian builds the data bytes are passed on directly, viewed as
 * 64-bit words; otherwise each lane is first assembled from its bytes with
 * fromBytesToWord.
 * NOTE(review): the direct path reads data through a UINT64 pointer, so it
 * presumably relies on suitably aligned input or tolerant hardware --
 * confirm against callers.
 */
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[9];
    unsigned int lane;

    for(lane=0; lane<9; lane++)
        fromBytesToWord(&lanes[lane], &data[8*lane]);
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, lanes);
#endif
}
#endif
354
#ifdef ProvideFast832
/*
 * Absorbs 832 bits (13 lanes of 8 bytes) from data into state and permutes.
 *
 * On little-endian builds the data bytes are passed on directly, viewed as
 * 64-bit words; otherwise each lane is first assembled from its bytes with
 * fromBytesToWord.
 * NOTE(review): the direct path reads data through a UINT64 pointer, so it
 * presumably relies on suitably aligned input or tolerant hardware --
 * confirm against callers.
 */
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[13];
    unsigned int lane;

    for(lane=0; lane<13; lane++)
        fromBytesToWord(&lanes[lane], &data[8*lane]);
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, lanes);
#endif
}
#endif
370
#ifdef ProvideFast1024
/*
 * Absorbs 1024 bits (16 lanes of 8 bytes) from data into state and permutes.
 *
 * On little-endian builds the data bytes are passed on directly, viewed as
 * 64-bit words; otherwise each lane is first assembled from its bytes with
 * fromBytesToWord.
 * NOTE(review): the direct path reads data through a UINT64 pointer, so it
 * presumably relies on suitably aligned input or tolerant hardware --
 * confirm against callers.
 */
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[16];
    unsigned int lane;

    for(lane=0; lane<16; lane++)
        fromBytesToWord(&lanes[lane], &data[8*lane]);
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, lanes);
#endif
}
#endif
386
#ifdef ProvideFast1088
/*
 * Absorbs 1088 bits (17 lanes of 8 bytes) from data into state and permutes.
 *
 * On little-endian builds the data bytes are passed on directly, viewed as
 * 64-bit words; otherwise each lane is first assembled from its bytes with
 * fromBytesToWord.
 * NOTE(review): the direct path reads data through a UINT64 pointer, so it
 * presumably relies on suitably aligned input or tolerant hardware --
 * confirm against callers.
 */
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[17];
    unsigned int lane;

    for(lane=0; lane<17; lane++)
        fromBytesToWord(&lanes[lane], &data[8*lane]);
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, lanes);
#endif
}
#endif
402
#ifdef ProvideFast1152
/*
 * Absorbs 1152 bits (18 lanes of 8 bytes) from data into state and permutes.
 *
 * On little-endian builds the data bytes are passed on directly, viewed as
 * 64-bit words; otherwise each lane is first assembled from its bytes with
 * fromBytesToWord.
 * NOTE(review): the direct path reads data through a UINT64 pointer, so it
 * presumably relies on suitably aligned input or tolerant hardware --
 * confirm against callers.
 */
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[18];
    unsigned int lane;

    for(lane=0; lane<18; lane++)
        fromBytesToWord(&lanes[lane], &data[8*lane]);
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, lanes);
#endif
}
#endif
418
#ifdef ProvideFast1344
/*
 * Absorbs 1344 bits (21 lanes of 8 bytes) from data into state and permutes.
 *
 * On little-endian builds the data bytes are passed on directly, viewed as
 * 64-bit words; otherwise each lane is first assembled from its bytes with
 * fromBytesToWord.
 * NOTE(review): the direct path reads data through a UINT64 pointer, so it
 * presumably relies on suitably aligned input or tolerant hardware --
 * confirm against callers.
 */
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
#else
    UINT64 lanes[21];
    unsigned int lane;

    for(lane=0; lane<21; lane++)
        fromBytesToWord(&lanes[lane], &data[8*lane]);
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, lanes);
#endif
}
#endif
434
435static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
436{
437#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
438 KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
439#else
440 UINT64 dataAsWords[25];
441 unsigned int i;
442
443 for(i=0; i<laneCount; i++)
444 fromBytesToWord(dataAsWords+i, data+(i*8));
445 KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
446#endif
447}
448
Christian Heimesba3e4a02012-10-14 00:59:29 +0200449#if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
Christian Heimes4a0270d2012-10-06 02:23:36 +0200450static void fromWordToBytes(UINT8 *bytes, const UINT64 word)
451{
452 unsigned int i;
453
454 for(i=0; i<(64/8); i++)
455 bytes[i] = (word >> (8*i)) & 0xFF;
456}
Christian Heimesba3e4a02012-10-14 00:59:29 +0200457#endif
458
Christian Heimes4a0270d2012-10-06 02:23:36 +0200459
#ifdef ProvideFast1024
/*
 * Copies the first 1024 bits (16 lanes, 128 bytes) of state into data.
 *
 * state - the state buffer; read as bytes on little-endian builds and as
 *         64-bit words otherwise.
 * data  - receives 128 bytes; no alignment is required.
 *
 * When UseBebigokimisa is defined, lanes 1, 2, 8 and 12 of the output are
 * complemented. This is done byte by byte, which gives the same result as
 * complementing the 64-bit lane but does not access the caller's byte
 * buffer through a UINT64 pointer.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned int complementedLanes[4] = {1, 2, 8, 12};
        unsigned int k, b;

        for(k=0; k<4; k++)
            for(b=0; b<8; b++)
                data[complementedLanes[k]*8 + b] ^= 0xFF;
    }
#endif
}
#endif
479
/*
 * Copies the first laneCount lanes (8 bytes each) of state into data.
 *
 * state     - the state buffer; read as bytes on little-endian builds and
 *             as 64-bit words otherwise.
 * data      - receives laneCount*8 bytes; no alignment is required.
 * laneCount - number of lanes to extract.
 *
 * When UseBebigokimisa is defined, each of the lanes 1, 2, 8, 12, 17 and 20
 * that lies within the first laneCount lanes is complemented in the output.
 * This is done byte by byte, which gives the same result as complementing
 * the 64-bit lane but does not access the caller's byte buffer through a
 * UINT64 pointer.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* Ascending order lets the loop stop at the first lane that was
           not extracted. */
        static const unsigned int complementedLanes[6] = {1, 2, 8, 12, 17, 20};
        unsigned int k, b;

        for(k=0; k<6 && complementedLanes[k]<laneCount; k++)
            for(b=0; b<8; b++)
                data[complementedLanes[k]*8 + b] ^= 0xFF;
    }
#endif
}