blob: 5987046685859829899895a4718a267c6b4ba616 [file] [log] [blame]
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/

Implementation by the designers,
hereby denoted as "the implementer".

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
13
14#include <string.h>
Christian Heimesdbc573f2012-10-07 22:44:41 +020015#include "brg_endian.h"
Christian Heimes4a0270d2012-10-06 02:23:36 +020016#include "KeccakF-1600-opt64-settings.h"
17#include "KeccakF-1600-interface.h"
18
/* 8-bit unsigned byte type. */
typedef unsigned char UINT8;
/* UINT64 is deliberately not defined here; presumably one of the headers
   included above supplies it -- confirm. */
/* typedef unsigned long long int UINT64; */

/*
 * ALIGN: storage qualifier requesting 32-byte alignment.
 * Uses the GCC attribute or the MSVC declspec when available and expands to
 * nothing on any other compiler.
 */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
29
30#if defined(UseSSE)
/*
 * SSE code path: vector types and primitive operations used by the
 * simd64 / simd128 round macros included at the end of this section.
 */
#include <x86intrin.h>
/* Both the 64-bit and the 128-bit vector types map onto __m128i. */
typedef __m128i V64;
typedef __m128i V128;
/* One 128-bit vector viewed alternatively as two 64-bit words. */
typedef union {
    V128 v128;
    UINT64 v64[2];
} V6464;

/* 64-bit primitives; loads and stores use the "l" (low 64 bits) intrinsics. */
#define ANDnu64(a, b) _mm_andnot_si128(a, b)
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
/* Rotate left by o: left shift OR-ed with the complementary right shift. */
#define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
/* Note: expands to an assignment to its first argument. */
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)

/* 128-bit primitives (two 64-bit words per vector). */
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
/* CONST128 and LOAD128 use the aligned load; LOAD128u uses the unaligned one. */
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
/* Same expansion as ROL64: rotates each 64-bit half by the same amount. */
#define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
/* Note: expands to an assignment to its first argument. */
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
/* Combine the low (resp. high) 64-bit halves of a and b. */
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
/* Duplicate the high (0xEE) or the low (0x44) 64-bit half into both halves. */
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
#define ZERO128() _mm_setzero_si128()

/* Select the round macros: 64-bit-only SIMD or the full 128-bit variant. */
#ifdef UseOnlySIMD64
    #include "KeccakF-1600-simd64.macros"
#else
/* Presumably a byte-shuffle control used by the simd128 macros for the
   rotations by 8 and 56 bits -- confirm against KeccakF-1600-simd128.macros. */
ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
    #include "KeccakF-1600-simd128.macros"
#endif

/* The SSE path cannot be built together with the UseBebigokimisa option. */
#ifdef UseBebigokimisa
    #error "UseBebigokimisa cannot be used in combination with UseSSE"
#endif
73#elif defined(UseXOP)
/*
 * XOP code path: vector types, primitive operations and per-lane rotation
 * tables used by the XOP round macros.
 */
#include <x86intrin.h>
/* Both the 64-bit and the 128-bit vector types map onto __m128i. */
typedef __m128i V64;
typedef __m128i V128;

/* 64-bit primitives; loads and stores use the "l" (low 64 bits) intrinsics. */
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
/* Note: expands to an assignment to its first argument. */
#define XOReq64(a, b) a = _mm_xor_si128(a, b)

/* 128-bit primitives (two 64-bit words per vector). */
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
/* CONST128 and LOAD128 use the aligned load; LOAD128u uses the unaligned one. */
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
/* Note: expands to an assignment to its first argument. */
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define ZERO128() _mm_setzero_si128()

/* Helpers that select or rearrange the 64-bit halves of 128-bit vectors. */
#define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
/* Built from GET64LOHI with swapped operands, followed by a half swap. */
#define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)

/* ROL6464same rotates both halves by the immediate o; ROL6464 rotates the
   two halves by the pair of amounts stored in the table rot_<r1>_<r2>. */
#define ROL6464same(a, o) _mm_roti_epi64(a, o)
#define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
/* Rotation-amount pairs referenced by ROL6464 through token pasting; each
   table name spells out the two values it contains. */
ALIGN const UINT64 rot_0_20[2] = { 0, 20};
ALIGN const UINT64 rot_44_3[2] = {44, 3};
ALIGN const UINT64 rot_43_45[2] = {43, 45};
ALIGN const UINT64 rot_21_61[2] = {21, 61};
ALIGN const UINT64 rot_14_28[2] = {14, 28};
ALIGN const UINT64 rot_1_36[2] = { 1, 36};
ALIGN const UINT64 rot_6_10[2] = { 6, 10};
ALIGN const UINT64 rot_25_15[2] = {25, 15};
ALIGN const UINT64 rot_8_56[2] = { 8, 56};
ALIGN const UINT64 rot_18_27[2] = {18, 27};
ALIGN const UINT64 rot_62_55[2] = {62, 55};
ALIGN const UINT64 rot_39_41[2] = {39, 41};
116
#if defined(UseSimulatedXOP)
    /* For debugging purposes, when XOP is not available */
    #undef ROL6464
    #undef ROL6464same
    /* Rotate both 64-bit halves left by o using plain SSE2 shifts. */
    #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
    /*
     * Software replacement for the XOP per-lane rotate.
     *
     * a:  vector holding two 64-bit words.
     * r0: left-rotation amount applied to the low word.
     * r1: left-rotation amount applied to the high word.
     * Returns the vector whose low word is the low word of a rotated by r0
     * and whose high word is the high word of a rotated by r1.
     *
     * The XOP branch defines no ROL64 macro, so the rotations are expressed
     * with ROL6464same, which is defined just above.
     */
    V128 ROL6464(V128 a, int r0, int r1)
    {
        /* Both halves rotated by r0; only the low half is kept below. */
        V128 a0 = ROL6464same(a, r0);
        /* Both halves rotated by r1, then the high half moved to the low half. */
        V128 a1 = COPY64HI2LO(ROL6464same(a, r1));
        /* Result: low half from a0, high half from the low half of a1. */
        return GET64LOLO(a0, a1);
    }
#endif
129
/* Pull in the round macros written against the XOP primitives defined above
   (file contents are not visible here). */
#include "KeccakF-1600-xop.macros"

/* The XOP path cannot be built together with the UseBebigokimisa option. */
#ifdef UseBebigokimisa
    #error "UseBebigokimisa cannot be used in combination with UseXOP"
#endif
135#elif defined(UseMMX)
/*
 * MMX code path: 64-bit vector type and primitive operations used by the
 * simd64 round macros included at the end of this section.
 */
#include <mmintrin.h>
typedef __m64 V64;
#define ANDnu64(a, b) _mm_andnot_si64(a, b)

/* Moving values between UINT64 storage and V64 depends on the compiler:
   MSVC and the Intel compiler reinterpret the storage through a pointer,
   every other compiler uses a direct value cast. */
#if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
    #define LOAD64(a) *(V64*)&(a)
    #define CONST64(a) *(V64*)&(a)
    #define STORE64(a, b) *(V64*)&(a) = b
#else
    #define LOAD64(a) (V64)a
    #define CONST64(a) (V64)a
    #define STORE64(a, b) a = (UINT64)b
#endif
/* Rotate left by o: left shift OR-ed with the complementary right shift. */
#define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
#define XOR64(a, b) _mm_xor_si64(a, b)
/* Note: expands to an assignment to its first argument. */
#define XOReq64(a, b) a = _mm_xor_si64(a, b)

#include "KeccakF-1600-simd64.macros"

/* The MMX path cannot be built together with the UseBebigokimisa option. */
#ifdef UseBebigokimisa
    #error "UseBebigokimisa cannot be used in combination with UseMMX"
#endif
158#else
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 *
 * Three implementations are selected at compile time:
 *   - MSVC:      the _rotl64 intrinsic;
 *   - UseSHLD:   an inline-assembly SHLD instruction (GNU statement
 *                expression; the offset must be a compile-time constant
 *                because of the "i" constraint);
 *   - otherwise: portable shifts. The offset must satisfy 0 < offset < 64,
 *                since a shift by 64 is undefined in C.
 *
 * In the portable form every macro parameter is parenthesised so that
 * expression arguments such as ROL64(x, n+1) expand correctly. Both
 * arguments are evaluated twice there, so they must be free of side effects.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
    /* Local names avoid the reserved "__" prefix (and clashes such as the
       __in annotation macro found in some Windows headers). */
    #define ROL64(x,N) ({ \
        register UINT64 rol64_out_; \
        register UINT64 rol64_in_ = (x); \
        __asm__ ("shld %2,%0,%0" : "=r"(rol64_out_) : "0"(rol64_in_), "i"(N)); \
        rol64_out_; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
171
172 #include "KeccakF-1600-64.macros"
173#endif
174
175#include "KeccakF-1600-unrolling.macros"
176
/*
 * Run the Keccak-f[1600] round macros on a state held as 64-bit words.
 *
 * state: the state words; loaded through copyFromState. The round macros
 *        come from the included .macros files and are not visible here --
 *        presumably they write the permuted value back into state.
 */
static void KeccakPermutationOnWords(UINT64 *state)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled (presumably the
       loop counter used inside the rounds macro). */
    unsigned int i;
#endif

    copyFromState(A, state)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
190
/*
 * XOR an input block into the state, then run the round macros on it.
 *
 * state:     the state as 64-bit words; modified in place by the XOR loop.
 * input:     the block to absorb, as 64-bit words.
 * laneCount: number of 64-bit words of input that are XORed into state.
 */
static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled (presumably the
       loop counter used inside the rounds macro). */
    unsigned int i;
#endif
    unsigned int j;

    /* Absorb: state[0..laneCount-1] ^= input[0..laneCount-1]. */
    for(j=0; j<laneCount; j++)
        state[j] ^= input[j];
    copyFromState(A, state)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
207
#ifdef ProvideFast576
/*
 * Fixed-size variant for 576-bit blocks (the caller in this file supplies
 * 9 words): the load of the state and the XOR with input are fused in
 * copyFromStateAndXor576bits, then the round macros are run.
 * Only compiled when ProvideFast576 is defined.
 *
 * state: the state as 64-bit words.
 * input: the block to absorb, as 64-bit words.
 */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
#endif
223
#ifdef ProvideFast832
/*
 * Fixed-size variant for 832-bit blocks (the caller in this file supplies
 * 13 words): the load of the state and the XOR with input are fused in
 * copyFromStateAndXor832bits, then the round macros are run.
 * Only compiled when ProvideFast832 is defined.
 *
 * state: the state as 64-bit words.
 * input: the block to absorb, as 64-bit words.
 */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
#endif
239
#ifdef ProvideFast1024
/*
 * Fixed-size variant for 1024-bit blocks (the caller in this file supplies
 * 16 words): the load of the state and the XOR with input are fused in
 * copyFromStateAndXor1024bits, then the round macros are run.
 * Only compiled when ProvideFast1024 is defined.
 *
 * state: the state as 64-bit words.
 * input: the block to absorb, as 64-bit words.
 */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
#endif
255
#ifdef ProvideFast1088
/*
 * Fixed-size variant for 1088-bit blocks (the caller in this file supplies
 * 17 words): the load of the state and the XOR with input are fused in
 * copyFromStateAndXor1088bits, then the round macros are run.
 * Only compiled when ProvideFast1088 is defined.
 *
 * state: the state as 64-bit words.
 * input: the block to absorb, as 64-bit words.
 */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
#endif
271
#ifdef ProvideFast1152
/*
 * Fixed-size variant for 1152-bit blocks (the caller in this file supplies
 * 18 words): the load of the state and the XOR with input are fused in
 * copyFromStateAndXor1152bits, then the round macros are run.
 * Only compiled when ProvideFast1152 is defined.
 *
 * state: the state as 64-bit words.
 * input: the block to absorb, as 64-bit words.
 */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
#endif
287
#ifdef ProvideFast1344
/*
 * Fixed-size variant for 1344-bit blocks (the caller in this file supplies
 * 21 words): the load of the state and the XOR with input are fused in
 * copyFromStateAndXor1344bits, then the round macros are run.
 * Only compiled when ProvideFast1344 is defined.
 *
 * state: the state as 64-bit words.
 * input: the block to absorb, as 64-bit words.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    /* Working variables expected by the round macros. */
    declareABCDE
#if (Unrolling != 24)
    /* Only declared when the rounds are not fully unrolled. */
    unsigned int i;
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#if defined(UseMMX)
    /* Leave MMX state so that later floating-point code is not disturbed. */
    _mm_empty();
#endif
}
#endif
303
/*
 * Global initialisation hook. This implementation needs no set-up, so the
 * body is intentionally empty.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype and calls with arguments are rejected by the compiler.
 */
static void KeccakInitialize(void)
{
}
307
/*
 * Reset a Keccak state to its initial value.
 *
 * state: buffer of 200 bytes (25 words of 64 bits). All bytes are cleared;
 *        when UseBebigokimisa is defined, words 1, 2, 8, 12, 17 and 20 are
 *        then set to all ones.
 */
static void KeccakInitializeState(unsigned char *state)
{
    enum { stateSizeInBytes = 200 };

    memset(state, 0, stateSizeInBytes);
#ifdef UseBebigokimisa
    {
        /* Words that start out as all ones under UseBebigokimisa. */
        static const unsigned int allOnesWords[6] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *words = (UINT64*)state;
        unsigned int k;

        for(k=0; k<6; k++)
            words[allOnesWords[k]] = ~(UINT64)0;
    }
#endif
}
320
321static void KeccakPermutation(unsigned char *state)
322{
323 /* We assume the state is always stored as words */
324 KeccakPermutationOnWords((UINT64*)state);
325}
326
327/*
328static void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
329{
330 unsigned int i;
331
332 *word = 0;
333 for(i=0; i<(64/8); i++)
334 *word |= (UINT64)(bytes[i]) << (8*i);
335}
336*/
337
#ifdef ProvideFast576
/*
 * Absorb one 576-bit block (72 bytes, 9 words) into the state and permute.
 *
 * state: the state buffer, assumed to be stored as 64-bit words.
 * data:  72 input bytes; no alignment is required.
 *
 * The input is first brought into a local UINT64 array. On little-endian
 * platforms this is a plain memcpy, which avoids reading a possibly
 * unaligned byte buffer through a UINT64 pointer. On other platforms the
 * words are assembled with fromBytesToWord.
 * NOTE(review): fromBytesToWord is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 */
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
    UINT64 dataAsWords[9];
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(dataAsWords, data, sizeof dataAsWords);
#else
    unsigned int i;

    for(i=0; i<9; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
#endif
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, dataAsWords);
}
#endif
353
#ifdef ProvideFast832
/*
 * Absorb one 832-bit block (104 bytes, 13 words) into the state and permute.
 *
 * state: the state buffer, assumed to be stored as 64-bit words.
 * data:  104 input bytes; no alignment is required.
 *
 * The input is first brought into a local UINT64 array. On little-endian
 * platforms this is a plain memcpy, which avoids reading a possibly
 * unaligned byte buffer through a UINT64 pointer. On other platforms the
 * words are assembled with fromBytesToWord.
 * NOTE(review): fromBytesToWord is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 */
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
    UINT64 dataAsWords[13];
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(dataAsWords, data, sizeof dataAsWords);
#else
    unsigned int i;

    for(i=0; i<13; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
#endif
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, dataAsWords);
}
#endif
369
#ifdef ProvideFast1024
/*
 * Absorb one 1024-bit block (128 bytes, 16 words) into the state and permute.
 *
 * state: the state buffer, assumed to be stored as 64-bit words.
 * data:  128 input bytes; no alignment is required.
 *
 * The input is first brought into a local UINT64 array. On little-endian
 * platforms this is a plain memcpy, which avoids reading a possibly
 * unaligned byte buffer through a UINT64 pointer. On other platforms the
 * words are assembled with fromBytesToWord.
 * NOTE(review): fromBytesToWord is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 */
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
    UINT64 dataAsWords[16];
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(dataAsWords, data, sizeof dataAsWords);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
#endif
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, dataAsWords);
}
#endif
385
#ifdef ProvideFast1088
/*
 * Absorb one 1088-bit block (136 bytes, 17 words) into the state and permute.
 *
 * state: the state buffer, assumed to be stored as 64-bit words.
 * data:  136 input bytes; no alignment is required.
 *
 * The input is first brought into a local UINT64 array. On little-endian
 * platforms this is a plain memcpy, which avoids reading a possibly
 * unaligned byte buffer through a UINT64 pointer. On other platforms the
 * words are assembled with fromBytesToWord.
 * NOTE(review): fromBytesToWord is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 */
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
    UINT64 dataAsWords[17];
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(dataAsWords, data, sizeof dataAsWords);
#else
    unsigned int i;

    for(i=0; i<17; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
#endif
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, dataAsWords);
}
#endif
401
#ifdef ProvideFast1152
/*
 * Absorb one 1152-bit block (144 bytes, 18 words) into the state and permute.
 *
 * state: the state buffer, assumed to be stored as 64-bit words.
 * data:  144 input bytes; no alignment is required.
 *
 * The input is first brought into a local UINT64 array. On little-endian
 * platforms this is a plain memcpy, which avoids reading a possibly
 * unaligned byte buffer through a UINT64 pointer. On other platforms the
 * words are assembled with fromBytesToWord.
 * NOTE(review): fromBytesToWord is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 */
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
    UINT64 dataAsWords[18];
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(dataAsWords, data, sizeof dataAsWords);
#else
    unsigned int i;

    for(i=0; i<18; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
#endif
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, dataAsWords);
}
#endif
417
#ifdef ProvideFast1344
/*
 * Absorb one 1344-bit block (168 bytes, 21 words) into the state and permute.
 *
 * state: the state buffer, assumed to be stored as 64-bit words.
 * data:  168 input bytes; no alignment is required.
 *
 * The input is first brought into a local UINT64 array. On little-endian
 * platforms this is a plain memcpy, which avoids reading a possibly
 * unaligned byte buffer through a UINT64 pointer. On other platforms the
 * words are assembled with fromBytesToWord.
 * NOTE(review): fromBytesToWord is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 */
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
    UINT64 dataAsWords[21];
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(dataAsWords, data, sizeof dataAsWords);
#else
    unsigned int i;

    for(i=0; i<21; i++)
        fromBytesToWord(dataAsWords+i, data+(i*8));
#endif
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, dataAsWords);
}
#endif
433
434static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
435{
436#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
437 KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
438#else
439 UINT64 dataAsWords[25];
440 unsigned int i;
441
442 for(i=0; i<laneCount; i++)
443 fromBytesToWord(dataAsWords+i, data+(i*8));
444 KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
445#endif
446}
447
448/*
449static void fromWordToBytes(UINT8 *bytes, const UINT64 word)
450{
451 unsigned int i;
452
453 for(i=0; i<(64/8); i++)
454 bytes[i] = (word >> (8*i)) & 0xFF;
455}
456*/
457
#ifdef ProvideFast1024
/*
 * Copy the first 1024 bits (128 bytes, 16 words) of the state to data.
 *
 * state: the state buffer, stored as 64-bit words.
 * data:  destination of 128 bytes; no alignment is required.
 *
 * On little-endian platforms the bytes are copied directly; otherwise each
 * word is serialised with fromWordToBytes.
 * NOTE(review): fromWordToBytes is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 *
 * When UseBebigokimisa is defined, words 1, 2, 8 and 12 of the output are
 * complemented. This is done byte by byte, which gives the same result as
 * complementing the whole word on any byte order and does not require data
 * to be aligned for 64-bit access.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned int complementedWords[4] = { 1, 2, 8, 12 };
        unsigned int k, b;

        for(k=0; k<4; k++)
            for(b=0; b<8; b++)
                data[complementedWords[k]*8 + b] = (unsigned char)~data[complementedWords[k]*8 + b];
    }
#endif
}
#endif
477
/*
 * Copy the first laneCount 64-bit words of the state to data.
 *
 * state:     the state buffer, stored as 64-bit words.
 * data:      destination of laneCount*8 bytes; no alignment is required.
 * laneCount: number of 64-bit words to extract.
 *
 * On little-endian platforms the bytes are copied directly; otherwise each
 * word is serialised with fromWordToBytes.
 * NOTE(review): fromWordToBytes is commented out earlier in this file, so the
 * non-little-endian branch relies on a definition from elsewhere -- confirm.
 *
 * When UseBebigokimisa is defined, those of words 1, 2, 8, 12, 17 and 20
 * that lie within the extracted range are complemented. This is done byte
 * by byte, which gives the same result as complementing the whole word on
 * any byte order and does not require data to be aligned for 64-bit access.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* Ascending order: once one word is out of range, so are the rest. */
        static const unsigned int complementedWords[6] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k, b;

        for(k=0; k<6 && complementedWords[k] < laneCount; k++)
            for(b=0; b<8; b++)
                data[complementedWords[k]*8 + b] = (unsigned char)~data[complementedWords[k]*8 + b];
    }
#endif
}