blob: 780b4936f78393b4db8e290bcd1eb7b19b0d7e0d [file] [log] [blame]
/***********************************************************************
**
** Implementation of the Skein block functions.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
** Compile-time switches:
**
**  SKEIN_USE_ASM  -- set bits (256/512/1024) to select which
**                    versions use ASM code for block processing
**                    [default: use C for all block sizes]
**
************************************************************************/
16
Jason Cooperc2c74262014-03-24 02:32:49 +000017#include <linux/string.h>
Jason Cooper449bb812014-03-24 01:48:58 +000018#include <skein.h>
19
/*
 * SKEIN_USE_ASM is a bit mask: this file tests it against 256, 512 and 1024
 * and only compiles the C block function for a size whose bit is clear.
 */
#ifndef SKEIN_USE_ASM
#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
#endif

/*
 * SKEIN_LOOP encodes one unroll factor per block size in the decimal digits
 * of its value: hundreds digit -> 256, tens digit -> 512, units digit -> 1024.
 * A digit of 0 selects the fully unrolled code, a digit N > 0 selects a loop
 * that runs N eight-round groups per iteration.
 *
 * NOTE: a constant written with a leading zero is an octal literal in C.
 * 001 happens to equal 1, but e.g. 010 would be 8, not ten.
 */
#ifndef SKEIN_LOOP
#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
#endif

/*
 * Helpers used inside the block functions. They expand in terms of the
 * locals WCNT and kw that each block function declares.
 */
#define BLK_BITS (WCNT*64)	/* block size in bits */
#define KW_TWK_BASE (0)		/* offset of the tweak words inside kw[] */
#define KW_KEY_BASE (3)		/* offset of the key words inside kw[] */
#define ks (kw + KW_KEY_BASE)	/* key schedule view of kw[] */
#define ts (kw + KW_TWK_BASE)	/* tweak schedule view of kw[] */

/*
 * DebugSaveTweak() copies the working tweak words back into the context
 * header when SKEIN_DEBUG is defined and is a no-op otherwise. Both forms
 * are a single statement so that "DebugSaveTweak(ctx);" is safe anywhere
 * a statement is allowed (including an unbraced if/else arm).
 */
#ifdef SKEIN_DEBUG
#define DebugSaveTweak(ctx) \
	do { \
		(ctx)->h.T[0] = ts[0]; \
		(ctx)->h.T[1] = ts[1]; \
	} while (0)
#else
#define DebugSaveTweak(ctx) do { } while (0)
#endif
39
40/***************************** Skein_256 ******************************/
41#if !(SKEIN_USE_ASM & 256)
Jason Cooper60eb8172014-03-24 01:49:11 +000042void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr,
43 size_t blkCnt, size_t byteCntAdd)
Jason Cooper39bd42b2014-03-24 01:49:09 +000044 { /* do it in C */
45 enum {
46 WCNT = SKEIN_256_STATE_WORDS
47 };
Jason Cooper449bb812014-03-24 01:48:58 +000048#undef RCNT
49#define RCNT (SKEIN_256_ROUNDS_TOTAL/8)
50
Jason Cooper60eb8172014-03-24 01:49:11 +000051#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
Jason Cooper449bb812014-03-24 01:48:58 +000052#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
53#else
54#define SKEIN_UNROLL_256 (0)
55#endif
56
57#if SKEIN_UNROLL_256
58#if (RCNT % SKEIN_UNROLL_256)
Jason Cooper60eb8172014-03-24 01:49:11 +000059#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
Jason Cooper449bb812014-03-24 01:48:58 +000060#endif
Jason Cooper39bd42b2014-03-24 01:49:09 +000061 size_t r;
Jason Cooper60eb8172014-03-24 01:49:11 +000062 u64 kw[WCNT+4+RCNT*2]; /* key schedule: chaining vars + tweak + "rot"*/
Jason Cooper449bb812014-03-24 01:48:58 +000063#else
Jason Cooper60eb8172014-03-24 01:49:11 +000064 u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
Jason Cooper449bb812014-03-24 01:48:58 +000065#endif
Jason Cooper60eb8172014-03-24 01:49:11 +000066 u64 X0, X1, X2, X3; /* local copy of context vars, for speed */
67 u64 w[WCNT]; /* local copy of input block */
Jason Cooper449bb812014-03-24 01:48:58 +000068#ifdef SKEIN_DEBUG
Jason Cooper60eb8172014-03-24 01:49:11 +000069 const u64 *Xptr[4]; /* use for debugging (help cc put Xn in regs) */
Jason Cooper39bd42b2014-03-24 01:49:09 +000070 Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
Jason Cooper449bb812014-03-24 01:48:58 +000071#endif
Jason Cooper60eb8172014-03-24 01:49:11 +000072 Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
Jason Cooper39bd42b2014-03-24 01:49:09 +000073 ts[0] = ctx->h.T[0];
74 ts[1] = ctx->h.T[1];
75 do {
Jason Cooper60eb8172014-03-24 01:49:11 +000076 /*
77 * this implementation only supports 2**64 input bytes
78 * (no carry out here)
79 */
80 ts[0] += byteCntAdd; /* update processed length */
Jason Cooper449bb812014-03-24 01:48:58 +000081
Jason Cooper39bd42b2014-03-24 01:49:09 +000082 /* precompute the key schedule for this block */
Jason Cooper06a620f2014-03-24 01:49:10 +000083 ks[0] = ctx->X[0];
Jason Cooper39bd42b2014-03-24 01:49:09 +000084 ks[1] = ctx->X[1];
85 ks[2] = ctx->X[2];
86 ks[3] = ctx->X[3];
87 ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
Jason Cooper449bb812014-03-24 01:48:58 +000088
Jason Cooper39bd42b2014-03-24 01:49:09 +000089 ts[2] = ts[0] ^ ts[1];
Jason Cooper449bb812014-03-24 01:48:58 +000090
Jason Cooper60eb8172014-03-24 01:49:11 +000091 /* get input block in little-endian format */
92 Skein_Get64_LSB_First(w, blkPtr, WCNT);
Jason Cooper39bd42b2014-03-24 01:49:09 +000093 DebugSaveTweak(ctx);
94 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
Jason Cooper449bb812014-03-24 01:48:58 +000095
Jason Cooper60eb8172014-03-24 01:49:11 +000096 X0 = w[0] + ks[0]; /* do the first full key injection */
Jason Cooper39bd42b2014-03-24 01:49:09 +000097 X1 = w[1] + ks[1] + ts[0];
98 X2 = w[2] + ks[2] + ts[1];
99 X3 = w[3] + ks[3];
Jason Cooper449bb812014-03-24 01:48:58 +0000100
Jason Cooper60eb8172014-03-24 01:49:11 +0000101 /* show starting state values */
102 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
103 Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000104
Jason Cooper39bd42b2014-03-24 01:49:09 +0000105 blkPtr += SKEIN_256_BLOCK_BYTES;
Jason Cooper449bb812014-03-24 01:48:58 +0000106
Jason Cooper39bd42b2014-03-24 01:49:09 +0000107 /* run the rounds */
Jason Cooper449bb812014-03-24 01:48:58 +0000108
Jason Coopera0d5dd82014-03-24 01:49:07 +0000109#define Round256(p0, p1, p2, p3, ROT, rNum) \
Jason Cooper39bd42b2014-03-24 01:49:09 +0000110 X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
111 X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
Jason Cooper449bb812014-03-24 01:48:58 +0000112
Jason Cooper06a620f2014-03-24 01:49:10 +0000113#if SKEIN_UNROLL_256 == 0
Jason Cooper60eb8172014-03-24 01:49:11 +0000114#define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \
115 Round256(p0, p1, p2, p3, ROT, rNum) \
Jason Cooper39bd42b2014-03-24 01:49:09 +0000116 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000117
Jason Cooper60eb8172014-03-24 01:49:11 +0000118#define I256(R) \
119 /* inject the key schedule value */ \
120 X0 += ks[((R)+1) % 5]; \
121 X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
122 X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
123 X3 += ks[((R)+4) % 5] + (R)+1; \
Jason Cooper39bd42b2014-03-24 01:49:09 +0000124 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
Jason Cooper60eb8172014-03-24 01:49:11 +0000125#else /* looping version */
126#define R256(p0, p1, p2, p3, ROT, rNum) \
127 Round256(p0, p1, p2, p3, ROT, rNum) \
Jason Cooper39bd42b2014-03-24 01:49:09 +0000128 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000129
Jason Cooper60eb8172014-03-24 01:49:11 +0000130#define I256(R) \
131 /* inject the key schedule value */ \
132 X0 += ks[r+(R)+0]; \
133 X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
134 X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
135 X3 += ks[r+(R)+3] + r+(R); \
136 /* rotate key schedule */ \
137 ks[r + (R) + 4] = ks[r + (R) - 1]; \
138 ts[r + (R) + 2] = ts[r + (R) - 1]; \
Jason Cooper39bd42b2014-03-24 01:49:09 +0000139 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000140
Jason Cooper60eb8172014-03-24 01:49:11 +0000141 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256)
Jason Cooper06a620f2014-03-24 01:49:10 +0000142#endif
143 {
Jason Cooper449bb812014-03-24 01:48:58 +0000144#define R256_8_rounds(R) \
Jason Cooper39bd42b2014-03-24 01:49:09 +0000145 R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \
146 R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \
147 R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \
148 R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \
149 I256(2 * (R)); \
150 R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \
151 R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \
152 R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \
153 R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \
154 I256(2 * (R) + 1);
Jason Cooper449bb812014-03-24 01:48:58 +0000155
Jason Cooper39bd42b2014-03-24 01:49:09 +0000156 R256_8_rounds(0);
Jason Cooper449bb812014-03-24 01:48:58 +0000157
Jason Cooper60eb8172014-03-24 01:49:11 +0000158#define R256_Unroll_R(NN) \
159 ((SKEIN_UNROLL_256 == 0 && \
160 SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || \
161 (SKEIN_UNROLL_256 > (NN)))
Jason Cooper449bb812014-03-24 01:48:58 +0000162
Jason Cooper39bd42b2014-03-24 01:49:09 +0000163 #if R256_Unroll_R(1)
164 R256_8_rounds(1);
165 #endif
166 #if R256_Unroll_R(2)
167 R256_8_rounds(2);
168 #endif
169 #if R256_Unroll_R(3)
170 R256_8_rounds(3);
171 #endif
172 #if R256_Unroll_R(4)
173 R256_8_rounds(4);
174 #endif
175 #if R256_Unroll_R(5)
176 R256_8_rounds(5);
177 #endif
178 #if R256_Unroll_R(6)
179 R256_8_rounds(6);
180 #endif
181 #if R256_Unroll_R(7)
182 R256_8_rounds(7);
183 #endif
184 #if R256_Unroll_R(8)
185 R256_8_rounds(8);
186 #endif
187 #if R256_Unroll_R(9)
188 R256_8_rounds(9);
189 #endif
190 #if R256_Unroll_R(10)
191 R256_8_rounds(10);
192 #endif
193 #if R256_Unroll_R(11)
194 R256_8_rounds(11);
195 #endif
196 #if R256_Unroll_R(12)
197 R256_8_rounds(12);
198 #endif
199 #if R256_Unroll_R(13)
200 R256_8_rounds(13);
201 #endif
202 #if R256_Unroll_R(14)
203 R256_8_rounds(14);
204 #endif
205 #if (SKEIN_UNROLL_256 > 14)
Jason Cooper449bb812014-03-24 01:48:58 +0000206#error "need more unrolling in Skein_256_Process_Block"
Jason Cooper39bd42b2014-03-24 01:49:09 +0000207 #endif
208 }
Jason Cooper60eb8172014-03-24 01:49:11 +0000209 /* do the final "feedforward" xor, update context chaining */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000210 ctx->X[0] = X0 ^ w[0];
211 ctx->X[1] = X1 ^ w[1];
212 ctx->X[2] = X2 ^ w[2];
213 ctx->X[3] = X3 ^ w[3];
Jason Cooper449bb812014-03-24 01:48:58 +0000214
Jason Cooper39bd42b2014-03-24 01:49:09 +0000215 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
Jason Cooper449bb812014-03-24 01:48:58 +0000216
Jason Cooper39bd42b2014-03-24 01:49:09 +0000217 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
218 }
219 while (--blkCnt);
220 ctx->h.T[0] = ts[0];
221 ctx->h.T[1] = ts[1];
222}
Jason Cooper449bb812014-03-24 01:48:58 +0000223
224#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
225size_t Skein_256_Process_Block_CodeSize(void)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000226{
227 return ((u8 *) Skein_256_Process_Block_CodeSize) -
228 ((u8 *) Skein_256_Process_Block);
229}
Jason Cooper2ab31bb2014-03-24 01:49:03 +0000230unsigned int Skein_256_Unroll_Cnt(void)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000231{
232 return SKEIN_UNROLL_256;
233}
Jason Cooper449bb812014-03-24 01:48:58 +0000234#endif
235#endif
236
237/***************************** Skein_512 ******************************/
238#if !(SKEIN_USE_ASM & 512)
Jason Cooper60eb8172014-03-24 01:49:11 +0000239void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr,
240 size_t blkCnt, size_t byteCntAdd)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000241{ /* do it in C */
242 enum {
243 WCNT = SKEIN_512_STATE_WORDS
244 };
Jason Cooper449bb812014-03-24 01:48:58 +0000245#undef RCNT
246#define RCNT (SKEIN_512_ROUNDS_TOTAL/8)
247
Jason Cooper60eb8172014-03-24 01:49:11 +0000248#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
Jason Cooper449bb812014-03-24 01:48:58 +0000249#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
250#else
251#define SKEIN_UNROLL_512 (0)
252#endif
253
254#if SKEIN_UNROLL_512
255#if (RCNT % SKEIN_UNROLL_512)
Jason Cooper60eb8172014-03-24 01:49:11 +0000256#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
Jason Cooper449bb812014-03-24 01:48:58 +0000257#endif
Jason Cooper39bd42b2014-03-24 01:49:09 +0000258 size_t r;
Jason Cooper60eb8172014-03-24 01:49:11 +0000259 u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot"*/
Jason Cooper449bb812014-03-24 01:48:58 +0000260#else
Jason Cooper60eb8172014-03-24 01:49:11 +0000261 u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
Jason Cooper449bb812014-03-24 01:48:58 +0000262#endif
Jason Cooper60eb8172014-03-24 01:49:11 +0000263 u64 X0, X1, X2, X3, X4, X5, X6, X7; /* local copies, for speed */
264 u64 w[WCNT]; /* local copy of input block */
Jason Cooper449bb812014-03-24 01:48:58 +0000265#ifdef SKEIN_DEBUG
Jason Cooper60eb8172014-03-24 01:49:11 +0000266 const u64 *Xptr[8]; /* use for debugging (help cc put Xn in regs) */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000267 Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
268 Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
Jason Cooper449bb812014-03-24 01:48:58 +0000269#endif
270
Jason Cooper60eb8172014-03-24 01:49:11 +0000271 Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000272 ts[0] = ctx->h.T[0];
273 ts[1] = ctx->h.T[1];
274 do {
Jason Cooper60eb8172014-03-24 01:49:11 +0000275 /*
276 * this implementation only supports 2**64 input bytes
277 * (no carry out here)
278 */
279 ts[0] += byteCntAdd; /* update processed length */
Jason Cooper449bb812014-03-24 01:48:58 +0000280
Jason Cooper39bd42b2014-03-24 01:49:09 +0000281 /* precompute the key schedule for this block */
282 ks[0] = ctx->X[0];
283 ks[1] = ctx->X[1];
284 ks[2] = ctx->X[2];
285 ks[3] = ctx->X[3];
286 ks[4] = ctx->X[4];
287 ks[5] = ctx->X[5];
288 ks[6] = ctx->X[6];
289 ks[7] = ctx->X[7];
Jason Cooper06a620f2014-03-24 01:49:10 +0000290 ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
Jason Cooper39bd42b2014-03-24 01:49:09 +0000291 ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
Jason Cooper449bb812014-03-24 01:48:58 +0000292
Jason Cooper39bd42b2014-03-24 01:49:09 +0000293 ts[2] = ts[0] ^ ts[1];
Jason Cooper449bb812014-03-24 01:48:58 +0000294
Jason Cooper60eb8172014-03-24 01:49:11 +0000295 /* get input block in little-endian format */
296 Skein_Get64_LSB_First(w, blkPtr, WCNT);
Jason Cooper39bd42b2014-03-24 01:49:09 +0000297 DebugSaveTweak(ctx);
298 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
Jason Cooper449bb812014-03-24 01:48:58 +0000299
Jason Cooper60eb8172014-03-24 01:49:11 +0000300 X0 = w[0] + ks[0]; /* do the first full key injection */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000301 X1 = w[1] + ks[1];
302 X2 = w[2] + ks[2];
303 X3 = w[3] + ks[3];
304 X4 = w[4] + ks[4];
305 X5 = w[5] + ks[5] + ts[0];
306 X6 = w[6] + ks[6] + ts[1];
307 X7 = w[7] + ks[7];
Jason Cooper449bb812014-03-24 01:48:58 +0000308
Jason Cooper39bd42b2014-03-24 01:49:09 +0000309 blkPtr += SKEIN_512_BLOCK_BYTES;
Jason Cooper449bb812014-03-24 01:48:58 +0000310
Jason Cooper60eb8172014-03-24 01:49:11 +0000311 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
312 Xptr);
Jason Cooper39bd42b2014-03-24 01:49:09 +0000313 /* run the rounds */
Jason Cooper60eb8172014-03-24 01:49:11 +0000314#define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
315 X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
316 X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
317 X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4; \
318 X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; \
Jason Cooper449bb812014-03-24 01:48:58 +0000319
Jason Cooper06a620f2014-03-24 01:49:10 +0000320#if SKEIN_UNROLL_512 == 0
Jason Cooper60eb8172014-03-24 01:49:11 +0000321#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \
322 Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
323 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000324
Jason Cooper60eb8172014-03-24 01:49:11 +0000325#define I512(R) \
326 /* inject the key schedule value */ \
327 X0 += ks[((R) + 1) % 9]; \
328 X1 += ks[((R) + 2) % 9]; \
329 X2 += ks[((R) + 3) % 9]; \
330 X3 += ks[((R) + 4) % 9]; \
331 X4 += ks[((R) + 5) % 9]; \
332 X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \
333 X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \
334 X7 += ks[((R) + 8) % 9] + (R) + 1; \
335 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
336#else /* looping version */
337#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
338 Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
339 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000340
Jason Cooper60eb8172014-03-24 01:49:11 +0000341#define I512(R) \
342 /* inject the key schedule value */ \
343 X0 += ks[r + (R) + 0]; \
344 X1 += ks[r + (R) + 1]; \
345 X2 += ks[r + (R) + 2]; \
346 X3 += ks[r + (R) + 3]; \
347 X4 += ks[r + (R) + 4]; \
348 X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \
349 X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \
350 X7 += ks[r + (R) + 7] + r + (R); \
351 /* rotate key schedule */ \
352 ks[r + (R) + 8] = ks[r + (R) - 1]; \
353 ts[r + (R) + 2] = ts[r + (R) - 1]; \
354 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000355
Jason Cooper60eb8172014-03-24 01:49:11 +0000356 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512)
357#endif /* end of looped code definitions */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000358 {
Jason Cooper449bb812014-03-24 01:48:58 +0000359#define R512_8_rounds(R) /* do 8 full rounds */ \
Jason Cooper60eb8172014-03-24 01:49:11 +0000360 R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \
361 R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \
362 R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \
363 R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \
364 I512(2 * (R)); \
365 R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \
366 R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \
367 R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \
368 R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \
369 I512(2 * (R) + 1); /* and key injection */
Jason Cooper449bb812014-03-24 01:48:58 +0000370
Jason Cooper39bd42b2014-03-24 01:49:09 +0000371 R512_8_rounds(0);
Jason Cooper449bb812014-03-24 01:48:58 +0000372
Jason Cooper60eb8172014-03-24 01:49:11 +0000373#define R512_Unroll_R(NN) \
374 ((SKEIN_UNROLL_512 == 0 && \
375 SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || \
376 (SKEIN_UNROLL_512 > (NN)))
Jason Cooper449bb812014-03-24 01:48:58 +0000377
Jason Cooper39bd42b2014-03-24 01:49:09 +0000378 #if R512_Unroll_R(1)
379 R512_8_rounds(1);
380 #endif
381 #if R512_Unroll_R(2)
382 R512_8_rounds(2);
383 #endif
384 #if R512_Unroll_R(3)
385 R512_8_rounds(3);
386 #endif
387 #if R512_Unroll_R(4)
388 R512_8_rounds(4);
389 #endif
390 #if R512_Unroll_R(5)
391 R512_8_rounds(5);
392 #endif
393 #if R512_Unroll_R(6)
394 R512_8_rounds(6);
395 #endif
396 #if R512_Unroll_R(7)
397 R512_8_rounds(7);
398 #endif
399 #if R512_Unroll_R(8)
400 R512_8_rounds(8);
401 #endif
402 #if R512_Unroll_R(9)
403 R512_8_rounds(9);
404 #endif
405 #if R512_Unroll_R(10)
406 R512_8_rounds(10);
407 #endif
408 #if R512_Unroll_R(11)
409 R512_8_rounds(11);
410 #endif
411 #if R512_Unroll_R(12)
412 R512_8_rounds(12);
413 #endif
414 #if R512_Unroll_R(13)
415 R512_8_rounds(13);
416 #endif
417 #if R512_Unroll_R(14)
418 R512_8_rounds(14);
419 #endif
420 #if (SKEIN_UNROLL_512 > 14)
Jason Cooper449bb812014-03-24 01:48:58 +0000421#error "need more unrolling in Skein_512_Process_Block"
Jason Cooper39bd42b2014-03-24 01:49:09 +0000422 #endif
423 }
Jason Cooper449bb812014-03-24 01:48:58 +0000424
Jason Cooper60eb8172014-03-24 01:49:11 +0000425 /* do the final "feedforward" xor, update context chaining */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000426 ctx->X[0] = X0 ^ w[0];
427 ctx->X[1] = X1 ^ w[1];
428 ctx->X[2] = X2 ^ w[2];
429 ctx->X[3] = X3 ^ w[3];
430 ctx->X[4] = X4 ^ w[4];
431 ctx->X[5] = X5 ^ w[5];
432 ctx->X[6] = X6 ^ w[6];
433 ctx->X[7] = X7 ^ w[7];
434 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
Jason Cooper449bb812014-03-24 01:48:58 +0000435
Jason Cooper39bd42b2014-03-24 01:49:09 +0000436 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
437 }
438 while (--blkCnt);
439 ctx->h.T[0] = ts[0];
440 ctx->h.T[1] = ts[1];
441}
Jason Cooper449bb812014-03-24 01:48:58 +0000442
443#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
444size_t Skein_512_Process_Block_CodeSize(void)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000445{
446 return ((u8 *) Skein_512_Process_Block_CodeSize) -
447 ((u8 *) Skein_512_Process_Block);
448}
Jason Cooper2ab31bb2014-03-24 01:49:03 +0000449unsigned int Skein_512_Unroll_Cnt(void)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000450{
451 return SKEIN_UNROLL_512;
452}
Jason Cooper449bb812014-03-24 01:48:58 +0000453#endif
454#endif
455
456/***************************** Skein1024 ******************************/
457#if !(SKEIN_USE_ASM & 1024)
Jason Cooper60eb8172014-03-24 01:49:11 +0000458void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, \
459 size_t blkCnt, size_t byteCntAdd)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000460{ /* do it in C, always looping (unrolled is bigger AND slower!) */
461 enum {
462 WCNT = SKEIN1024_STATE_WORDS
463 };
Jason Cooper449bb812014-03-24 01:48:58 +0000464#undef RCNT
465#define RCNT (SKEIN1024_ROUNDS_TOTAL/8)
466
Jason Cooper60eb8172014-03-24 01:49:11 +0000467#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
Jason Cooper449bb812014-03-24 01:48:58 +0000468#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
469#else
470#define SKEIN_UNROLL_1024 (0)
471#endif
472
473#if (SKEIN_UNROLL_1024 != 0)
474#if (RCNT % SKEIN_UNROLL_1024)
Jason Cooper60eb8172014-03-24 01:49:11 +0000475#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
Jason Cooper449bb812014-03-24 01:48:58 +0000476#endif
Jason Cooper39bd42b2014-03-24 01:49:09 +0000477 size_t r;
Jason Cooper60eb8172014-03-24 01:49:11 +0000478 u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot" */
Jason Cooper449bb812014-03-24 01:48:58 +0000479#else
Jason Cooper60eb8172014-03-24 01:49:11 +0000480 u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
Jason Cooper449bb812014-03-24 01:48:58 +0000481#endif
482
Jason Cooper60eb8172014-03-24 01:49:11 +0000483 /* local copy of vars, for speed */
484 u64 X00, X01, X02, X03, X04, X05, X06, X07,
485 X08, X09, X10, X11, X12, X13, X14, X15;
486 u64 w[WCNT]; /* local copy of input block */
Jason Cooper449bb812014-03-24 01:48:58 +0000487#ifdef SKEIN_DEBUG
Jason Cooper60eb8172014-03-24 01:49:11 +0000488 const u64 *Xptr[16]; /* use for debugging (help cc put Xn in regs) */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000489 Xptr[0] = &X00; Xptr[1] = &X01; Xptr[2] = &X02; Xptr[3] = &X03;
490 Xptr[4] = &X04; Xptr[5] = &X05; Xptr[6] = &X06; Xptr[7] = &X07;
491 Xptr[8] = &X08; Xptr[9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
492 Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15;
Jason Cooper449bb812014-03-24 01:48:58 +0000493#endif
494
Jason Cooper60eb8172014-03-24 01:49:11 +0000495 Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000496 ts[0] = ctx->h.T[0];
497 ts[1] = ctx->h.T[1];
498 do {
Jason Cooper60eb8172014-03-24 01:49:11 +0000499 /*
500 * this implementation only supports 2**64 input bytes
501 * (no carry out here)
502 */
503 ts[0] += byteCntAdd; /* update processed length */
Jason Cooper449bb812014-03-24 01:48:58 +0000504
Jason Cooper39bd42b2014-03-24 01:49:09 +0000505 /* precompute the key schedule for this block */
506 ks[0] = ctx->X[0];
507 ks[1] = ctx->X[1];
508 ks[2] = ctx->X[2];
509 ks[3] = ctx->X[3];
510 ks[4] = ctx->X[4];
511 ks[5] = ctx->X[5];
512 ks[6] = ctx->X[6];
513 ks[7] = ctx->X[7];
514 ks[8] = ctx->X[8];
515 ks[9] = ctx->X[9];
516 ks[10] = ctx->X[10];
517 ks[11] = ctx->X[11];
518 ks[12] = ctx->X[12];
519 ks[13] = ctx->X[13];
520 ks[14] = ctx->X[14];
521 ks[15] = ctx->X[15];
522 ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
523 ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
524 ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
525 ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
Jason Cooper449bb812014-03-24 01:48:58 +0000526
Jason Cooper39bd42b2014-03-24 01:49:09 +0000527 ts[2] = ts[0] ^ ts[1];
Jason Cooper449bb812014-03-24 01:48:58 +0000528
Jason Cooper60eb8172014-03-24 01:49:11 +0000529 /* get input block in little-endian format */
530 Skein_Get64_LSB_First(w, blkPtr, WCNT);
Jason Cooper39bd42b2014-03-24 01:49:09 +0000531 DebugSaveTweak(ctx);
532 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
Jason Cooper449bb812014-03-24 01:48:58 +0000533
Jason Cooper60eb8172014-03-24 01:49:11 +0000534 X00 = w[0] + ks[0]; /* do the first full key injection */
Jason Cooper39bd42b2014-03-24 01:49:09 +0000535 X01 = w[1] + ks[1];
536 X02 = w[2] + ks[2];
537 X03 = w[3] + ks[3];
538 X04 = w[4] + ks[4];
539 X05 = w[5] + ks[5];
540 X06 = w[6] + ks[6];
541 X07 = w[7] + ks[7];
542 X08 = w[8] + ks[8];
543 X09 = w[9] + ks[9];
544 X10 = w[10] + ks[10];
545 X11 = w[11] + ks[11];
546 X12 = w[12] + ks[12];
547 X13 = w[13] + ks[13] + ts[0];
548 X14 = w[14] + ks[14] + ts[1];
549 X15 = w[15] + ks[15];
Jason Cooper449bb812014-03-24 01:48:58 +0000550
Jason Cooper60eb8172014-03-24 01:49:11 +0000551 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
552 Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000553
Jason Cooper60eb8172014-03-24 01:49:11 +0000554#define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \
555 pF, ROT, rNum) \
556 X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
557 X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
558 X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4; \
559 X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; \
560 X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8; \
561 X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA; \
562 X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC; \
563 X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; \
Jason Cooper449bb812014-03-24 01:48:58 +0000564
Jason Cooper06a620f2014-03-24 01:49:10 +0000565#if SKEIN_UNROLL_1024 == 0
Jason Cooper60eb8172014-03-24 01:49:11 +0000566#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \
567 ROT, rn) \
568 Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \
569 pF, ROT, rn) \
570 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000571
Jason Cooper60eb8172014-03-24 01:49:11 +0000572#define I1024(R) \
573 /* inject the key schedule value */ \
574 X00 += ks[((R) + 1) % 17]; \
575 X01 += ks[((R) + 2) % 17]; \
576 X02 += ks[((R) + 3) % 17]; \
577 X03 += ks[((R) + 4) % 17]; \
578 X04 += ks[((R) + 5) % 17]; \
579 X05 += ks[((R) + 6) % 17]; \
580 X06 += ks[((R) + 7) % 17]; \
581 X07 += ks[((R) + 8) % 17]; \
582 X08 += ks[((R) + 9) % 17]; \
583 X09 += ks[((R) + 10) % 17]; \
584 X10 += ks[((R) + 11) % 17]; \
585 X11 += ks[((R) + 12) % 17]; \
586 X12 += ks[((R) + 13) % 17]; \
587 X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \
588 X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \
589 X15 += ks[((R) + 16) % 17] + (R) + 1; \
590 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
591#else /* looping version */
592#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \
593 ROT, rn) \
594 Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \
595 pF, ROT, rn) \
596 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000597
Jason Cooper60eb8172014-03-24 01:49:11 +0000598#define I1024(R) \
599 /* inject the key schedule value */ \
600 X00 += ks[r + (R) + 0]; \
601 X01 += ks[r + (R) + 1]; \
602 X02 += ks[r + (R) + 2]; \
603 X03 += ks[r + (R) + 3]; \
604 X04 += ks[r + (R) + 4]; \
605 X05 += ks[r + (R) + 5]; \
606 X06 += ks[r + (R) + 6]; \
607 X07 += ks[r + (R) + 7]; \
608 X08 += ks[r + (R) + 8]; \
609 X09 += ks[r + (R) + 9]; \
610 X10 += ks[r + (R) + 10]; \
611 X11 += ks[r + (R) + 11]; \
612 X12 += ks[r + (R) + 12]; \
613 X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \
614 X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \
615 X15 += ks[r + (R) + 15] + r + (R); \
616 /* rotate key schedule */ \
617 ks[r + (R) + 16] = ks[r + (R) - 1]; \
618 ts[r + (R) + 2] = ts[r + (R) - 1]; \
619 Skein_Show_R_Ptr(BLK_BITSi, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
Jason Cooper449bb812014-03-24 01:48:58 +0000620
Jason Cooper60eb8172014-03-24 01:49:11 +0000621 for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024)
Jason Cooper06a620f2014-03-24 01:49:10 +0000622#endif
Jason Cooper39bd42b2014-03-24 01:49:09 +0000623 {
Jason Cooper60eb8172014-03-24 01:49:11 +0000624#define R1024_8_rounds(R) \
625 R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \
626 R1024_0, 8*(R) + 1); \
627 R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \
628 R1024_1, 8*(R) + 2); \
629 R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \
630 R1024_2, 8*(R) + 3); \
631 R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \
632 R1024_3, 8*(R) + 4); \
633 I1024(2*(R)); \
634 R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \
635 R1024_4, 8*(R) + 5); \
636 R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \
637 R1024_5, 8*(R) + 6); \
638 R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \
639 R1024_6, 8*(R) + 7); \
640 R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \
641 R1024_7, 8*(R) + 8); \
642 I1024(2*(R)+1);
Jason Cooper449bb812014-03-24 01:48:58 +0000643
Jason Cooper39bd42b2014-03-24 01:49:09 +0000644 R1024_8_rounds(0);
Jason Cooper449bb812014-03-24 01:48:58 +0000645
Jason Cooper60eb8172014-03-24 01:49:11 +0000646#define R1024_Unroll_R(NN) \
647 ((SKEIN_UNROLL_1024 == 0 && \
648 SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \
649 (SKEIN_UNROLL_1024 > (NN)))
Jason Cooper449bb812014-03-24 01:48:58 +0000650
Jason Cooper39bd42b2014-03-24 01:49:09 +0000651 #if R1024_Unroll_R(1)
652 R1024_8_rounds(1);
653 #endif
654 #if R1024_Unroll_R(2)
655 R1024_8_rounds(2);
656 #endif
657 #if R1024_Unroll_R(3)
658 R1024_8_rounds(3);
659 #endif
660 #if R1024_Unroll_R(4)
661 R1024_8_rounds(4);
662 #endif
663 #if R1024_Unroll_R(5)
664 R1024_8_rounds(5);
665 #endif
666 #if R1024_Unroll_R(6)
667 R1024_8_rounds(6);
668 #endif
669 #if R1024_Unroll_R(7)
670 R1024_8_rounds(7);
671 #endif
672 #if R1024_Unroll_R(8)
673 R1024_8_rounds(8);
674 #endif
675 #if R1024_Unroll_R(9)
676 R1024_8_rounds(9);
677 #endif
678 #if R1024_Unroll_R(10)
679 R1024_8_rounds(10);
680 #endif
681 #if R1024_Unroll_R(11)
682 R1024_8_rounds(11);
683 #endif
684 #if R1024_Unroll_R(12)
685 R1024_8_rounds(12);
686 #endif
687 #if R1024_Unroll_R(13)
688 R1024_8_rounds(13);
689 #endif
690 #if R1024_Unroll_R(14)
691 R1024_8_rounds(14);
692 #endif
693#if (SKEIN_UNROLL_1024 > 14)
Jason Cooper449bb812014-03-24 01:48:58 +0000694#error "need more unrolling in Skein_1024_Process_Block"
695 #endif
Jason Cooper39bd42b2014-03-24 01:49:09 +0000696 }
Jason Cooper60eb8172014-03-24 01:49:11 +0000697 /* do the final "feedforward" xor, update context chaining */
Jason Cooper449bb812014-03-24 01:48:58 +0000698
Jason Cooper39bd42b2014-03-24 01:49:09 +0000699 ctx->X[0] = X00 ^ w[0];
700 ctx->X[1] = X01 ^ w[1];
701 ctx->X[2] = X02 ^ w[2];
702 ctx->X[3] = X03 ^ w[3];
703 ctx->X[4] = X04 ^ w[4];
704 ctx->X[5] = X05 ^ w[5];
705 ctx->X[6] = X06 ^ w[6];
706 ctx->X[7] = X07 ^ w[7];
707 ctx->X[8] = X08 ^ w[8];
708 ctx->X[9] = X09 ^ w[9];
709 ctx->X[10] = X10 ^ w[10];
710 ctx->X[11] = X11 ^ w[11];
711 ctx->X[12] = X12 ^ w[12];
712 ctx->X[13] = X13 ^ w[13];
713 ctx->X[14] = X14 ^ w[14];
714 ctx->X[15] = X15 ^ w[15];
Jason Cooper449bb812014-03-24 01:48:58 +0000715
Jason Cooper39bd42b2014-03-24 01:49:09 +0000716 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
717
718 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
719 blkPtr += SKEIN1024_BLOCK_BYTES;
720 }
721 while (--blkCnt);
722 ctx->h.T[0] = ts[0];
723 ctx->h.T[1] = ts[1];
724}
Jason Cooper449bb812014-03-24 01:48:58 +0000725
726#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
727size_t Skein1024_Process_Block_CodeSize(void)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000728{
729 return ((u8 *) Skein1024_Process_Block_CodeSize) -
730 ((u8 *) Skein1024_Process_Block);
731}
Jason Cooper2ab31bb2014-03-24 01:49:03 +0000732unsigned int Skein1024_Unroll_Cnt(void)
Jason Cooper39bd42b2014-03-24 01:49:09 +0000733{
734 return SKEIN_UNROLL_1024;
735}
Jason Cooper449bb812014-03-24 01:48:58 +0000736#endif
737#endif