blob: 823c946fff4efc5645c9ecb9bc46381652dc3d85 [file] [log] [blame]
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/

Implementation by the designers,
hereby denoted as "the implementer".

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
13
/*
 * Declares every local variable that the macros in this file read or write.
 * Expand it once at the top of the function that uses them.
 *
 * Names are a one-letter role prefix (A/E state sets, B/C/D round scratch)
 * followed by lane names; a suffix naming two lanes (e.g. "bage") is a V128
 * that the copy macros fill with two 64-bit lanes.
 */
#define declareABCDE \
    /* A: state set, paired lanes as used by the round macro */ \
    V128 Abage, Abegi, Abigo, Abogu, Abuga; \
    V128 Akame, Akemi, Akimo, Akomu, Akuma; \
    V128 Asase, Asiso; \
    V64 Asu; \
    /* A: staging variables used while loading from memory */ \
    V128 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio; \
    V64 Aba, Abe, Abi, Abo, Abu; \
    V64 Aga, Age, Agi, Ago, Agu; \
    V64 Aka, Ake, Aki, Ako, Aku; \
    V64 Ama, Ame, Ami, Amo, Amu; \
    /* E: second state set, same layout as the paired A variables */ \
    V128 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
    V128 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
    V128 Esase, Esiso; \
    V64 Esu; \
    /* B: per-round intermediates */ \
    V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
    V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
    V128 Bsase, Bsesi, Bsiso, Bsosu, Bsusa; \
    /* C: accumulators seeded by the copy macros and by each round */ \
    V128 Cae, Cei, Cio, Cou, Cua; \
    /* D: values produced by computeD */ \
    V128 Dau, Dea, Die, Doi, Duo; \
    V128 Dua, Dae, Dei, Dio, Dou; \
    /* all-zero vector, assigned from ZERO128() by the round macro */ \
    V128 Zero;
35
/*
 * Intentionally expands to nothing.  In this file the copyFromState*
 * macros and the tail of thetaRhoPiChiIotaPrepareTheta already leave
 * Cae/Cio/Cua populated for computeD, so no separate preparation step
 * is emitted here.
 */
#define prepareTheta /* empty */
37
/*
 * Derives the D vectors used by thetaRhoPiChiIotaPrepareTheta from the
 * accumulators Cae, Cio and Cua.
 *
 * Reads:    Cae, Cio, Cua
 * Clobbers: Cua and Cei (both are repacked before use)
 * Writes:   Dae, Dei, Dou (intermediates) and Dau, Dea, Die, Doi, Duo
 *
 * Every rotation is by 1 bit (ROL6464same(..., 1)).
 */
#define computeD \
    /* repack the accumulators into the pairings needed below */ \
    Cua = GET64LOLO(Cua, Cae); \
    Cei = GET64HILO(Cae, Cio); \
    /* combine each accumulator with a 1-bit rotation of another */ \
    Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
    Dou = XOR128(Cio, ROL6464same(Cua, 1)); \
    Dae = XOR128(Cua, ROL6464same(Cei, 1)); \
    /* remaining D pairings are lane shuffles of the three above */ \
    Dau = GET64LOHI(Dae, Dou); \
    Dea = SWAP64(Dae); \
    Die = SWAP64(Dei); \
    Doi = GET64LOLO(Dou, Die); \
    Duo = SWAP64(Dou);
49
/* --- Theta Rho Pi Chi Iota Prepare-theta */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/*
 * One round over the paired-lane state.
 *
 *   i  index into KeccakF1600RoundConstants for this round
 *   A  prefix of the state variables read by the round
 *   E  prefix of the state variables written by the round
 *
 * On entry Cae, Cio and Cua must hold the accumulators expected by
 * computeD.  On exit the E variables hold the new state and Cae, Cio and
 * Cua hold the accumulators for the next round (Cei and Cou are scratch).
 *
 * Zero is assigned before anything else: it is read when E##su is built and
 * again in the final folding step, so it must already be all-zero the first
 * time the macro is expanded.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Zero = ZERO128(); \
    computeD \
    \
    /* first group: produces E##bage .. E##buga and starts the accumulators */ \
    Bbage = XOR128(GET64LOHI(A##bage, A##bogu), Dau); \
    Bbage = ROL6464(Bbage, 0, 20); \
    Bbegi = XOR128(GET64HILO(A##bage, A##kame), Dea); \
    Bbegi = ROL6464(Bbegi, 44, 3); \
    Bbigo = XOR128(GET64LOHI(A##kimo, A##kame), Die); \
    Bbigo = ROL6464(Bbigo, 43, 45); \
    E##bage = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
    /* the round constant is applied to E##bage only */ \
    XOReq128(E##bage, CONST64(KeccakF1600RoundConstants[i])); \
    Cae = E##bage; \
    Bbogu = XOR128(GET64HILO(A##kimo, A##siso), Doi); \
    Bbogu = ROL6464(Bbogu, 21, 61); \
    E##begi = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
    Cei = E##begi; \
    Bbuga = XOR128(GET64LOLO(A##su, A##bogu), Duo); \
    Bbuga = ROL6464(Bbuga, 14, 28); \
    E##bigo = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
    Cio = E##bigo; \
    E##bogu = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
    Cou = E##bogu; \
    E##buga = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
    Cua = E##buga; \
    \
    /* second group: produces E##kame .. E##kuma, folded into the accumulators */ \
    Bkame = XOR128(GET64LOHI(A##begi, A##buga), Dea); \
    Bkame = ROL6464(Bkame, 1, 36); \
    Bkemi = XOR128(GET64HILO(A##begi, A##kemi), Die); \
    Bkemi = ROL6464(Bkemi, 6, 10); \
    Bkimo = XOR128(GET64LOHI(A##komu, A##kemi), Doi); \
    Bkimo = ROL6464(Bkimo, 25, 15); \
    E##kame = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
    XOReq128(Cae, E##kame); \
    Bkomu = XOR128(GET64HIHI(A##komu, A##siso), Duo); \
    Bkomu = ROL6464(Bkomu, 8, 56); \
    E##kemi = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
    XOReq128(Cei, E##kemi); \
    Bkuma = XOR128(GET64LOLO(A##sase, A##buga), Dau); \
    Bkuma = ROL6464(Bkuma, 18, 27); \
    E##kimo = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
    XOReq128(Cio, E##kimo); \
    E##komu = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
    XOReq128(Cou, E##komu); \
    E##kuma = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
    XOReq128(Cua, E##kuma); \
    \
    /* third group: produces E##sase, E##siso and the single lane E##su */ \
    Bsase = XOR128(A##bigo, SWAP64(Doi)); \
    Bsase = ROL6464(Bsase, 62, 55); \
    Bsiso = XOR128(A##kuma, SWAP64(Dau)); \
    Bsiso = ROL6464(Bsiso, 39, 41); \
    Bsusa = XOR64(COPY64HI2LO(A##sase), Dei); \
    Bsusa = ROL6464same(Bsusa, 2); \
    Bsusa = GET64LOLO(Bsusa, Bsase); \
    Bsesi = GET64HILO(Bsase, Bsiso); \
    Bsosu = GET64HILO(Bsiso, Bsusa); \
    E##sase = XOR128(Bsase, ANDnu128(Bsesi, Bsiso)); \
    XOReq128(Cae, E##sase); \
    E##siso = XOR128(Bsiso, ANDnu128(Bsosu, Bsusa)); \
    XOReq128(Cio, E##siso); \
    /* Zero supplies the second half of E##su */ \
    E##su = GET64LOLO(XOR128(Bsusa, ANDnu128(Bsase, Bsesi)), Zero); \
    XOReq128(Cua, E##su); \
    \
    /* fold the halves held in Cua, Cei and Cou into Cae, Cio and Cua */ \
    XOReq128(Cae, GET64HIHI(Cua, Zero)); \
    XOReq128(Cae, GET64LOLO(Zero, Cei)); \
    XOReq128(Cio, GET64HIHI(Cei, Zero)); \
    XOReq128(Cio, GET64LOLO(Zero, Cou)); \
    XOReq128(Cua, GET64HIHI(Cou, Zero));
120
/* --- Theta Rho Pi Chi Iota */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/*
 * Plain alias: forwards its three arguments unchanged to
 * thetaRhoPiChiIotaPrepareTheta, so both names expand to the same round.
 */
#define thetaRhoPiChiIota(i, A, E) \
    thetaRhoPiChiIotaPrepareTheta(i, A, E)
124
125static const UINT64 KeccakF1600RoundConstants[24] = {
126 0x0000000000000001ULL,
127 0x0000000000008082ULL,
128 0x800000000000808aULL,
129 0x8000000080008000ULL,
130 0x000000000000808bULL,
131 0x0000000080000001ULL,
132 0x8000000080008081ULL,
133 0x8000000000008009ULL,
134 0x000000000000008aULL,
135 0x0000000000000088ULL,
136 0x0000000080008009ULL,
137 0x000000008000000aULL,
138 0x000000008000808bULL,
139 0x800000000000008bULL,
140 0x8000000000008089ULL,
141 0x8000000000008003ULL,
142 0x8000000000008002ULL,
143 0x8000000000000080ULL,
144 0x000000000000800aULL,
145 0x800000008000000aULL,
146 0x8000000080008081ULL,
147 0x8000000000008080ULL,
148 0x0000000080000001ULL,
149 0x8000000080008008ULL };
150
/*
 * Loads state[0..24] into the X-prefixed working variables, XORing input
 * into the loads that start at indices 0, 2, 4, 5 and 7 (576 bits = the
 * first 9 lanes), and seeds the accumulators Cae, Cio and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 *   input  lane array to absorb; only the indices listed above are read
 */
#define copyFromStateAndXor576bits(X, state, input) \
    /* state[0..4], input absorbed */ \
    X##bae = XOR128(LOAD128(state[0]), LOAD128u(input[0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[2]), LOAD128u(input[2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[4]), LOAD64(input[4])); \
    Cua = X##bu; \
    /* state[5..8], input absorbed */ \
    X##gae = XOR128(LOAD128u(state[5]), LOAD128u(input[5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[7]), LOAD128u(input[7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    /* state[9..24], state only */ \
    X##gu = LOAD64(state[9]); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = LOAD128(state[10]); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = LOAD128(state[12]); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
205
/*
 * Loads state[0..24] into the X-prefixed working variables, XORing input
 * into the loads that start at indices 0, 2, 4, 5, 7, 9, 10 and 12
 * (832 bits = the first 13 lanes; input[12] is read as a single lane), and
 * seeds the accumulators Cae, Cio and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 *   input  lane array to absorb; only the indices listed above are read
 */
#define copyFromStateAndXor832bits(X, state, input) \
    /* state[0..4], input absorbed */ \
    X##bae = XOR128(LOAD128(state[0]), LOAD128u(input[0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[2]), LOAD128u(input[2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[4]), LOAD64(input[4])); \
    Cua = X##bu; \
    /* state[5..9], input absorbed */ \
    X##gae = XOR128(LOAD128u(state[5]), LOAD128u(input[5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[7]), LOAD128u(input[7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[9]), LOAD64(input[9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    /* state[10..13]: input[10..11] as a pair, input[12] as one lane */ \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    /* state[14..24], state only */ \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
260
/*
 * Loads state[0..24] into the X-prefixed working variables, XORing input
 * into the loads that start at indices 0, 2, 4, 5, 7, 9, 10, 12 and 14
 * (1024 bits = the first 16 lanes), and seeds the accumulators Cae, Cio
 * and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 *   input  lane array to absorb; only the indices listed above are read
 *
 * Every two-lane read of input goes through LOAD128u, including input[14],
 * so that no alignment is demanded of the caller's buffer.
 * NOTE(review): this assumes LOAD128 is the alignment-requiring variant and
 * LOAD128u the unaligned one; confirm against their definitions.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    /* state[0..4], input absorbed */ \
    X##bae = XOR128(LOAD128(state[0]), LOAD128u(input[0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[2]), LOAD128u(input[2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[4]), LOAD64(input[4])); \
    Cua = X##bu; \
    /* state[5..9], input absorbed */ \
    X##gae = XOR128(LOAD128u(state[5]), LOAD128u(input[5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[7]), LOAD128u(input[7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[9]), LOAD64(input[9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    /* state[10..15], input absorbed */ \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    /* state[16..24], state only */ \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
315
/*
 * Loads state[0..24] into the X-prefixed working variables, XORing input
 * into the loads that start at indices 0, 2, 4, 5, 7, 9, 10, 12, 14 and 16
 * (1088 bits = the first 17 lanes), and seeds the accumulators Cae, Cio
 * and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 *   input  lane array to absorb; only the indices listed above are read
 *
 * Every two-lane read of input goes through LOAD128u, including input[14],
 * so that no alignment is demanded of the caller's buffer.
 * NOTE(review): this assumes LOAD128 is the alignment-requiring variant and
 * LOAD128u the unaligned one; confirm against their definitions.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    /* state[0..4], input absorbed */ \
    X##bae = XOR128(LOAD128(state[0]), LOAD128u(input[0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[2]), LOAD128u(input[2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[4]), LOAD64(input[4])); \
    Cua = X##bu; \
    /* state[5..9], input absorbed */ \
    X##gae = XOR128(LOAD128u(state[5]), LOAD128u(input[5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[7]), LOAD128u(input[7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[9]), LOAD64(input[9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    /* state[10..16], input absorbed */ \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    /* state[17..24], state only */ \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
370
/*
 * Loads state[0..24] into the X-prefixed working variables, XORing input
 * into the loads that start at indices 0, 2, 4, 5, 7, 9, 10, 12, 14, 16
 * and 17 (1152 bits = the first 18 lanes; input[17] is read as a single
 * lane), and seeds the accumulators Cae, Cio and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 *   input  lane array to absorb; only the indices listed above are read
 *
 * Every two-lane read of input goes through LOAD128u, including input[14],
 * so that no alignment is demanded of the caller's buffer.
 * NOTE(review): this assumes LOAD128 is the alignment-requiring variant and
 * LOAD128u the unaligned one; confirm against their definitions.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    /* state[0..4], input absorbed */ \
    X##bae = XOR128(LOAD128(state[0]), LOAD128u(input[0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[2]), LOAD128u(input[2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[4]), LOAD64(input[4])); \
    Cua = X##bu; \
    /* state[5..9], input absorbed */ \
    X##gae = XOR128(LOAD128u(state[5]), LOAD128u(input[5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[7]), LOAD128u(input[7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[9]), LOAD64(input[9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    /* state[10..16], input absorbed */ \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    /* state[17..18]: only input[17] is absorbed */ \
    X##mio = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    /* state[19..24], state only */ \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
425
/*
 * Loads state[0..24] into the X-prefixed working variables, XORing input
 * into the loads that start at indices 0, 2, 4, 5, 7, 9, 10, 12, 14, 16,
 * 17, 19 and 20 (1344 bits = the first 21 lanes; input[20] is read as a
 * single lane), and seeds the accumulators Cae, Cio and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 *   input  lane array to absorb; only the indices listed above are read
 *
 * Every two-lane read of input goes through LOAD128u, including input[14],
 * so that no alignment is demanded of the caller's buffer.
 * NOTE(review): this assumes LOAD128 is the alignment-requiring variant and
 * LOAD128u the unaligned one; confirm against their definitions.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    /* state[0..4], input absorbed */ \
    X##bae = XOR128(LOAD128(state[0]), LOAD128u(input[0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[2]), LOAD128u(input[2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[4]), LOAD64(input[4])); \
    Cua = X##bu; \
    /* state[5..9], input absorbed */ \
    X##gae = XOR128(LOAD128u(state[5]), LOAD128u(input[5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[7]), LOAD128u(input[7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[9]), LOAD64(input[9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    /* state[10..16], input absorbed */ \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    /* state[17..21]: input[17..19] absorbed, input[20] as one lane */ \
    X##mio = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    XOReq128(Cae, X##sase); \
    /* state[22..24], state only */ \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
480
/*
 * Loads state[0..24] into the X-prefixed working variables without
 * absorbing any input, and seeds the accumulators Cae, Cio and Cua.
 *
 *   X      prefix of the working variables (see declareABCDE)
 *   state  lane array, indexed 0..24
 */
#define copyFromState(X, state) \
    /* state[0..4] */ \
    X##bae = LOAD128(state[0]); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = LOAD128(state[2]); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = LOAD64(state[4]); \
    Cua = X##bu; \
    /* state[5..9] */ \
    X##gae = LOAD128u(state[5]); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = LOAD128u(state[7]); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = LOAD64(state[9]); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    /* state[10..16] */ \
    X##kae = LOAD128(state[10]); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = LOAD128(state[12]); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    /* state[17..19] */ \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    /* state[20..24] */ \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
535
/*
 * Writes the X-prefixed working variables back to state[0..24].
 *
 *   state  destination lane array, indexed 0..24
 *   X      prefix of the working variables to store
 *
 * Paired variables whose two halves belong to non-adjacent indices are
 * written with two STORE64 calls (the second via COPY64HI2LO); the pairs
 * buga, kuma, sase and siso are each written with one STORE128.
 */
#define copyToState(state, X) \
    /* state[0..5] */ \
    STORE64(state[0], X##bage); \
    STORE64(state[1], X##begi); \
    STORE64(state[2], X##bigo); \
    STORE64(state[3], X##bogu); \
    STORE128(state[4], X##buga); \
    /* state[6..9]: second halves of bage, begi, bigo, bogu */ \
    STORE64(state[6], COPY64HI2LO(X##bage)); \
    STORE64(state[7], COPY64HI2LO(X##begi)); \
    STORE64(state[8], COPY64HI2LO(X##bigo)); \
    STORE64(state[9], COPY64HI2LO(X##bogu)); \
    /* state[10..15] */ \
    STORE64(state[10], X##kame); \
    STORE64(state[11], X##kemi); \
    STORE64(state[12], X##kimo); \
    STORE64(state[13], X##komu); \
    STORE128(state[14], X##kuma); \
    /* state[16..19]: second halves of kame, kemi, kimo, komu */ \
    STORE64(state[16], COPY64HI2LO(X##kame)); \
    STORE64(state[17], COPY64HI2LO(X##kemi)); \
    STORE64(state[18], COPY64HI2LO(X##kimo)); \
    STORE64(state[19], COPY64HI2LO(X##komu)); \
    /* state[20..24] */ \
    STORE128(state[20], X##sase); \
    STORE128(state[22], X##siso); \
    STORE64(state[24], X##su);
558
/*
 * Copies the thirteen paired state variables from prefix Y to prefix X.
 * Only the variables consumed by the round macro are copied; the staging
 * variables used while loading from memory are not touched.
 *
 *   X  destination prefix
 *   Y  source prefix
 */
#define copyStateVariables(X, Y) \
    X##bage = Y##bage;  X##begi = Y##begi;  X##bigo = Y##bigo; \
    X##bogu = Y##bogu;  X##buga = Y##buga; \
    X##kame = Y##kame;  X##kemi = Y##kemi;  X##kimo = Y##kimo; \
    X##komu = Y##komu;  X##kuma = Y##kuma; \
    X##sase = Y##sase;  X##siso = Y##siso; \
    X##su = Y##su;
573