Christian Heimes | 4a0270d | 2012-10-06 02:23:36 +0200 | [diff] [blame] | 1 | /* |
| 2 | The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, |
| 3 | Michaël Peeters and Gilles Van Assche. For more information, feedback or |
| 4 | questions, please refer to our website: http://keccak.noekeon.org/ |
| 5 | |
| 6 | Implementation by the designers, |
| 7 | hereby denoted as "the implementer". |
| 8 | |
| 9 | To the extent possible under law, the implementer has waived all copyright |
| 10 | and related or neighboring rights to the source code in this file. |
| 11 | http://creativecommons.org/publicdomain/zero/1.0/ |
| 12 | */ |
| 13 | |
/*
 * Declares every local variable referenced by the macros in this file: the
 * A and E state sets, the B scratch set, the C accumulators, the D values
 * and the Zero constant.  Expand it once, among the declarations of the
 * function that uses the other macros.
 */
#define declareABCDE \
    /* State A as lane pairs, as used by the round macro. */ \
    V128 Abage, Abegi, Abigo, Abogu, Abuga, \
         Akame, Akemi, Akimo, Akomu, Akuma; \
    /* State A as loaded from memory by the copyFromState* macros. */ \
    V128 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio; \
    V64 Aba, Abe, Abi, Abo, Abu, \
        Aga, Age, Agi, Ago, Agu, \
        Aka, Ake, Aki, Ako, Aku, \
        Ama, Ame, Ami, Amo, Amu; \
    V128 Asase, Asiso; \
    V64 Asu; \
    /* Scratch B values produced inside a round. */ \
    V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga, \
         Bkame, Bkemi, Bkimo, Bkomu, Bkuma, \
         Bsase, Bsesi, Bsiso, Bsosu, Bsusa; \
    /* C accumulators and the D values derived from them by computeD. */ \
    V128 Cae, Cei, Cio, Cou, Cua; \
    V128 Dau, Dea, Die, Doi, Duo, \
         Dua, Dae, Dei, Dio, Dou; \
    /* State E: the output set of a round. */ \
    V128 Ebage, Ebegi, Ebigo, Ebogu, Ebuga, \
         Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
    V128 Esase, Esiso; \
    V64 Esu; \
    V128 Zero;
| 35 | |
/* Intentionally expands to nothing in this variant: the C accumulators are
 * filled by the copyFromState* macros and by the round macro itself.
 * NOTE(review): presumably kept so shared round-unrolling code can still
 * invoke it -- confirm against the includer. */
| 36 | #define prepareTheta |
| 37 | |
/* Derives the D values used by the round macro (Dau, Dea, Die, Doi, Duo,
 * via the intermediates Dae, Dei, Dou) from the accumulators Cae, Cio and
 * Cua.  Cua and Cei are overwritten as scratch along the way, so statement
 * order matters: Cua is repacked before Dou reads it, and Cei is rebuilt
 * before Dae reads it.  Takes no arguments; operates on the variables
 * declared by declareABCDE. */
| 38 | #define computeD \ |
| 39 | Cua = GET64LOLO(Cua, Cae); \ |
| 40 | Dei = XOR128(Cae, ROL6464same(Cio, 1)); \ |
| 41 | Dou = XOR128(Cio, ROL6464same(Cua, 1)); \ |
| 42 | Cei = GET64HILO(Cae, Cio); \ |
| 43 | Dae = XOR128(Cua, ROL6464same(Cei, 1)); \ |
| 44 | Dau = GET64LOHI(Dae, Dou); \ |
| 45 | Dea = SWAP64(Dae); \ |
| 46 | Die = SWAP64(Dei); \ |
| 47 | Doi = GET64LOLO(Dou, Die); \ |
| 48 | Duo = SWAP64(Dou); |
| 49 | |
/* --- Theta Rho Pi Chi Iota Prepare-theta */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/*
 * One round: reads the state held in the A-prefixed variables, writes the
 * next state into the E-prefixed variables, and leaves in Cae, Cio and Cua
 * the XOR accumulators that computeD consumes at the start of the next
 * round.
 *
 *   i  index into KeccakF1600RoundConstants; that constant is XORed into
 *      E##bage
 *   A  name prefix of the input state variables (read only)
 *   E  name prefix of the output state variables (written)
 *
 * On entry Cae, Cio and Cua must hold the accumulators for the A state (as
 * left by a copyFromState* macro or by the previous round).  The B*, D*,
 * Cei, Cou and Zero variables are scratch.
 *
 * Zero is cleared before its first use (the E##su line) so that the macro
 * never reads it before assigning it, rather than relying on an earlier
 * round or the caller having set it.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    computeD \
    \
    /* Outputs E##bage .. E##buga; these start the C accumulators. */ \
    Bbage = XOR128(GET64LOHI(A##bage, A##bogu), Dau); \
    Bbage = ROL6464(Bbage, 0, 20); \
    Bbegi = XOR128(GET64HILO(A##bage, A##kame), Dea); \
    Bbegi = ROL6464(Bbegi, 44, 3); \
    Bbigo = XOR128(GET64LOHI(A##kimo, A##kame), Die); \
    Bbigo = ROL6464(Bbigo, 43, 45); \
    E##bage = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
    XOReq128(E##bage, CONST64(KeccakF1600RoundConstants[i])); \
    Cae = E##bage; \
    Bbogu = XOR128(GET64HILO(A##kimo, A##siso), Doi); \
    Bbogu = ROL6464(Bbogu, 21, 61); \
    E##begi = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
    Cei = E##begi; \
    Bbuga = XOR128(GET64LOLO(A##su, A##bogu), Duo); \
    Bbuga = ROL6464(Bbuga, 14, 28); \
    E##bigo = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
    Cio = E##bigo; \
    E##bogu = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
    Cou = E##bogu; \
    E##buga = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
    Cua = E##buga; \
    \
    /* Outputs E##kame .. E##kuma; folded into the C accumulators. */ \
    Bkame = XOR128(GET64LOHI(A##begi, A##buga), Dea); \
    Bkame = ROL6464(Bkame, 1, 36); \
    Bkemi = XOR128(GET64HILO(A##begi, A##kemi), Die); \
    Bkemi = ROL6464(Bkemi, 6, 10); \
    Bkimo = XOR128(GET64LOHI(A##komu, A##kemi), Doi); \
    Bkimo = ROL6464(Bkimo, 25, 15); \
    E##kame = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
    XOReq128(Cae, E##kame); \
    Bkomu = XOR128(GET64HIHI(A##komu, A##siso), Duo); \
    Bkomu = ROL6464(Bkomu, 8, 56); \
    E##kemi = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
    XOReq128(Cei, E##kemi); \
    Bkuma = XOR128(GET64LOLO(A##sase, A##buga), Dau); \
    Bkuma = ROL6464(Bkuma, 18, 27); \
    E##kimo = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
    XOReq128(Cio, E##kimo); \
    E##komu = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
    XOReq128(Cou, E##komu); \
    E##kuma = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
    XOReq128(Cua, E##kuma); \
    \
    /* Outputs E##sase, E##siso and E##su. */ \
    Bsase = XOR128(A##bigo, SWAP64(Doi)); \
    Bsase = ROL6464(Bsase, 62, 55); \
    Bsiso = XOR128(A##kuma, SWAP64(Dau)); \
    Bsiso = ROL6464(Bsiso, 39, 41); \
    Bsusa = XOR64(COPY64HI2LO(A##sase), Dei); \
    Bsusa = ROL6464same(Bsusa, 2); \
    Bsusa = GET64LOLO(Bsusa, Bsase); \
    Bsesi = GET64HILO(Bsase, Bsiso); \
    Bsosu = GET64HILO(Bsiso, Bsusa); \
    E##sase = XOR128(Bsase, ANDnu128(Bsesi, Bsiso)); \
    XOReq128(Cae, E##sase); \
    E##siso = XOR128(Bsiso, ANDnu128(Bsosu, Bsusa)); \
    XOReq128(Cio, E##siso); \
    /* Clear Zero here: the next line is its first use in this macro. */ \
    Zero = ZERO128(); \
    E##su = GET64LOLO(XOR128(Bsusa, ANDnu128(Bsase, Bsesi)), Zero); \
    XOReq128(Cua, E##su); \
    \
    /* Fold the Cei and Cou accumulators into Cae, Cio and Cua. */ \
    XOReq128(Cae, GET64HIHI(Cua, Zero)); \
    XOReq128(Cae, GET64LOLO(Zero, Cei)); \
    XOReq128(Cio, GET64HIHI(Cei, Zero)); \
    XOReq128(Cio, GET64LOLO(Zero, Cou)); \
    XOReq128(Cua, GET64HIHI(Cou, Zero));
| 120 | |
| 121 | /* --- Theta Rho Pi Chi Iota */ |
| 122 | /* --- 64-bit lanes mapped to 64-bit and 128-bit words */ |
/* In this variant the plain round simply forwards its arguments (i, A, E)
 * to thetaRhoPiChiIotaPrepareTheta, so the two are identical. */
| 123 | #define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E) |
| 124 | |
| 125 | static const UINT64 KeccakF1600RoundConstants[24] = { |
| 126 | 0x0000000000000001ULL, |
| 127 | 0x0000000000008082ULL, |
| 128 | 0x800000000000808aULL, |
| 129 | 0x8000000080008000ULL, |
| 130 | 0x000000000000808bULL, |
| 131 | 0x0000000080000001ULL, |
| 132 | 0x8000000080008081ULL, |
| 133 | 0x8000000000008009ULL, |
| 134 | 0x000000000000008aULL, |
| 135 | 0x0000000000000088ULL, |
| 136 | 0x0000000080008009ULL, |
| 137 | 0x000000008000000aULL, |
| 138 | 0x000000008000808bULL, |
| 139 | 0x800000000000008bULL, |
| 140 | 0x8000000000008089ULL, |
| 141 | 0x8000000000008003ULL, |
| 142 | 0x8000000000008002ULL, |
| 143 | 0x8000000000000080ULL, |
| 144 | 0x000000000000800aULL, |
| 145 | 0x800000008000000aULL, |
| 146 | 0x8000000080008081ULL, |
| 147 | 0x8000000000008080ULL, |
| 148 | 0x0000000080000001ULL, |
| 149 | 0x8000000080008008ULL }; |
| 150 | |
/* Loads state[0..24] into the X-prefixed variables while XORing the first
 * 576 bits of input (input[0..8], nine 64-bit lanes) into the loaded values,
 * and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * Lanes from state[9] onwards are loaded without any input XOR.
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read
 *   input  indexable lvalue; elements 0..8 are read, always via LOAD128u or
 *          LOAD64 (presumably the loads with no 16-byte alignment
 *          requirement -- the macros are defined outside this view) */
| 151 | #define copyFromStateAndXor576bits(X, state, input) \ |
| 152 | X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ |
| 153 | X##ba = X##bae; \ |
| 154 | X##be = GET64HIHI(X##bae, X##bae); \ |
| 155 | Cae = X##bae; \ |
| 156 | X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ |
| 157 | X##bi = X##bio; \ |
| 158 | X##bo = GET64HIHI(X##bio, X##bio); \ |
| 159 | Cio = X##bio; \ |
| 160 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ |
| 161 | Cua = X##bu; \ |
| 162 | X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ |
| 163 | X##ga = X##gae; \ |
| 164 | X##buga = GET64LOLO(X##bu, X##ga); \ |
| 165 | X##ge = GET64HIHI(X##gae, X##gae); \ |
| 166 | X##bage = GET64LOLO(X##ba, X##ge); \ |
| 167 | XOReq128(Cae, X##gae); \ |
| 168 | X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ |
| 169 | X##gi = X##gio; \ |
| 170 | X##begi = GET64LOLO(X##be, X##gi); \ |
| 171 | X##go = GET64HIHI(X##gio, X##gio); \ |
| 172 | X##bigo = GET64LOLO(X##bi, X##go); \ |
| 173 | XOReq128(Cio, X##gio); \ |
| 174 | X##gu = LOAD64(state[ 9]); \ |
| 175 | X##bogu = GET64LOLO(X##bo, X##gu); \ |
| 176 | XOReq64(Cua, X##gu); \ |
| 177 | X##kae = LOAD128(state[10]); \ |
| 178 | X##ka = X##kae; \ |
| 179 | X##ke = GET64HIHI(X##kae, X##kae); \ |
| 180 | XOReq128(Cae, X##kae); \ |
| 181 | X##kio = LOAD128(state[12]); \ |
| 182 | X##ki = X##kio; \ |
| 183 | X##ko = GET64HIHI(X##kio, X##kio); \ |
| 184 | XOReq128(Cio, X##kio); \ |
| 185 | X##kuma = LOAD128(state[14]); \ |
| 186 | XOReq64(Cua, X##kuma); \ |
| 187 | X##me = LOAD64(state[16]); \ |
| 188 | X##kame = GET64LOLO(X##ka, X##me); \ |
| 189 | XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ |
| 190 | X##mio = LOAD128u(state[17]); \ |
| 191 | X##mi = X##mio; \ |
| 192 | X##kemi = GET64LOLO(X##ke, X##mi); \ |
| 193 | X##mo = GET64HIHI(X##mio, X##mio); \ |
| 194 | X##kimo = GET64LOLO(X##ki, X##mo); \ |
| 195 | XOReq128(Cio, X##mio); \ |
| 196 | X##mu = LOAD64(state[19]); \ |
| 197 | X##komu = GET64LOLO(X##ko, X##mu); \ |
| 198 | XOReq64(Cua, X##mu); \ |
| 199 | X##sase = LOAD128(state[20]); \ |
| 200 | XOReq128(Cae, X##sase); \ |
| 201 | X##siso = LOAD128(state[22]); \ |
| 202 | XOReq128(Cio, X##siso); \ |
| 203 | X##su = LOAD64(state[24]); \ |
| 204 | XOReq64(Cua, X##su); \ |
| 205 | |
/* Loads state[0..24] into the X-prefixed variables while XORing the first
 * 832 bits of input (input[0..12], thirteen 64-bit lanes) into the loaded
 * values, and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * The last input lane, input[12], is fetched with LOAD64 so nothing past it
 * is read; lanes from state[14] onwards are loaded without any input XOR.
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read
 *   input  indexable lvalue; elements 0..12 are read, always via LOAD128u or
 *          LOAD64 (presumably the loads with no 16-byte alignment
 *          requirement -- the macros are defined outside this view) */
| 206 | #define copyFromStateAndXor832bits(X, state, input) \ |
| 207 | X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \ |
| 208 | X##ba = X##bae; \ |
| 209 | X##be = GET64HIHI(X##bae, X##bae); \ |
| 210 | Cae = X##bae; \ |
| 211 | X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \ |
| 212 | X##bi = X##bio; \ |
| 213 | X##bo = GET64HIHI(X##bio, X##bio); \ |
| 214 | Cio = X##bio; \ |
| 215 | X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \ |
| 216 | Cua = X##bu; \ |
| 217 | X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \ |
| 218 | X##ga = X##gae; \ |
| 219 | X##buga = GET64LOLO(X##bu, X##ga); \ |
| 220 | X##ge = GET64HIHI(X##gae, X##gae); \ |
| 221 | X##bage = GET64LOLO(X##ba, X##ge); \ |
| 222 | XOReq128(Cae, X##gae); \ |
| 223 | X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \ |
| 224 | X##gi = X##gio; \ |
| 225 | X##begi = GET64LOLO(X##be, X##gi); \ |
| 226 | X##go = GET64HIHI(X##gio, X##gio); \ |
| 227 | X##bigo = GET64LOLO(X##bi, X##go); \ |
| 228 | XOReq128(Cio, X##gio); \ |
| 229 | X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \ |
| 230 | X##bogu = GET64LOLO(X##bo, X##gu); \ |
| 231 | XOReq64(Cua, X##gu); \ |
| 232 | X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \ |
| 233 | X##ka = X##kae; \ |
| 234 | X##ke = GET64HIHI(X##kae, X##kae); \ |
| 235 | XOReq128(Cae, X##kae); \ |
| 236 | X##kio = XOR128(LOAD128(state[12]), LOAD64(input[12])); \ |
| 237 | X##ki = X##kio; \ |
| 238 | X##ko = GET64HIHI(X##kio, X##kio); \ |
| 239 | XOReq128(Cio, X##kio); \ |
| 240 | X##kuma = LOAD128(state[14]); \ |
| 241 | XOReq64(Cua, X##kuma); \ |
| 242 | X##me = LOAD64(state[16]); \ |
| 243 | X##kame = GET64LOLO(X##ka, X##me); \ |
| 244 | XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ |
| 245 | X##mio = LOAD128u(state[17]); \ |
| 246 | X##mi = X##mio; \ |
| 247 | X##kemi = GET64LOLO(X##ke, X##mi); \ |
| 248 | X##mo = GET64HIHI(X##mio, X##mio); \ |
| 249 | X##kimo = GET64LOLO(X##ki, X##mo); \ |
| 250 | XOReq128(Cio, X##mio); \ |
| 251 | X##mu = LOAD64(state[19]); \ |
| 252 | X##komu = GET64LOLO(X##ko, X##mu); \ |
| 253 | XOReq64(Cua, X##mu); \ |
| 254 | X##sase = LOAD128(state[20]); \ |
| 255 | XOReq128(Cae, X##sase); \ |
| 256 | X##siso = LOAD128(state[22]); \ |
| 257 | XOReq128(Cio, X##siso); \ |
| 258 | X##su = LOAD64(state[24]); \ |
| 259 | XOReq64(Cua, X##su); \ |
| 260 | |
/*
 * Loads state[0..24] into the X-prefixed variables while XORing the first
 * 1024 bits of input (input[0..15], sixteen 64-bit lanes) into the loaded
 * values, and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * Lanes from state[16] onwards are loaded without any input XOR.
 *
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read
 *   input  indexable lvalue; elements 0..15 are read
 *
 * Every 128-bit read of input goes through LOAD128u, including input[14],
 * so that input is treated the same way at every offset.  LOAD128u is
 * presumably the load without a 16-byte alignment requirement (defined
 * outside this view); input[0] is already read with it, so no alignment is
 * assumed for input anywhere else in this macro.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
| 315 | |
/*
 * Loads state[0..24] into the X-prefixed variables while XORing the first
 * 1088 bits of input (input[0..16], seventeen 64-bit lanes) into the loaded
 * values, and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * Lanes from state[17] onwards are loaded without any input XOR.
 *
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read
 *   input  indexable lvalue; elements 0..16 are read
 *
 * Every 128-bit read of input goes through LOAD128u, including input[14],
 * so that input is treated the same way at every offset.  LOAD128u is
 * presumably the load without a 16-byte alignment requirement (defined
 * outside this view); input[0] is already read with it, so no alignment is
 * assumed for input anywhere else in this macro.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
| 370 | |
/*
 * Loads state[0..24] into the X-prefixed variables while XORing the first
 * 1152 bits of input (input[0..17], eighteen 64-bit lanes) into the loaded
 * values, and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * The last input lane, input[17], is fetched with LOAD64 so nothing past it
 * is read; lanes from state[19] onwards are loaded without any input XOR.
 *
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read
 *   input  indexable lvalue; elements 0..17 are read
 *
 * Every 128-bit read of input goes through LOAD128u, including input[14],
 * so that input is treated the same way at every offset.  LOAD128u is
 * presumably the load without a 16-byte alignment requirement (defined
 * outside this view); input[0] is already read with it, so no alignment is
 * assumed for input anywhere else in this macro.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
| 425 | |
/*
 * Loads state[0..24] into the X-prefixed variables while XORing the first
 * 1344 bits of input (input[0..20], twenty-one 64-bit lanes) into the loaded
 * values, and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * The last input lane, input[20], is fetched with LOAD64 so nothing past it
 * is read; lanes from state[22] onwards are loaded without any input XOR.
 *
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read
 *   input  indexable lvalue; elements 0..20 are read
 *
 * Every 128-bit read of input goes through LOAD128u, including input[14],
 * so that input is treated the same way at every offset.  LOAD128u is
 * presumably the load without a 16-byte alignment requirement (defined
 * outside this view); input[0] is already read with it, so no alignment is
 * assumed for input anywhere else in this macro.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
| 480 | |
/* Loads state[0..24] into the X-prefixed variables without XORing any input,
 * and builds the accumulators Cae, Cio and Cua consumed by computeD.
 * Even-indexed pairs are read with LOAD128 and the pairs starting at odd
 * indices (5, 7, 17) with LOAD128u; single lanes use LOAD64.
 *   X      name prefix of the destination variables
 *   state  indexable lvalue; elements 0..24 are read */
| 481 | #define copyFromState(X, state) \ |
| 482 | X##bae = LOAD128(state[ 0]); \ |
| 483 | X##ba = X##bae; \ |
| 484 | X##be = GET64HIHI(X##bae, X##bae); \ |
| 485 | Cae = X##bae; \ |
| 486 | X##bio = LOAD128(state[ 2]); \ |
| 487 | X##bi = X##bio; \ |
| 488 | X##bo = GET64HIHI(X##bio, X##bio); \ |
| 489 | Cio = X##bio; \ |
| 490 | X##bu = LOAD64(state[ 4]); \ |
| 491 | Cua = X##bu; \ |
| 492 | X##gae = LOAD128u(state[ 5]); \ |
| 493 | X##ga = X##gae; \ |
| 494 | X##buga = GET64LOLO(X##bu, X##ga); \ |
| 495 | X##ge = GET64HIHI(X##gae, X##gae); \ |
| 496 | X##bage = GET64LOLO(X##ba, X##ge); \ |
| 497 | XOReq128(Cae, X##gae); \ |
| 498 | X##gio = LOAD128u(state[ 7]); \ |
| 499 | X##gi = X##gio; \ |
| 500 | X##begi = GET64LOLO(X##be, X##gi); \ |
| 501 | X##go = GET64HIHI(X##gio, X##gio); \ |
| 502 | X##bigo = GET64LOLO(X##bi, X##go); \ |
| 503 | XOReq128(Cio, X##gio); \ |
| 504 | X##gu = LOAD64(state[ 9]); \ |
| 505 | X##bogu = GET64LOLO(X##bo, X##gu); \ |
| 506 | XOReq64(Cua, X##gu); \ |
| 507 | X##kae = LOAD128(state[10]); \ |
| 508 | X##ka = X##kae; \ |
| 509 | X##ke = GET64HIHI(X##kae, X##kae); \ |
| 510 | XOReq128(Cae, X##kae); \ |
| 511 | X##kio = LOAD128(state[12]); \ |
| 512 | X##ki = X##kio; \ |
| 513 | X##ko = GET64HIHI(X##kio, X##kio); \ |
| 514 | XOReq128(Cio, X##kio); \ |
| 515 | X##kuma = LOAD128(state[14]); \ |
| 516 | XOReq64(Cua, X##kuma); \ |
| 517 | X##me = LOAD64(state[16]); \ |
| 518 | X##kame = GET64LOLO(X##ka, X##me); \ |
| 519 | XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \ |
| 520 | X##mio = LOAD128u(state[17]); \ |
| 521 | X##mi = X##mio; \ |
| 522 | X##kemi = GET64LOLO(X##ke, X##mi); \ |
| 523 | X##mo = GET64HIHI(X##mio, X##mio); \ |
| 524 | X##kimo = GET64LOLO(X##ki, X##mo); \ |
| 525 | XOReq128(Cio, X##mio); \ |
| 526 | X##mu = LOAD64(state[19]); \ |
| 527 | X##komu = GET64LOLO(X##ko, X##mu); \ |
| 528 | XOReq64(Cua, X##mu); \ |
| 529 | X##sase = LOAD128(state[20]); \ |
| 530 | XOReq128(Cae, X##sase); \ |
| 531 | X##siso = LOAD128(state[22]); \ |
| 532 | XOReq128(Cio, X##siso); \ |
| 533 | X##su = LOAD64(state[24]); \ |
| 534 | XOReq64(Cua, X##su); \ |
| 535 | |
/* Writes the X-prefixed variables back into state.  For the b and k pair
 * variables the value itself is passed to STORE64 for the lower-indexed
 * lane (state[0..3], state[10..13]) and COPY64HI2LO(value) for the
 * higher-indexed one (state[6..9], state[16..19]); X##buga, X##kuma,
 * X##sase and X##siso go out through STORE128 at state[4], [14], [20] and
 * [22]; X##su goes to state[24].
 * NOTE(review): state[5], [15], [21] and [23] are never named here, so
 * STORE128 presumably writes two consecutive lanes -- confirm against its
 * definition.
 *   state  indexable lvalue receiving the lanes
 *   X      name prefix of the source variables */
| 536 | #define copyToState(state, X) \ |
| 537 | STORE64(state[ 0], X##bage); \ |
| 538 | STORE64(state[ 1], X##begi); \ |
| 539 | STORE64(state[ 2], X##bigo); \ |
| 540 | STORE64(state[ 3], X##bogu); \ |
| 541 | STORE128(state[ 4], X##buga); \ |
| 542 | STORE64(state[ 6], COPY64HI2LO(X##bage)); \ |
| 543 | STORE64(state[ 7], COPY64HI2LO(X##begi)); \ |
| 544 | STORE64(state[ 8], COPY64HI2LO(X##bigo)); \ |
| 545 | STORE64(state[ 9], COPY64HI2LO(X##bogu)); \ |
| 546 | STORE64(state[10], X##kame); \ |
| 547 | STORE64(state[11], X##kemi); \ |
| 548 | STORE64(state[12], X##kimo); \ |
| 549 | STORE64(state[13], X##komu); \ |
| 550 | STORE128(state[14], X##kuma); \ |
| 551 | STORE64(state[16], COPY64HI2LO(X##kame)); \ |
| 552 | STORE64(state[17], COPY64HI2LO(X##kemi)); \ |
| 553 | STORE64(state[18], COPY64HI2LO(X##kimo)); \ |
| 554 | STORE64(state[19], COPY64HI2LO(X##komu)); \ |
| 555 | STORE128(state[20], X##sase); \ |
| 556 | STORE128(state[22], X##siso); \ |
| 557 | STORE64(state[24], X##su); \ |
| 558 | |
/*
 * Copies the thirteen state variables of the Y-prefixed set into the
 * X-prefixed set by plain assignment.
 *
 *   X  name prefix of the destination variables
 *   Y  name prefix of the source variables
 */
#define copyStateVariables(X, Y) \
    X##bage = Y##bage; X##begi = Y##begi; X##bigo = Y##bigo; \
    X##bogu = Y##bogu; X##buga = Y##buga; \
    X##kame = Y##kame; X##kemi = Y##kemi; X##kimo = Y##kimo; \
    X##komu = Y##komu; X##kuma = Y##kuma; \
    X##sase = Y##sase; X##siso = Y##siso; \
    X##su = Y##su;
| 573 | |