# enter ECRYPT_encrypt_bytes
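# ----------------------------------------------------------------------
# Salsa20 stream cipher, x86_64 (AT&T syntax, SysV calling convention).
# The flat layout, single-letter temporaries, and no-op moves such as
# "mov %rsi,%rsi" suggest machine-generated qhasm output (it matches
# D. J. Bernstein's public-domain Salsa20 code); read the comments
# below as the generator's register-allocation trace. Throughout,
# two 32-bit Salsa20 state words are packed into each 64-bit
# register or stack slot, so most loads and stores move a pair.
# ----------------------------------------------------------------------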
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
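# The three instructions above align %rsp down to a 32-byte boundary
# while reserving at least 256 bytes of scratch space; %r11 keeps the
# adjustment so "add %r11,%rsp" at ._done can undo it exactly.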
# x = arg1
mov %rdi,%r8
# m = arg2
mov %rsi,%rsi
# out = arg3
mov %rdx,%rdi
# bytes = arg4
mov %rcx,%rdx
# unsigned>? bytes - 0
cmp $0,%rdx
# comment:fp stack unchanged by jump
# goto done if !unsigned>
jbe ._done
# comment:fp stack unchanged by fallthrough
# start:
._start:
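# Save the callee-saved registers. %r11 is caller-saved in the SysV
# ABI, but it holds the stack adjustment computed above and must
# survive the rounds, so it is spilled alongside them.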
# r11_stack = r11
movq %r11,0(%rsp)
# r12_stack = r12
movq %r12,8(%rsp)
# r13_stack = r13
movq %r13,16(%rsp)
# r14_stack = r14
movq %r14,24(%rsp)
# r15_stack = r15
movq %r15,32(%rsp)
# rbx_stack = rbx
movq %rbx,40(%rsp)
# rbp_stack = rbp
movq %rbp,48(%rsp)
# in0 = *(uint64 *) (x + 0)
movq 0(%r8),%rcx
# in2 = *(uint64 *) (x + 8)
movq 8(%r8),%r9
# in4 = *(uint64 *) (x + 16)
movq 16(%r8),%rax
# in6 = *(uint64 *) (x + 24)
movq 24(%r8),%r10
# in8 = *(uint64 *) (x + 32)
movq 32(%r8),%r11
# in10 = *(uint64 *) (x + 40)
movq 40(%r8),%r12
# in12 = *(uint64 *) (x + 48)
movq 48(%r8),%r13
# in14 = *(uint64 *) (x + 56)
movq 56(%r8),%r14
# j0 = in0
movq %rcx,56(%rsp)
# j2 = in2
movq %r9,64(%rsp)
# j4 = in4
movq %rax,72(%rsp)
# j6 = in6
movq %r10,80(%rsp)
# j8 = in8
movq %r11,88(%rsp)
# j10 = in10
movq %r12,96(%rsp)
# j12 = in12
movq %r13,104(%rsp)
# j14 = in14
movq %r14,112(%rsp)
# x_backup = x
movq %r8,120(%rsp)
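# The 16-word input state x[0..15] is now cached on the stack in
# j0..j14, one pair of 32-bit words per 64-bit slot
# (j0 = x[0],x[1]; j2 = x[2],x[3]; and so on).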
# bytesatleast1:
._bytesatleast1:
# unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto nocopy if !unsigned<
jae ._nocopy
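# Fewer than 64 bytes remain: bounce the tail through the 64-byte tmp
# buffer at 192(%rsp), redirecting both m and out there, so the block
# code below can always read and write a full 64-byte block. The real
# destination is parked in ctarget and patched up after the block.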
# ctarget = out
movq %rdi,128(%rsp)
# out = &tmp
leaq 192(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leaq 192(%rsp),%rdi
# m = &tmp
leaq 192(%rsp),%rsi
# comment:fp stack unchanged by fallthrough
# nocopy:
._nocopy:
# out_backup = out
movq %rdi,136(%rsp)
# m_backup = m
movq %rsi,144(%rsp)
# bytes_backup = bytes
movq %rdx,152(%rsp)
# x1 = j0
movq 56(%rsp),%rdi
# x0 = x1
mov %rdi,%rdx
# (uint64) x1 >>= 32
shr $32,%rdi
# x3 = j2
movq 64(%rsp),%rsi
# x2 = x3
mov %rsi,%rcx
# (uint64) x3 >>= 32
shr $32,%rsi
# x5 = j4
movq 72(%rsp),%r8
# x4 = x5
mov %r8,%r9
# (uint64) x5 >>= 32
shr $32,%r8
# x5_stack = x5
movq %r8,160(%rsp)
# x7 = j6
movq 80(%rsp),%r8
# x6 = x7
mov %r8,%rax
# (uint64) x7 >>= 32
shr $32,%r8
# x9 = j8
movq 88(%rsp),%r10
# x8 = x9
mov %r10,%r11
# (uint64) x9 >>= 32
shr $32,%r10
# x11 = j10
movq 96(%rsp),%r12
# x10 = x11
mov %r12,%r13
# x10_stack = x10
movq %r13,168(%rsp)
# (uint64) x11 >>= 32
shr $32,%r12
# x13 = j12
movq 104(%rsp),%r13
# x12 = x13
mov %r13,%r14
# (uint64) x13 >>= 32
shr $32,%r13
# x15 = j14
movq 112(%rsp),%r15
# x14 = x15
mov %r15,%rbx
# (uint64) x15 >>= 32
shr $32,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = 20
mov $20,%r15
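# 20 rounds total; each trip through ._mainloop performs four rounds
# (two column/row double-rounds), so the counter steps down by 4.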
# mainloop:
._mainloop:
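# For reference (not part of the original file): each lea/rol/xor
# triple below is one step of the Salsa20 quarter-round, roughly
#
#   y1 ^= rol32(y0 + y3, 7);
#   y2 ^= rol32(y1 + y0, 9);
#   y3 ^= rol32(y2 + y1, 13);
#   y0 ^= rol32(y3 + y2, 18);
#
# applied to a column such as (x0,x4,x8,x12) in the first half of a
# double-round and a row such as (x0,x1,x2,x3) in the second. lea
# forms the 64-bit sum, rol on the 32-bit subregister rotates (and
# zero-extends), and xor folds the result in; the a/b and c/d
# temporaries interleave two independent quarter-rounds to hide
# latency. Only the low 32 bits of each register matter here.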
# i_backup = i
movq %r15,184(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = i_backup
movq 184(%rsp),%r15
# unsigned>? i -= 4
sub $4,%r15
# comment:fp stack unchanged by jump
# goto mainloop if unsigned>
ja ._mainloop
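# End of the rounds: feed the original input words back in
# (z[i] += x[i]). Each 64-bit register holds a packed pair of state
# words, so the even word is added with a 32-bit addl (which also
# clears the high half of the register) and the odd word is rebuilt
# in the high half via the shl/add/shr/shl sequence before the pair
# is recombined with a single add.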
# (uint32) x2 += j2
addl 64(%rsp),%ecx
# x3 <<= 32
shl $32,%rsi
# x3 += j2
addq 64(%rsp),%rsi
# (uint64) x3 >>= 32
shr $32,%rsi
# x3 <<= 32
shl $32,%rsi
# x2 += x3
add %rsi,%rcx
# (uint32) x6 += j6
addl 80(%rsp),%eax
# x7 <<= 32
shl $32,%r8
# x7 += j6
addq 80(%rsp),%r8
# (uint64) x7 >>= 32
shr $32,%r8
# x7 <<= 32
shl $32,%r8
# x6 += x7
add %r8,%rax
# (uint32) x8 += j8
addl 88(%rsp),%r11d
# x9 <<= 32
shl $32,%r10
# x9 += j8
addq 88(%rsp),%r10
# (uint64) x9 >>= 32
shr $32,%r10
# x9 <<= 32
shl $32,%r10
# x8 += x9
add %r10,%r11
# (uint32) x12 += j12
addl 104(%rsp),%r14d
# x13 <<= 32
shl $32,%r13
# x13 += j12
addq 104(%rsp),%r13
# (uint64) x13 >>= 32
shr $32,%r13
# x13 <<= 32
shl $32,%r13
# x12 += x13
add %r13,%r14
# (uint32) x0 += j0
addl 56(%rsp),%edx
# x1 <<= 32
shl $32,%rdi
# x1 += j0
addq 56(%rsp),%rdi
# (uint64) x1 >>= 32
shr $32,%rdi
# x1 <<= 32
shl $32,%rdi
# x0 += x1
add %rdi,%rdx
# x5 = x5_stack
movq 160(%rsp),%rdi
# (uint32) x4 += j4
addl 72(%rsp),%r9d
# x5 <<= 32
shl $32,%rdi
# x5 += j4
addq 72(%rsp),%rdi
# (uint64) x5 >>= 32
shr $32,%rdi
# x5 <<= 32
shl $32,%rdi
# x4 += x5
add %rdi,%r9
# x10 = x10_stack
movq 168(%rsp),%r8
# (uint32) x10 += j10
addl 96(%rsp),%r8d
# x11 <<= 32
shl $32,%r12
# x11 += j10
addq 96(%rsp),%r12
# (uint64) x11 >>= 32
shr $32,%r12
# x11 <<= 32
shl $32,%r12
# x10 += x11
add %r12,%r8
# x15 = x15_stack
movq 176(%rsp),%rdi
# (uint32) x14 += j14
addl 112(%rsp),%ebx
# x15 <<= 32
shl $32,%rdi
# x15 += j14
addq 112(%rsp),%rdi
# (uint64) x15 >>= 32
shr $32,%rdi
# x15 <<= 32
shl $32,%rdi
# x14 += x15
add %rdi,%rbx
# out = out_backup
movq 136(%rsp),%rdi
# m = m_backup
movq 144(%rsp),%rsi
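# XOR one 64-byte keystream block with the message and store the
# result; the packed register pairs line up with consecutive 8-byte
# chunks of the block.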
# x0 ^= *(uint64 *) (m + 0)
xorq 0(%rsi),%rdx
# *(uint64 *) (out + 0) = x0
movq %rdx,0(%rdi)
# x2 ^= *(uint64 *) (m + 8)
xorq 8(%rsi),%rcx
# *(uint64 *) (out + 8) = x2
movq %rcx,8(%rdi)
# x4 ^= *(uint64 *) (m + 16)
xorq 16(%rsi),%r9
# *(uint64 *) (out + 16) = x4
movq %r9,16(%rdi)
# x6 ^= *(uint64 *) (m + 24)
xorq 24(%rsi),%rax
# *(uint64 *) (out + 24) = x6
movq %rax,24(%rdi)
# x8 ^= *(uint64 *) (m + 32)
xorq 32(%rsi),%r11
# *(uint64 *) (out + 32) = x8
movq %r11,32(%rdi)
# x10 ^= *(uint64 *) (m + 40)
xorq 40(%rsi),%r8
# *(uint64 *) (out + 40) = x10
movq %r8,40(%rdi)
# x12 ^= *(uint64 *) (m + 48)
xorq 48(%rsi),%r14
# *(uint64 *) (out + 48) = x12
movq %r14,48(%rdi)
# x14 ^= *(uint64 *) (m + 56)
xorq 56(%rsi),%rbx
# *(uint64 *) (out + 56) = x14
movq %rbx,56(%rdi)
# bytes = bytes_backup
movq 152(%rsp),%rdx
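# State words 8 and 9 form Salsa20's 64-bit little-endian block
# counter; j8 holds them as one packed 64-bit value, so a single
# 64-bit increment advances the counter across both words.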
# in8 = j8
movq 88(%rsp),%rcx
# in8 += 1
add $1,%rcx
# j8 = in8
movq %rcx,88(%rsp)
# unsigned>? unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# comment:fp stack unchanged by jump
# goto bytesatleast64 if !unsigned<
jae ._bytesatleast64
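# Final partial block: the output was generated into tmp, so copy the
# surviving bytes back to the caller's buffer saved in ctarget.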
# m = out
mov %rdi,%rsi
# out = ctarget
movq 128(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# comment:fp stack unchanged by fallthrough
# bytesatleast64:
._bytesatleast64:
# x = x_backup
movq 120(%rsp),%rdi
# in8 = j8
movq 88(%rsp),%rsi
# *(uint64 *) (x + 32) = in8
movq %rsi,32(%rdi)
# r11 = r11_stack
movq 0(%rsp),%r11
# r12 = r12_stack
movq 8(%rsp),%r12
# r13 = r13_stack
movq 16(%rsp),%r13
# r14 = r14_stack
movq 24(%rsp),%r14
# r15 = r15_stack
movq 32(%rsp),%r15
# rbx = rbx_stack
movq 40(%rsp),%rbx
# rbp = rbp_stack
movq 48(%rsp),%rbp
# comment:fp stack unchanged by fallthrough
# done:
._done:
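# Undo the stack adjustment held in %r11 (restored above, or still
# live from the prologue on the early-exit path). The two moves
# before ret look like qhasm's standard epilogue, which returns a
# pair in %rax/%rdx; the C prototype is void, so callers ignore them.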
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# bytesatleast65:
._bytesatleast65:
# bytes -= 64
sub $64,%rdx
# out += 64
add $64,%rdi
# m += 64
add $64,%rsi
# comment:fp stack unchanged by jump
# goto bytesatleast1
jmp ._bytesatleast1
# enter ECRYPT_keysetup
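# Key material goes to byte offsets 4 and 12 (state words 1-4) and
# 44 and 52 (words 11-14); for a 128-bit key the same 16 bytes are
# stored in both places. The diagonal constant words land at offsets
# 0, 20, 40 and 60. The 64-bit stores to offsets 4, 12, 44 and 52
# are deliberately unaligned, which x86 permits.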
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
# k = arg2
mov %rsi,%rsi
# kbits = arg3
mov %rdx,%rdx
# x = arg1
mov %rdi,%rdi
# in0 = *(uint64 *) (k + 0)
movq 0(%rsi),%r8
# in2 = *(uint64 *) (k + 8)
movq 8(%rsi),%r9
# *(uint64 *) (x + 4) = in0
movq %r8,4(%rdi)
# *(uint64 *) (x + 12) = in2
movq %r9,12(%rdi)
# unsigned<? kbits - 256
cmp $256,%rdx
# comment:fp stack unchanged by jump
# goto kbits128 if unsigned<
jb ._kbits128
# kbits256:
._kbits256:
# in10 = *(uint64 *) (k + 16)
movq 16(%rsi),%rdx
# in12 = *(uint64 *) (k + 24)
movq 24(%rsi),%rsi
# *(uint64 *) (x + 44) = in10
movq %rdx,44(%rdi)
# *(uint64 *) (x + 52) = in12
movq %rsi,52(%rdi)
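# Diagonal constants for 256-bit keys: the four words below are the
# ASCII string "expand 32-byte k" (Salsa20's sigma), read as
# little-endian 32-bit words.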
# in0 = 1634760805
mov $1634760805,%rsi
# in4 = 857760878
mov $857760878,%rdx
# in10 = 2036477234
mov $2036477234,%rcx
# in14 = 1797285236
mov $1797285236,%r8
# *(uint32 *) (x + 0) = in0
movl %esi,0(%rdi)
# *(uint32 *) (x + 20) = in4
movl %edx,20(%rdi)
# *(uint32 *) (x + 40) = in10
movl %ecx,40(%rdi)
# *(uint32 *) (x + 60) = in14
movl %r8d,60(%rdi)
# comment:fp stack unchanged by jump
# goto keysetupdone
jmp ._keysetupdone
# kbits128:
._kbits128:
# in10 = *(uint64 *) (k + 0)
movq 0(%rsi),%rdx
# in12 = *(uint64 *) (k + 8)
movq 8(%rsi),%rsi
# *(uint64 *) (x + 44) = in10
movq %rdx,44(%rdi)
# *(uint64 *) (x + 52) = in12
movq %rsi,52(%rdi)
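# Diagonal constants for 128-bit keys: "expand 16-byte k" (Salsa20's
# tau); only the second and third words differ from sigma.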
# in0 = 1634760805
mov $1634760805,%rsi
# in4 = 824206446
mov $824206446,%rdx
# in10 = 2036477238
mov $2036477238,%rcx
# in14 = 1797285236
mov $1797285236,%r8
# *(uint32 *) (x + 0) = in0
movl %esi,0(%rdi)
# *(uint32 *) (x + 20) = in4
movl %edx,20(%rdi)
# *(uint32 *) (x + 40) = in10
movl %ecx,40(%rdi)
# *(uint32 *) (x + 60) = in14
movl %r8d,60(%rdi)
# keysetupdone:
._keysetupdone:
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# enter ECRYPT_ivsetup
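# The 8-byte IV becomes state words 6-7 (byte offset 24), and the
# 64-bit block counter in words 8-9 (offset 32) is reset to zero.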
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
# iv = arg2
mov %rsi,%rsi
# x = arg1
mov %rdi,%rdi
# in6 = *(uint64 *) (iv + 0)
movq 0(%rsi),%rsi
# in8 = 0
mov $0,%r8
# *(uint64 *) (x + 24) = in6
movq %rsi,24(%rdi)
# *(uint64 *) (x + 32) = in8
movq %r8,32(%rdi)
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret