; NASM preamble for this file.
; "default rel" makes memory operands that name a symbol (e.g. [$L$bswap_mask]) RIP-relative by default.
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 1 | default rel |
; The three size keywords are defined as empty macros, so an operand written as
; XMMWORD[...] in this file assembles as a plain [...] memory reference.
| 2 | %define XMMWORD |
| 3 | %define YMMWORD |
| 4 | %define ZMMWORD |
; All code below is placed in a code section aligned to 64 bytes.
| 5 | section .text code align=64 |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 6 | |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 7 | |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 8 | |
; _aesni_ctr32_ghash_6x -- internal helper (no `global` directive); reached by `call` from
; aesni_gcm_decrypt and aesni_gcm_encrypt in this file. Processes the data six 16-byte
; blocks at a time: AES rounds on six counter blocks are interleaved with carry-less
; multiplies (vpclmulqdq) over blocks queued for hashing.
;
; Register state expected on entry, as set up by those callers:
;   rdi  = input pointer, rsi = output pointer (each advanced by 96 per pass)
;   rdx  = number of 16-byte blocks to process (callers do `shr rdx,4` on the byte length)
;   rcx  = key pointer + 128, so round-key offsets are written as (N-128)+rcx
;   r8   = pointer to the 16-byte counter block; ebx = its last dword; xmm1 = its 16 bytes
;   r9   = base for multiplier-table loads written as (N-32)+r9
;          NOTE(review): presumably precomputed powers of the hash key -- confirm against the table setup code
;   r10  = running count of bytes processed (incremented by 0x60 per pass)
;   r11  = address of $L$bswap_mask (start of the constant table)
;   r14  = cursor into the data being queued for hashing, r15 = limit for that cursor
;   ebp  = dword read from offset 240 of the key structure (selects the number of AES rounds)
;   xmm7 and stack slots = blocks already queued (byte-reversed) for hashing
;   xmm8 = running hash value
; Because `call` pushed a return address, a stack slot the callers write as N+rsp is
; addressed here as (N+8)+rsp.
; On return the last six output blocks are still in xmm9..xmm14 (the callers store them).
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 9 | ALIGN 32 |
| 10 | _aesni_ctr32_ghash_6x: |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 11 | |
; xmm2 = $L$one_msb (the 16-byte entry 32 bytes past $L$bswap_mask): adds 1 to the last byte of a block.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 12 | vmovdqu xmm2,XMMWORD[32+r11] |
; Pre-subtract the first group of six; $L$enc_tail subtracts six more per pass and stops on borrow.
| 13 | sub rdx,6 |
| 14 | vpxor xmm4,xmm4,xmm4 |
; xmm15 = first 16 bytes of the key schedule (round key 0).
| 15 | vmovdqu xmm15,XMMWORD[((0-128))+rcx] |
; xmm10..xmm14 = counter+1 .. counter+5, formed by byte-wise increments of the last byte.
; (If that byte would wrap, $L$handle_ctr32 recomputes these with a 32-bit add.)
| 16 | vpaddb xmm10,xmm1,xmm2 |
| 17 | vpaddb xmm11,xmm10,xmm2 |
| 18 | vpaddb xmm12,xmm11,xmm2 |
| 19 | vpaddb xmm13,xmm12,xmm2 |
| 20 | vpaddb xmm14,xmm13,xmm2 |
; xmm9 = first counter block with round key 0 already XORed in.
| 21 | vpxor xmm9,xmm1,xmm15 |
; Zero the (16+8)+rsp slot; the loop XORs this slot into xmm8 on every pass.
| 22 | vmovdqu XMMWORD[(16+8)+rsp],xmm4 |
| 23 | jmp NEAR $L$oop6x |
| 24 | |
; $L$oop6x -- loop body of _aesni_ctr32_ghash_6x. One pass:
;   * runs the AES rounds for the six counter blocks in xmm9..xmm14 (round keys read
;     from (N-128)+rcx into xmm2/xmm15/xmm1),
;   * multiplies six previously queued, byte-reversed blocks (xmm7 and the stack slots
;     (48+8)+rsp .. (112+8)+rsp) by table entries read from (N-32)+r9, accumulating
;     partial products in xmm4 / xmm6 / xmm7,
;   * byte-reverses the next six blocks at r14 with movbe and writes them to the stack
;     slots (32+8)+rsp .. (104+8)+rsp for the following pass.
; The pass is finished at $L$enc_tail, which either jumps back here or returns.
; The instruction interleaving is deliberate scheduling; statement order must be preserved.
| 25 | ALIGN 32 |
| 26 | $L$oop6x: |
; 100663296 = 6 << 24. ebx holds the counter block's last dword as loaded from memory,
; so its top byte is the block's final byte. A carry out means the byte-wise vpaddb
; increments would wrap within these six blocks, so take the 32-bit slow path.
| 27 | add ebx,100663296 |
| 28 | jc NEAR $L$handle_ctr32 |
; Fast path: xmm3 = first multiplier-table entry, xmm1 = counter for the start of the
; next group of six, and round key 0 is XORed into counter blocks 2 and 3.
| 29 | vmovdqu xmm3,XMMWORD[((0-32))+r9] |
| 30 | vpaddb xmm1,xmm14,xmm2 |
| 31 | vpxor xmm10,xmm10,xmm15 |
| 32 | vpxor xmm11,xmm11,xmm15 |
| 33 | |
; Fast and slow paths rejoin here. The next-group counter is written back to the
; counter block at [r8]; $L$enc_tail reloads it from there.
| 34 | $L$resume_ctr32: |
| 35 | vmovdqu XMMWORD[r8],xmm1 |
| 36 | vpclmulqdq xmm5,xmm7,xmm3,0x10 |
| 37 | vpxor xmm12,xmm12,xmm15 |
| 38 | vmovups xmm2,XMMWORD[((16-128))+rcx] |
| 39 | vpclmulqdq xmm6,xmm7,xmm3,0x01 |
| 40 | |
| 41 | |
| 42 | |
| 43 | |
| 44 | |
| 45 | |
| 46 | |
| 47 | |
| 48 | |
| 49 | |
| 50 | |
| 51 | |
| 52 | |
| 53 | |
| 54 | |
| 55 | |
| 56 | |
; Branch-free cursor update, spread over the next instructions:
;   r12 = (r15 >= r14, unsigned) ? 0x60 : 0   via cmp / setnc / neg / and
;   r14 += r12                                via lea
; The vector instructions between cmp and setnc do not modify the flags.
| 57 | xor r12,r12 |
| 58 | cmp r15,r14 |
| 59 | |
| 60 | vaesenc xmm9,xmm9,xmm2 |
| 61 | vmovdqu xmm0,XMMWORD[((48+8))+rsp] |
| 62 | vpxor xmm13,xmm13,xmm15 |
| 63 | vpclmulqdq xmm1,xmm7,xmm3,0x00 |
| 64 | vaesenc xmm10,xmm10,xmm2 |
| 65 | vpxor xmm14,xmm14,xmm15 |
| 66 | setnc r12b |
| 67 | vpclmulqdq xmm7,xmm7,xmm3,0x11 |
| 68 | vaesenc xmm11,xmm11,xmm2 |
| 69 | vmovdqu xmm3,XMMWORD[((16-32))+r9] |
| 70 | neg r12 |
| 71 | vaesenc xmm12,xmm12,xmm2 |
| 72 | vpxor xmm6,xmm6,xmm5 |
| 73 | vpclmulqdq xmm5,xmm0,xmm3,0x00 |
; xmm8 (running hash) absorbs xmm4, which was left by the previous pass (zero on the first pass).
| 74 | vpxor xmm8,xmm8,xmm4 |
| 75 | vaesenc xmm13,xmm13,xmm2 |
| 76 | vpxor xmm4,xmm1,xmm5 |
| 77 | and r12,0x60 |
| 78 | vmovups xmm15,XMMWORD[((32-128))+rcx] |
| 79 | vpclmulqdq xmm1,xmm0,xmm3,0x10 |
| 80 | vaesenc xmm14,xmm14,xmm2 |
| 81 | |
| 82 | vpclmulqdq xmm2,xmm0,xmm3,0x01 |
| 83 | lea r14,[r12*1+r14] |
| 84 | vaesenc xmm9,xmm9,xmm15 |
; xmm8 also absorbs the (16+8)+rsp slot (zeroed at function entry, written with xmm7 by $L$enc_tail).
| 85 | vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp] |
| 86 | vpclmulqdq xmm3,xmm0,xmm3,0x11 |
| 87 | vmovdqu xmm0,XMMWORD[((64+8))+rsp] |
| 88 | vaesenc xmm10,xmm10,xmm15 |
; movbe pairs: each 16-byte block at r14 is loaded as two byte-swapped qwords and stored
; high-half-first, i.e. the whole block is byte-reversed into its stack slot.
; Block at [80+r14] -> slot (32+8)+rsp (reloaded into xmm7 by $L$enc_tail before the next pass).
| 89 | movbe r13,QWORD[88+r14] |
| 90 | vaesenc xmm11,xmm11,xmm15 |
| 91 | movbe r12,QWORD[80+r14] |
| 92 | vaesenc xmm12,xmm12,xmm15 |
| 93 | mov QWORD[((32+8))+rsp],r13 |
| 94 | vaesenc xmm13,xmm13,xmm15 |
| 95 | mov QWORD[((40+8))+rsp],r12 |
| 96 | vmovdqu xmm5,XMMWORD[((48-32))+r9] |
| 97 | vaesenc xmm14,xmm14,xmm15 |
| 98 | |
| 99 | vmovups xmm15,XMMWORD[((48-128))+rcx] |
| 100 | vpxor xmm6,xmm6,xmm1 |
| 101 | vpclmulqdq xmm1,xmm0,xmm5,0x00 |
| 102 | vaesenc xmm9,xmm9,xmm15 |
| 103 | vpxor xmm6,xmm6,xmm2 |
| 104 | vpclmulqdq xmm2,xmm0,xmm5,0x10 |
| 105 | vaesenc xmm10,xmm10,xmm15 |
| 106 | vpxor xmm7,xmm7,xmm3 |
| 107 | vpclmulqdq xmm3,xmm0,xmm5,0x01 |
| 108 | vaesenc xmm11,xmm11,xmm15 |
| 109 | vpclmulqdq xmm5,xmm0,xmm5,0x11 |
| 110 | vmovdqu xmm0,XMMWORD[((80+8))+rsp] |
| 111 | vaesenc xmm12,xmm12,xmm15 |
| 112 | vaesenc xmm13,xmm13,xmm15 |
| 113 | vpxor xmm4,xmm4,xmm1 |
| 114 | vmovdqu xmm1,XMMWORD[((64-32))+r9] |
| 115 | vaesenc xmm14,xmm14,xmm15 |
| 116 | |
| 117 | vmovups xmm15,XMMWORD[((64-128))+rcx] |
| 118 | vpxor xmm6,xmm6,xmm2 |
| 119 | vpclmulqdq xmm2,xmm0,xmm1,0x00 |
| 120 | vaesenc xmm9,xmm9,xmm15 |
| 121 | vpxor xmm6,xmm6,xmm3 |
| 122 | vpclmulqdq xmm3,xmm0,xmm1,0x10 |
| 123 | vaesenc xmm10,xmm10,xmm15 |
; Block at [64+r14] -> slot (48+8)+rsp.
| 124 | movbe r13,QWORD[72+r14] |
| 125 | vpxor xmm7,xmm7,xmm5 |
| 126 | vpclmulqdq xmm5,xmm0,xmm1,0x01 |
| 127 | vaesenc xmm11,xmm11,xmm15 |
| 128 | movbe r12,QWORD[64+r14] |
| 129 | vpclmulqdq xmm1,xmm0,xmm1,0x11 |
| 130 | vmovdqu xmm0,XMMWORD[((96+8))+rsp] |
| 131 | vaesenc xmm12,xmm12,xmm15 |
| 132 | mov QWORD[((48+8))+rsp],r13 |
| 133 | vaesenc xmm13,xmm13,xmm15 |
| 134 | mov QWORD[((56+8))+rsp],r12 |
| 135 | vpxor xmm4,xmm4,xmm2 |
| 136 | vmovdqu xmm2,XMMWORD[((96-32))+r9] |
| 137 | vaesenc xmm14,xmm14,xmm15 |
| 138 | |
| 139 | vmovups xmm15,XMMWORD[((80-128))+rcx] |
| 140 | vpxor xmm6,xmm6,xmm3 |
| 141 | vpclmulqdq xmm3,xmm0,xmm2,0x00 |
| 142 | vaesenc xmm9,xmm9,xmm15 |
| 143 | vpxor xmm6,xmm6,xmm5 |
| 144 | vpclmulqdq xmm5,xmm0,xmm2,0x10 |
| 145 | vaesenc xmm10,xmm10,xmm15 |
; Block at [48+r14] -> slot (64+8)+rsp.
| 146 | movbe r13,QWORD[56+r14] |
| 147 | vpxor xmm7,xmm7,xmm1 |
| 148 | vpclmulqdq xmm1,xmm0,xmm2,0x01 |
; xmm8 absorbs the last queued block (slot (112+8)+rsp); the combined value is multiplied
; by the table entry at (112-32)+r9 further down.
| 149 | vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp] |
| 150 | vaesenc xmm11,xmm11,xmm15 |
| 151 | movbe r12,QWORD[48+r14] |
| 152 | vpclmulqdq xmm2,xmm0,xmm2,0x11 |
| 153 | vaesenc xmm12,xmm12,xmm15 |
| 154 | mov QWORD[((64+8))+rsp],r13 |
| 155 | vaesenc xmm13,xmm13,xmm15 |
| 156 | mov QWORD[((72+8))+rsp],r12 |
| 157 | vpxor xmm4,xmm4,xmm3 |
| 158 | vmovdqu xmm3,XMMWORD[((112-32))+r9] |
| 159 | vaesenc xmm14,xmm14,xmm15 |
| 160 | |
| 161 | vmovups xmm15,XMMWORD[((96-128))+rcx] |
| 162 | vpxor xmm6,xmm6,xmm5 |
| 163 | vpclmulqdq xmm5,xmm8,xmm3,0x10 |
| 164 | vaesenc xmm9,xmm9,xmm15 |
| 165 | vpxor xmm6,xmm6,xmm1 |
| 166 | vpclmulqdq xmm1,xmm8,xmm3,0x01 |
| 167 | vaesenc xmm10,xmm10,xmm15 |
; Block at [32+r14] -> slot (80+8)+rsp.
| 168 | movbe r13,QWORD[40+r14] |
| 169 | vpxor xmm7,xmm7,xmm2 |
| 170 | vpclmulqdq xmm2,xmm8,xmm3,0x00 |
| 171 | vaesenc xmm11,xmm11,xmm15 |
| 172 | movbe r12,QWORD[32+r14] |
| 173 | vpclmulqdq xmm8,xmm8,xmm3,0x11 |
| 174 | vaesenc xmm12,xmm12,xmm15 |
| 175 | mov QWORD[((80+8))+rsp],r13 |
| 176 | vaesenc xmm13,xmm13,xmm15 |
| 177 | mov QWORD[((88+8))+rsp],r12 |
| 178 | vpxor xmm6,xmm6,xmm5 |
| 179 | vaesenc xmm14,xmm14,xmm15 |
| 180 | vpxor xmm6,xmm6,xmm1 |
| 181 | |
| 182 | vmovups xmm15,XMMWORD[((112-128))+rcx] |
; Combine the accumulators: the low half of the middle term xmm6 is shifted into the low
; accumulator xmm4 (the high half goes into xmm7 further down).
| 183 | vpslldq xmm5,xmm6,8 |
| 184 | vpxor xmm4,xmm4,xmm2 |
; xmm3 = $L$poly (the 16-byte entry 16 bytes past $L$bswap_mask), the multiplier used for
; the reduction steps (vpalignr + vpclmulqdq ...,0x10) here and in $L$enc_tail.
| 185 | vmovdqu xmm3,XMMWORD[16+r11] |
| 186 | |
| 187 | vaesenc xmm9,xmm9,xmm15 |
| 188 | vpxor xmm7,xmm7,xmm8 |
| 189 | vaesenc xmm10,xmm10,xmm15 |
| 190 | vpxor xmm4,xmm4,xmm5 |
; Block at [16+r14] -> slot (96+8)+rsp.
| 191 | movbe r13,QWORD[24+r14] |
| 192 | vaesenc xmm11,xmm11,xmm15 |
| 193 | movbe r12,QWORD[16+r14] |
| 194 | vpalignr xmm0,xmm4,xmm4,8 |
| 195 | vpclmulqdq xmm4,xmm4,xmm3,0x10 |
| 196 | mov QWORD[((96+8))+rsp],r13 |
| 197 | vaesenc xmm12,xmm12,xmm15 |
| 198 | mov QWORD[((104+8))+rsp],r12 |
| 199 | vaesenc xmm13,xmm13,xmm15 |
| 200 | vmovups xmm1,XMMWORD[((128-128))+rcx] |
| 201 | vaesenc xmm14,xmm14,xmm15 |
| 202 | |
| 203 | vaesenc xmm9,xmm9,xmm1 |
| 204 | vmovups xmm15,XMMWORD[((144-128))+rcx] |
| 205 | vaesenc xmm10,xmm10,xmm1 |
| 206 | vpsrldq xmm6,xmm6,8 |
| 207 | vaesenc xmm11,xmm11,xmm1 |
| 208 | vpxor xmm7,xmm7,xmm6 |
| 209 | vaesenc xmm12,xmm12,xmm1 |
| 210 | vpxor xmm4,xmm4,xmm0 |
; Block at [r14]: left in r13/r12 and stored to slot (112+8)+rsp by $L$enc_tail.
| 211 | movbe r13,QWORD[8+r14] |
| 212 | vaesenc xmm13,xmm13,xmm1 |
| 213 | movbe r12,QWORD[r14] |
| 214 | vaesenc xmm14,xmm14,xmm1 |
| 215 | vmovups xmm1,XMMWORD[((160-128))+rcx] |
; Eight vaesenc rounds (keys at offsets 16..128) are done; xmm15 = key at 144, xmm1 = key at 160.
; ebp < 11: go to the tail, which applies xmm15 with vaesenc and xmm1 with vaesenclast.
| 216 | cmp ebp,11 |
| 217 | jb NEAR $L$enc_tail |
| 218 | |
; ebp >= 11: two more rounds (keys at 144 and 160), then load keys at 176 and 192.
| 219 | vaesenc xmm9,xmm9,xmm15 |
| 220 | vaesenc xmm10,xmm10,xmm15 |
| 221 | vaesenc xmm11,xmm11,xmm15 |
| 222 | vaesenc xmm12,xmm12,xmm15 |
| 223 | vaesenc xmm13,xmm13,xmm15 |
| 224 | vaesenc xmm14,xmm14,xmm15 |
| 225 | |
| 226 | vaesenc xmm9,xmm9,xmm1 |
| 227 | vaesenc xmm10,xmm10,xmm1 |
| 228 | vaesenc xmm11,xmm11,xmm1 |
| 229 | vaesenc xmm12,xmm12,xmm1 |
| 230 | vaesenc xmm13,xmm13,xmm1 |
| 231 | vmovups xmm15,XMMWORD[((176-128))+rcx] |
| 232 | vaesenc xmm14,xmm14,xmm1 |
| 233 | vmovups xmm1,XMMWORD[((192-128))+rcx] |
; Still testing the flags of `cmp ebp,11` above (the vector instructions in between leave
; the flags untouched): ebp == 11 goes to the tail now.
| 234 | je NEAR $L$enc_tail |
| 235 | |
; ebp > 11: two further rounds (keys at 176 and 192), then load keys at 208 and 224.
| 236 | vaesenc xmm9,xmm9,xmm15 |
| 237 | vaesenc xmm10,xmm10,xmm15 |
| 238 | vaesenc xmm11,xmm11,xmm15 |
| 239 | vaesenc xmm12,xmm12,xmm15 |
| 240 | vaesenc xmm13,xmm13,xmm15 |
| 241 | vaesenc xmm14,xmm14,xmm15 |
| 242 | |
| 243 | vaesenc xmm9,xmm9,xmm1 |
| 244 | vaesenc xmm10,xmm10,xmm1 |
| 245 | vaesenc xmm11,xmm11,xmm1 |
| 246 | vaesenc xmm12,xmm12,xmm1 |
| 247 | vaesenc xmm13,xmm13,xmm1 |
| 248 | vmovups xmm15,XMMWORD[((208-128))+rcx] |
| 249 | vaesenc xmm14,xmm14,xmm1 |
| 250 | vmovups xmm1,XMMWORD[((224-128))+rcx] |
| 251 | jmp NEAR $L$enc_tail |
| 252 | |
; $L$handle_ctr32 -- slow path of $L$oop6x, taken when `add ebx,6<<24` carried, i.e. the
; byte-wise vpaddb increments would wrap the counter block's last byte.
; The counter in xmm1 is byte-reversed with the $L$bswap_mask shuffle, incremented with
; 32-bit vpaddd using $L$one_lsb ([64+r11]) and $L$two_lsb ([48+r11]), and each result is
; reversed back. On entry xmm15 holds round key 0 and xmm9 has already been formed.
; This path also performs the work the skipped fast-path instructions would have done:
; it loads xmm3 from (0-32)+r9 and XORs round key 0 into xmm10 and xmm11.
| 253 | ALIGN 32 |
| 254 | $L$handle_ctr32: |
; xmm0 = byte-reversal shuffle mask; xmm6 = counter with its bytes reversed.
| 255 | vmovdqu xmm0,XMMWORD[r11] |
| 256 | vpshufb xmm6,xmm1,xmm0 |
; xmm5 = $L$two_lsb. xmm10 = ctr+1, xmm11 = ctr+2, then +2 steps give ctr+3 .. ctr+6.
| 257 | vmovdqu xmm5,XMMWORD[48+r11] |
| 258 | vpaddd xmm10,xmm6,XMMWORD[64+r11] |
| 259 | vpaddd xmm11,xmm6,xmm5 |
| 260 | vmovdqu xmm3,XMMWORD[((0-32))+r9] |
| 261 | vpaddd xmm12,xmm10,xmm5 |
| 262 | vpshufb xmm10,xmm10,xmm0 |
| 263 | vpaddd xmm13,xmm11,xmm5 |
| 264 | vpshufb xmm11,xmm11,xmm0 |
| 265 | vpxor xmm10,xmm10,xmm15 |
| 266 | vpaddd xmm14,xmm12,xmm5 |
| 267 | vpshufb xmm12,xmm12,xmm0 |
| 268 | vpxor xmm11,xmm11,xmm15 |
; xmm1 = ctr+6, the counter for the start of the next group (stored to [r8] at $L$resume_ctr32).
| 269 | vpaddd xmm1,xmm13,xmm5 |
| 270 | vpshufb xmm13,xmm13,xmm0 |
| 271 | vpshufb xmm14,xmm14,xmm0 |
| 272 | vpshufb xmm1,xmm1,xmm0 |
| 273 | jmp NEAR $L$resume_ctr32 |
| 274 | |
; $L$enc_tail -- finishes one six-block pass of _aesni_ctr32_ghash_6x.
; On entry xmm15 = second-to-last round key and xmm1 = last round key (both loaded before
; the jump here), xmm4/xmm7 = hash accumulators, xmm3 = $L$poly, r13/r12 = the byte-swapped
; halves of the block at [r14].
; It applies the last two AES rounds, XORs the result with the six input blocks at rdi,
; prepares the counters for the next pass, advances rdi/rsi/r10, and either loops back to
; $L$oop6x or falls into $L$6x_done and returns.
| 275 | ALIGN 32 |
| 276 | $L$enc_tail: |
| 277 | vaesenc xmm9,xmm9,xmm15 |
; Park xmm7 in slot (16+8)+rsp; it is XORed into xmm8 by the next pass or at $L$6x_done.
| 278 | vmovdqu XMMWORD[(16+8)+rsp],xmm7 |
; Second reduction step on xmm4 (swap halves into xmm8, multiply by $L$poly in xmm3).
| 279 | vpalignr xmm8,xmm4,xmm4,8 |
| 280 | vaesenc xmm10,xmm10,xmm15 |
| 281 | vpclmulqdq xmm4,xmm4,xmm3,0x10 |
; Pre-XOR the last round key with each input block, so that vaesenclast (which XORs its
; key operand into the state) yields keystream XOR input directly.
| 282 | vpxor xmm2,xmm1,XMMWORD[rdi] |
| 283 | vaesenc xmm11,xmm11,xmm15 |
| 284 | vpxor xmm0,xmm1,XMMWORD[16+rdi] |
| 285 | vaesenc xmm12,xmm12,xmm15 |
| 286 | vpxor xmm5,xmm1,XMMWORD[32+rdi] |
| 287 | vaesenc xmm13,xmm13,xmm15 |
| 288 | vpxor xmm6,xmm1,XMMWORD[48+rdi] |
| 289 | vaesenc xmm14,xmm14,xmm15 |
| 290 | vpxor xmm7,xmm1,XMMWORD[64+rdi] |
| 291 | vpxor xmm3,xmm1,XMMWORD[80+rdi] |
; Reload the next-group counter that $L$resume_ctr32 stored at [r8].
| 292 | vmovdqu xmm1,XMMWORD[r8] |
| 293 | |
| 294 | vaesenclast xmm9,xmm9,xmm2 |
; xmm2 = $L$one_msb again; xmm0,xmm5,xmm6,xmm7,xmm3 become next counter +1 .. +5.
| 295 | vmovdqu xmm2,XMMWORD[32+r11] |
| 296 | vaesenclast xmm10,xmm10,xmm0 |
| 297 | vpaddb xmm0,xmm1,xmm2 |
; Store the byte-reversed block loaded from [r14] into slot (112+8)+rsp.
| 298 | mov QWORD[((112+8))+rsp],r13 |
| 299 | lea rdi,[96+rdi] |
| 300 | vaesenclast xmm11,xmm11,xmm5 |
| 301 | vpaddb xmm5,xmm0,xmm2 |
| 302 | mov QWORD[((120+8))+rsp],r12 |
| 303 | lea rsi,[96+rsi] |
; xmm15 = round key 0 for the next pass.
| 304 | vmovdqu xmm15,XMMWORD[((0-128))+rcx] |
| 305 | vaesenclast xmm12,xmm12,xmm6 |
| 306 | vpaddb xmm6,xmm5,xmm2 |
| 307 | vaesenclast xmm13,xmm13,xmm7 |
| 308 | vpaddb xmm7,xmm6,xmm2 |
| 309 | vaesenclast xmm14,xmm14,xmm3 |
| 310 | vpaddb xmm3,xmm7,xmm2 |
| 311 | |
; r10 counts bytes processed (the callers return it in rax). rdx counts remaining blocks;
; a borrow means no further group of six remains.
| 312 | add r10,0x60 |
| 313 | sub rdx,0x6 |
| 314 | jc NEAR $L$6x_done |
| 315 | |
; Another pass follows: write the six output blocks just produced (rsi was already
; advanced, hence the negative offsets) and move the new counters into xmm9..xmm14.
| 316 | vmovups XMMWORD[(-96)+rsi],xmm9 |
| 317 | vpxor xmm9,xmm1,xmm15 |
| 318 | vmovups XMMWORD[(-80)+rsi],xmm10 |
| 319 | vmovdqa xmm10,xmm0 |
| 320 | vmovups XMMWORD[(-64)+rsi],xmm11 |
| 321 | vmovdqa xmm11,xmm5 |
| 322 | vmovups XMMWORD[(-48)+rsi],xmm12 |
| 323 | vmovdqa xmm12,xmm6 |
| 324 | vmovups XMMWORD[(-32)+rsi],xmm13 |
| 325 | vmovdqa xmm13,xmm7 |
| 326 | vmovups XMMWORD[(-16)+rsi],xmm14 |
| 327 | vmovdqa xmm14,xmm3 |
; xmm7 = byte-reversed block queued in slot (32+8)+rsp during this pass.
| 328 | vmovdqu xmm7,XMMWORD[((32+8))+rsp] |
| 329 | jmp NEAR $L$oop6x |
| 330 | |
; Last pass done: fold the parked value and xmm4 into the running hash xmm8.
; The final six output blocks are NOT stored here; they are returned in xmm9..xmm14.
| 331 | $L$6x_done: |
| 332 | vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp] |
| 333 | vpxor xmm8,xmm8,xmm4 |
| 334 | |
; Bytes F3 C3 encode `rep ret`.
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 335 | DB 0F3h,0C3h ;repret |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 336 | |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 337 | |
; aesni_gcm_decrypt -- exported entry point, Win64 calling convention.
; The six arguments arrive in rcx, rdx, r8, r9, [40+rsp], [48+rsp] and are moved into
; rdi, rsi, rdx, rcx, r8, r9 respectively. As used below and in _aesni_ctr32_ghash_6x:
;   arg1 (rdi) = input pointer         arg2 (rsi) = output pointer
;   arg3 (rdx) = length in bytes       arg4 (rcx) = key structure (round keys; dword at +240 -> ebp)
;   arg5 (r8)  = 16-byte counter block, updated in place by the helper
;   arg6 (r9)  = 16-byte hash value, read at entry and written back at exit; data from
;                r9+32 onward is read as the multiplier table
;                NOTE(review): presumably filled in by a separate init routine -- confirm
; Returns in rax the number of bytes processed (r10, a multiple of 0x60); returns 0 without
; touching any buffer when length < 0x60. Any remainder is left for the caller to handle.
; The input is what gets hashed: its first six blocks are queued before the helper call
; and the helper queues the following ones from r14.
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 338 | global aesni_gcm_decrypt |
| 339 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 340 | ALIGN 32 |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 341 | aesni_gcm_decrypt: |
; rdi/rsi are callee-saved on Win64; park them in the caller-provided home space.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 342 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 343 | mov QWORD[16+rsp],rsi |
| 344 | mov rax,rsp |
| 345 | $L$SEH_begin_aesni_gcm_decrypt: |
| 346 | mov rdi,rcx |
| 347 | mov rsi,rdx |
| 348 | mov rdx,r8 |
| 349 | mov rcx,r9 |
| 350 | mov r8,QWORD[40+rsp] |
| 351 | mov r9,QWORD[48+rsp] |
| 352 | |
| 353 | |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 354 | |
; r10 = return value; stays 0 on the early-out below.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 355 | xor r10,r10 |
| 356 | |
| 357 | |
| 358 | |
; Need at least one full group of six blocks (0x60 bytes).
| 359 | cmp rdx,0x60 |
| 360 | jb NEAR $L$gcm_dec_abort |
| 361 | |
; rax anchors the frame: saved registers are addressed at negative offsets from it and
; rsp is restored from it in the epilogue.
| 362 | lea rax,[rsp] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 363 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 364 | push rbx |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 365 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 366 | push rbp |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 367 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 368 | push r12 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 369 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 370 | push r13 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 371 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 372 | push r14 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 373 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 374 | push r15 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 375 | |
; Reserve 168 bytes and save xmm6..xmm15 (callee-saved on Win64) in them.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 376 | lea rsp,[((-168))+rsp] |
| 377 | movaps XMMWORD[(-216)+rax],xmm6 |
| 378 | movaps XMMWORD[(-200)+rax],xmm7 |
| 379 | movaps XMMWORD[(-184)+rax],xmm8 |
| 380 | movaps XMMWORD[(-168)+rax],xmm9 |
| 381 | movaps XMMWORD[(-152)+rax],xmm10 |
| 382 | movaps XMMWORD[(-136)+rax],xmm11 |
| 383 | movaps XMMWORD[(-120)+rax],xmm12 |
| 384 | movaps XMMWORD[(-104)+rax],xmm13 |
| 385 | movaps XMMWORD[(-88)+rax],xmm14 |
| 386 | movaps XMMWORD[(-72)+rax],xmm15 |
| 387 | $L$gcm_dec_body: |
| 388 | vzeroupper |
| 389 | |
; xmm1 = counter block, ebx = its last dword (used by the helper to detect byte wrap).
| 390 | vmovdqu xmm1,XMMWORD[r8] |
; Reserve at least 128 more bytes of scratch; rsp is aligned to 128 a few lines down.
| 391 | add rsp,-128 |
| 392 | mov ebx,DWORD[12+r8] |
; r11 = base of the constant table ($L$bswap_mask and the entries that follow it).
| 393 | lea r11,[$L$bswap_mask] |
| 394 | lea r14,[((-128))+rcx] |
| 395 | mov r15,0xf80 |
; xmm8 = current hash value (byte-reversed below).
| 396 | vmovdqu xmm8,XMMWORD[r9] |
| 397 | and rsp,-128 |
| 398 | vmovdqu xmm0,XMMWORD[r11] |
; Bias rcx by 128 and r9 by 64; the helper addresses round keys as (N-128)+rcx and
; table entries as (N-32)+r9.
| 399 | lea rcx,[128+rcx] |
| 400 | lea r9,[((32+32))+r9] |
; ebp = dword at offset 240 of the key structure (round-count selector for the helper).
| 401 | mov ebp,DWORD[((240-128))+rcx] |
| 402 | vpshufb xmm8,xmm8,xmm0 |
| 403 | |
; Compare bits 7..11 of the stack pointer with those of (key pointer - 128). If the stack
; offset is 0..767 bytes above the key's, lower rsp by that difference.
; NOTE(review): per the label name this avoids the scratch area aliasing the key schedule
; within a 4 KiB page -- confirm the intent against the upstream perlasm source.
| 404 | and r14,r15 |
| 405 | and r15,rsp |
| 406 | sub r15,r14 |
| 407 | jc NEAR $L$dec_no_key_aliasing |
| 408 | cmp r15,768 |
| 409 | jnc NEAR $L$dec_no_key_aliasing |
| 410 | sub rsp,r15 |
| 411 | $L$dec_no_key_aliasing: |
| 412 | |
; Queue the first six input blocks for hashing, byte-reversed: block 5 stays in xmm7,
; blocks 4..0 go to stack slots 48,64,80,96,112 (+rsp). The helper sees these slots at
; (N+8)+rsp because of the return address pushed by `call`.
| 413 | vmovdqu xmm7,XMMWORD[80+rdi] |
; r14 = hashing cursor, starting at the input; the helper advances it by 0x60 before
; queuing each following group.
| 414 | lea r14,[rdi] |
| 415 | vmovdqu xmm4,XMMWORD[64+rdi] |
| 416 | |
| 417 | |
| 418 | |
| 419 | |
| 420 | |
| 421 | |
| 422 | |
; r15 = input + length - 192: the helper stops advancing r14 once r14 exceeds this.
| 423 | lea r15,[((-192))+rdx*1+rdi] |
| 424 | |
| 425 | vmovdqu xmm5,XMMWORD[48+rdi] |
; rdx = number of 16-byte blocks.
| 426 | shr rdx,4 |
| 427 | xor r10,r10 |
| 428 | vmovdqu xmm6,XMMWORD[32+rdi] |
| 429 | vpshufb xmm7,xmm7,xmm0 |
| 430 | vmovdqu xmm2,XMMWORD[16+rdi] |
| 431 | vpshufb xmm4,xmm4,xmm0 |
| 432 | vmovdqu xmm3,XMMWORD[rdi] |
| 433 | vpshufb xmm5,xmm5,xmm0 |
| 434 | vmovdqu XMMWORD[48+rsp],xmm4 |
| 435 | vpshufb xmm6,xmm6,xmm0 |
| 436 | vmovdqu XMMWORD[64+rsp],xmm5 |
| 437 | vpshufb xmm2,xmm2,xmm0 |
| 438 | vmovdqu XMMWORD[80+rsp],xmm6 |
| 439 | vpshufb xmm3,xmm3,xmm0 |
| 440 | vmovdqu XMMWORD[96+rsp],xmm2 |
| 441 | vmovdqu XMMWORD[112+rsp],xmm3 |
| 442 | |
| 443 | call _aesni_ctr32_ghash_6x |
| 444 | |
; The helper returns the last six output blocks in xmm9..xmm14 with rsi already advanced.
| 445 | vmovups XMMWORD[(-96)+rsi],xmm9 |
| 446 | vmovups XMMWORD[(-80)+rsi],xmm10 |
| 447 | vmovups XMMWORD[(-64)+rsi],xmm11 |
| 448 | vmovups XMMWORD[(-48)+rsi],xmm12 |
| 449 | vmovups XMMWORD[(-32)+rsi],xmm13 |
| 450 | vmovups XMMWORD[(-16)+rsi],xmm14 |
| 451 | |
; Reverse the hash back and store it at the original arg6 address (r9 was advanced by 64).
| 452 | vpshufb xmm8,xmm8,XMMWORD[r11] |
| 453 | vmovdqu XMMWORD[(-64)+r9],xmm8 |
| 454 | |
; Epilogue: restore xmm6..xmm15 and the pushed registers relative to rax, then rsp.
| 455 | vzeroupper |
| 456 | movaps xmm6,XMMWORD[((-216))+rax] |
| 457 | movaps xmm7,XMMWORD[((-200))+rax] |
| 458 | movaps xmm8,XMMWORD[((-184))+rax] |
| 459 | movaps xmm9,XMMWORD[((-168))+rax] |
| 460 | movaps xmm10,XMMWORD[((-152))+rax] |
| 461 | movaps xmm11,XMMWORD[((-136))+rax] |
| 462 | movaps xmm12,XMMWORD[((-120))+rax] |
| 463 | movaps xmm13,XMMWORD[((-104))+rax] |
| 464 | movaps xmm14,XMMWORD[((-88))+rax] |
| 465 | movaps xmm15,XMMWORD[((-72))+rax] |
| 466 | mov r15,QWORD[((-48))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 467 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 468 | mov r14,QWORD[((-40))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 469 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 470 | mov r13,QWORD[((-32))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 471 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 472 | mov r12,QWORD[((-24))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 473 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 474 | mov rbp,QWORD[((-16))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 475 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 476 | mov rbx,QWORD[((-8))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 477 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 478 | lea rsp,[rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 479 | |
; Common exit (also the early-out target): rax = bytes processed; restore rdi/rsi from
; the home space.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 480 | $L$gcm_dec_abort: |
| 481 | mov rax,r10 |
| 482 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 483 | mov rsi,QWORD[16+rsp] |
| 484 | DB 0F3h,0C3h ;repret |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 485 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 486 | $L$SEH_end_aesni_gcm_decrypt: |
| 487 | |
; _aesni_ctr32_6x -- internal helper (no `global` directive), called twice by
; aesni_gcm_encrypt. CTR-processes six 16-byte blocks with no hashing.
; In:  rdi = input, rsi = output, rcx = key pointer + 128,
;      ebp = round-count value (dword from offset 240 of the key structure),
;      r11 = address of $L$bswap_mask, xmm0 = byte-reversal mask (used on the wrap path),
;      xmm1 = counter block, ebx = last dword of the counter block.
; Out: xmm9..xmm14 = the six output blocks (also stored at the incoming rsi);
;      rdi and rsi advanced by 96; xmm1 = counter following these six blocks;
;      ebx has 6<<24 added.
; Unlike _aesni_ctr32_ghash_6x it does not write the counter back to [r8] and does not
; touch rdx or r10.
; Clobbers r12, r13, xmm2..xmm6, xmm8, xmm15 and the flags.
| 488 | ALIGN 32 |
| 489 | _aesni_ctr32_6x: |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 490 | |
; xmm4 = round key 0, xmm2 = $L$one_msb, xmm15 = round key 1, r12 -> round key 2.
; r13 = ebp - 1 = number of iterations of $L$oop_ctr32.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 491 | vmovdqu xmm4,XMMWORD[((0-128))+rcx] |
| 492 | vmovdqu xmm2,XMMWORD[32+r11] |
| 493 | lea r13,[((-1))+rbp] |
| 494 | vmovups xmm15,XMMWORD[((16-128))+rcx] |
| 495 | lea r12,[((32-128))+rcx] |
| 496 | vpxor xmm9,xmm1,xmm4 |
; 100663296 = 6 << 24: carry means the counter block's last byte would wrap within these
; six increments, so the 32-bit path must be used.
| 497 | add ebx,100663296 |
| 498 | jc NEAR $L$handle_ctr32_2 |
; Fast path: byte-wise increments of the last byte, each block XORed with round key 0.
| 499 | vpaddb xmm10,xmm1,xmm2 |
| 500 | vpaddb xmm11,xmm10,xmm2 |
| 501 | vpxor xmm10,xmm10,xmm4 |
| 502 | vpaddb xmm12,xmm11,xmm2 |
| 503 | vpxor xmm11,xmm11,xmm4 |
| 504 | vpaddb xmm13,xmm12,xmm2 |
| 505 | vpxor xmm12,xmm12,xmm4 |
| 506 | vpaddb xmm14,xmm13,xmm2 |
| 507 | vpxor xmm13,xmm13,xmm4 |
| 508 | vpaddb xmm1,xmm14,xmm2 |
| 509 | vpxor xmm14,xmm14,xmm4 |
| 510 | jmp NEAR $L$oop_ctr32 |
| 511 | |
; One AES round on all six blocks per iteration, then fetch the next round key.
| 512 | ALIGN 16 |
| 513 | $L$oop_ctr32: |
| 514 | vaesenc xmm9,xmm9,xmm15 |
| 515 | vaesenc xmm10,xmm10,xmm15 |
| 516 | vaesenc xmm11,xmm11,xmm15 |
| 517 | vaesenc xmm12,xmm12,xmm15 |
| 518 | vaesenc xmm13,xmm13,xmm15 |
| 519 | vaesenc xmm14,xmm14,xmm15 |
| 520 | vmovups xmm15,XMMWORD[r12] |
| 521 | lea r12,[16+r12] |
| 522 | dec r13d |
| 523 | jnz NEAR $L$oop_ctr32 |
| 524 | |
; xmm3 = last round key. One more vaesenc with xmm15 (ebp vaesenc rounds in total), with
; the last key pre-XORed into each input block so vaesenclast yields keystream XOR input.
| 525 | vmovdqu xmm3,XMMWORD[r12] |
| 526 | vaesenc xmm9,xmm9,xmm15 |
| 527 | vpxor xmm4,xmm3,XMMWORD[rdi] |
| 528 | vaesenc xmm10,xmm10,xmm15 |
| 529 | vpxor xmm5,xmm3,XMMWORD[16+rdi] |
| 530 | vaesenc xmm11,xmm11,xmm15 |
| 531 | vpxor xmm6,xmm3,XMMWORD[32+rdi] |
| 532 | vaesenc xmm12,xmm12,xmm15 |
| 533 | vpxor xmm8,xmm3,XMMWORD[48+rdi] |
| 534 | vaesenc xmm13,xmm13,xmm15 |
| 535 | vpxor xmm2,xmm3,XMMWORD[64+rdi] |
| 536 | vaesenc xmm14,xmm14,xmm15 |
| 537 | vpxor xmm3,xmm3,XMMWORD[80+rdi] |
| 538 | lea rdi,[96+rdi] |
| 539 | |
| 540 | vaesenclast xmm9,xmm9,xmm4 |
| 541 | vaesenclast xmm10,xmm10,xmm5 |
| 542 | vaesenclast xmm11,xmm11,xmm6 |
| 543 | vaesenclast xmm12,xmm12,xmm8 |
| 544 | vaesenclast xmm13,xmm13,xmm2 |
| 545 | vaesenclast xmm14,xmm14,xmm3 |
; Store the six output blocks and advance the output pointer.
| 546 | vmovups XMMWORD[rsi],xmm9 |
| 547 | vmovups XMMWORD[16+rsi],xmm10 |
| 548 | vmovups XMMWORD[32+rsi],xmm11 |
| 549 | vmovups XMMWORD[48+rsi],xmm12 |
| 550 | vmovups XMMWORD[64+rsi],xmm13 |
| 551 | vmovups XMMWORD[80+rsi],xmm14 |
| 552 | lea rsi,[96+rsi] |
| 553 | |
; Bytes F3 C3 encode `rep ret`.
| 554 | DB 0F3h,0C3h ;repret |
; Wrap path: byte-reverse the counter with the mask the caller left in xmm0, increment
; with 32-bit vpaddd ($L$one_lsb at [64+r11], $L$two_lsb at [48+r11]), reverse back and
; XOR in round key 0 (xmm4). xmm1 ends up as counter+6 for the next call.
| 555 | ALIGN 32 |
| 556 | $L$handle_ctr32_2: |
| 557 | vpshufb xmm6,xmm1,xmm0 |
| 558 | vmovdqu xmm5,XMMWORD[48+r11] |
| 559 | vpaddd xmm10,xmm6,XMMWORD[64+r11] |
| 560 | vpaddd xmm11,xmm6,xmm5 |
| 561 | vpaddd xmm12,xmm10,xmm5 |
| 562 | vpshufb xmm10,xmm10,xmm0 |
| 563 | vpaddd xmm13,xmm11,xmm5 |
| 564 | vpshufb xmm11,xmm11,xmm0 |
| 565 | vpxor xmm10,xmm10,xmm4 |
| 566 | vpaddd xmm14,xmm12,xmm5 |
| 567 | vpshufb xmm12,xmm12,xmm0 |
| 568 | vpxor xmm11,xmm11,xmm4 |
| 569 | vpaddd xmm1,xmm13,xmm5 |
| 570 | vpshufb xmm13,xmm13,xmm0 |
| 571 | vpxor xmm12,xmm12,xmm4 |
| 572 | vpshufb xmm14,xmm14,xmm0 |
| 573 | vpxor xmm13,xmm13,xmm4 |
| 574 | vpshufb xmm1,xmm1,xmm0 |
| 575 | vpxor xmm14,xmm14,xmm4 |
| 576 | jmp NEAR $L$oop_ctr32 |
| 577 | |
| 578 | |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 579 | |
; aesni_gcm_encrypt -- exported entry point, Win64 calling convention.
; The six arguments arrive in rcx, rdx, r8, r9, [40+rsp], [48+rsp] and are moved into
; rdi, rsi, rdx, rcx, r8, r9 respectively. As used below and in the two helpers:
;   arg1 (rdi) = input pointer         arg2 (rsi) = output pointer
;   arg3 (rdx) = length in bytes       arg4 (rcx) = key structure (round keys; dword at +240 -> ebp)
;   arg5 (r8)  = 16-byte counter block, updated in place by _aesni_ctr32_ghash_6x
;   arg6 (r9)  = 16-byte hash value, read and written back; data from r9+32 onward is
;                read as the multiplier table
;                NOTE(review): presumably filled in by a separate init routine -- confirm
; Returns in rax the number of bytes processed (r10, a multiple of 0x60); returns 0 without
; touching any buffer when length < 0x60*3 (288 bytes). Any remainder is left for the caller.
; The output is what gets hashed, so hashing trails encryption: two groups of six blocks
; are encrypted up front with _aesni_ctr32_6x, the combined helper then encrypts further
; groups while hashing earlier ones, and the blocks still unhashed when it returns are
; hashed by the straight-line code after the call.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 580 | global aesni_gcm_encrypt |
| 581 | |
| 582 | ALIGN 32 |
| 583 | aesni_gcm_encrypt: |
; rdi/rsi are callee-saved on Win64; park them in the caller-provided home space.
| 584 | mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 585 | mov QWORD[16+rsp],rsi |
| 586 | mov rax,rsp |
| 587 | $L$SEH_begin_aesni_gcm_encrypt: |
| 588 | mov rdi,rcx |
| 589 | mov rsi,rdx |
| 590 | mov rdx,r8 |
| 591 | mov rcx,r9 |
| 592 | mov r8,QWORD[40+rsp] |
| 593 | mov r9,QWORD[48+rsp] |
| 594 | |
| 595 | |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 596 | |
; r10 = return value; stays 0 on the early-out below.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 597 | xor r10,r10 |
| 598 | |
| 599 | |
| 600 | |
| 601 | |
; Need at least three groups of six blocks (0x60*3 = 288 bytes).
| 602 | cmp rdx,0x60*3 |
| 603 | jb NEAR $L$gcm_enc_abort |
| 604 | |
; rax anchors the frame: saved registers are addressed at negative offsets from it and
; rsp is restored from it in the epilogue.
| 605 | lea rax,[rsp] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 606 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 607 | push rbx |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 608 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 609 | push rbp |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 610 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 611 | push r12 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 612 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 613 | push r13 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 614 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 615 | push r14 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 616 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 617 | push r15 |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 618 | |
; Reserve 168 bytes and save xmm6..xmm15 (callee-saved on Win64) in them.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 619 | lea rsp,[((-168))+rsp] |
| 620 | movaps XMMWORD[(-216)+rax],xmm6 |
| 621 | movaps XMMWORD[(-200)+rax],xmm7 |
| 622 | movaps XMMWORD[(-184)+rax],xmm8 |
| 623 | movaps XMMWORD[(-168)+rax],xmm9 |
| 624 | movaps XMMWORD[(-152)+rax],xmm10 |
| 625 | movaps XMMWORD[(-136)+rax],xmm11 |
| 626 | movaps XMMWORD[(-120)+rax],xmm12 |
| 627 | movaps XMMWORD[(-104)+rax],xmm13 |
| 628 | movaps XMMWORD[(-88)+rax],xmm14 |
| 629 | movaps XMMWORD[(-72)+rax],xmm15 |
| 630 | $L$gcm_enc_body: |
| 631 | vzeroupper |
| 632 | |
; xmm1 = counter block, ebx = its last dword (used by the helpers to detect byte wrap).
| 633 | vmovdqu xmm1,XMMWORD[r8] |
; Reserve at least 128 more bytes of scratch; rsp is aligned to 128 a few lines down.
| 634 | add rsp,-128 |
| 635 | mov ebx,DWORD[12+r8] |
; r11 = base of the constant table ($L$bswap_mask and the entries that follow it).
| 636 | lea r11,[$L$bswap_mask] |
| 637 | lea r14,[((-128))+rcx] |
| 638 | mov r15,0xf80 |
; Bias rcx by 128; the helpers address round keys as (N-128)+rcx.
| 639 | lea rcx,[128+rcx] |
; xmm0 = byte-reversal shuffle mask.
| 640 | vmovdqu xmm0,XMMWORD[r11] |
| 641 | and rsp,-128 |
; ebp = dword at offset 240 of the key structure (round-count selector for the helpers).
| 642 | mov ebp,DWORD[((240-128))+rcx] |
| 643 | |
; Compare bits 7..11 of the stack pointer with those of (key pointer - 128). If the stack
; offset is 0..767 bytes above the key's, lower rsp by that difference.
; NOTE(review): per the label name this avoids the scratch area aliasing the key schedule
; within a 4 KiB page -- confirm the intent against the upstream perlasm source.
| 644 | and r14,r15 |
| 645 | and r15,rsp |
| 646 | sub r15,r14 |
| 647 | jc NEAR $L$enc_no_key_aliasing |
| 648 | cmp r15,768 |
| 649 | jnc NEAR $L$enc_no_key_aliasing |
| 650 | sub rsp,r15 |
| 651 | $L$enc_no_key_aliasing: |
| 652 | |
; r14 = hashing cursor, starting at the output buffer (the output is what is hashed).
| 653 | lea r14,[rsi] |
| 654 | |
| 655 | |
| 656 | |
| 657 | |
| 658 | |
| 659 | |
| 660 | |
| 661 | |
; r15 = output + length - 192: the combined helper stops advancing r14 once r14 exceeds this.
| 662 | lea r15,[((-192))+rdx*1+rsi] |
| 663 | |
; rdx = number of 16-byte blocks.
| 664 | shr rdx,4 |
| 665 | |
; First group of six: encrypt and store. Then queue the results for hashing,
; byte-reversed: block 5 stays in xmm7, blocks 4..0 go to stack slots 48,64,80,96,112 (+rsp).
| 666 | call _aesni_ctr32_6x |
| 667 | vpshufb xmm8,xmm9,xmm0 |
| 668 | vpshufb xmm2,xmm10,xmm0 |
| 669 | vmovdqu XMMWORD[112+rsp],xmm8 |
| 670 | vpshufb xmm4,xmm11,xmm0 |
| 671 | vmovdqu XMMWORD[96+rsp],xmm2 |
| 672 | vpshufb xmm5,xmm12,xmm0 |
| 673 | vmovdqu XMMWORD[80+rsp],xmm4 |
| 674 | vpshufb xmm6,xmm13,xmm0 |
| 675 | vmovdqu XMMWORD[64+rsp],xmm5 |
| 676 | vpshufb xmm7,xmm14,xmm0 |
| 677 | vmovdqu XMMWORD[48+rsp],xmm6 |
| 678 | |
; Second group of six: encrypt and store; the combined helper picks these up from the
; output buffer via r14.
| 679 | call _aesni_ctr32_6x |
| 680 | |
; xmm8 = current hash value (byte-reversed below); bias r9 by 64 for table addressing.
; Twelve blocks are already done: subtract them from rdx and start r10 at 0x60*2 bytes.
| 681 | vmovdqu xmm8,XMMWORD[r9] |
| 682 | lea r9,[((32+32))+r9] |
| 683 | sub rdx,12 |
| 684 | mov r10,0x60*2 |
| 685 | vpshufb xmm8,xmm8,xmm0 |
| 686 | |
| 687 | call _aesni_ctr32_ghash_6x |
; After the helper returns, twelve blocks remain to be folded into the hash: six queued
; byte-reversed in stack slots 32..112 (+rsp) and the last six output blocks still in
; xmm9..xmm14. The code down to the final vpshufb stores those six outputs, byte-reverses
; them, multiplies all twelve by table entries read from (N-32)+r9 and reduces with
; $L$poly ([16+r11]).
; NOTE(review): the vpunpckhqdq/vpxor pairs (high half XOR low half, multiplied by a
; separate table entry) look like Karatsuba-style middle terms -- confirm against the
; upstream perlasm source. Instruction order is deliberate scheduling; do not reorder.
| 688 | vmovdqu xmm7,XMMWORD[32+rsp] |
| 689 | vmovdqu xmm0,XMMWORD[r11] |
| 690 | vmovdqu xmm3,XMMWORD[((0-32))+r9] |
| 691 | vpunpckhqdq xmm1,xmm7,xmm7 |
| 692 | vmovdqu xmm15,XMMWORD[((32-32))+r9] |
; Store the last six output blocks (rsi was already advanced by the helper) and
; byte-reverse each one in place for hashing.
| 693 | vmovups XMMWORD[(-96)+rsi],xmm9 |
| 694 | vpshufb xmm9,xmm9,xmm0 |
| 695 | vpxor xmm1,xmm1,xmm7 |
| 696 | vmovups XMMWORD[(-80)+rsi],xmm10 |
| 697 | vpshufb xmm10,xmm10,xmm0 |
| 698 | vmovups XMMWORD[(-64)+rsi],xmm11 |
| 699 | vpshufb xmm11,xmm11,xmm0 |
| 700 | vmovups XMMWORD[(-48)+rsi],xmm12 |
| 701 | vpshufb xmm12,xmm12,xmm0 |
| 702 | vmovups XMMWORD[(-32)+rsi],xmm13 |
| 703 | vpshufb xmm13,xmm13,xmm0 |
| 704 | vmovups XMMWORD[(-16)+rsi],xmm14 |
| 705 | vpshufb xmm14,xmm14,xmm0 |
; Park the reversed first block of the final group; it is XORed in further down.
| 706 | vmovdqu XMMWORD[16+rsp],xmm9 |
| 707 | vmovdqu xmm6,XMMWORD[48+rsp] |
| 708 | vmovdqu xmm0,XMMWORD[((16-32))+r9] |
| 709 | vpunpckhqdq xmm2,xmm6,xmm6 |
| 710 | vpclmulqdq xmm5,xmm7,xmm3,0x00 |
| 711 | vpxor xmm2,xmm2,xmm6 |
| 712 | vpclmulqdq xmm7,xmm7,xmm3,0x11 |
| 713 | vpclmulqdq xmm1,xmm1,xmm15,0x00 |
| 714 | |
| 715 | vmovdqu xmm9,XMMWORD[64+rsp] |
| 716 | vpclmulqdq xmm4,xmm6,xmm0,0x00 |
| 717 | vmovdqu xmm3,XMMWORD[((48-32))+r9] |
| 718 | vpxor xmm4,xmm4,xmm5 |
| 719 | vpunpckhqdq xmm5,xmm9,xmm9 |
| 720 | vpclmulqdq xmm6,xmm6,xmm0,0x11 |
| 721 | vpxor xmm5,xmm5,xmm9 |
| 722 | vpxor xmm6,xmm6,xmm7 |
| 723 | vpclmulqdq xmm2,xmm2,xmm15,0x10 |
| 724 | vmovdqu xmm15,XMMWORD[((80-32))+r9] |
| 725 | vpxor xmm2,xmm2,xmm1 |
| 726 | |
| 727 | vmovdqu xmm1,XMMWORD[80+rsp] |
| 728 | vpclmulqdq xmm7,xmm9,xmm3,0x00 |
| 729 | vmovdqu xmm0,XMMWORD[((64-32))+r9] |
| 730 | vpxor xmm7,xmm7,xmm4 |
| 731 | vpunpckhqdq xmm4,xmm1,xmm1 |
| 732 | vpclmulqdq xmm9,xmm9,xmm3,0x11 |
| 733 | vpxor xmm4,xmm4,xmm1 |
| 734 | vpxor xmm9,xmm9,xmm6 |
| 735 | vpclmulqdq xmm5,xmm5,xmm15,0x00 |
| 736 | vpxor xmm5,xmm5,xmm2 |
| 737 | |
| 738 | vmovdqu xmm2,XMMWORD[96+rsp] |
| 739 | vpclmulqdq xmm6,xmm1,xmm0,0x00 |
| 740 | vmovdqu xmm3,XMMWORD[((96-32))+r9] |
| 741 | vpxor xmm6,xmm6,xmm7 |
| 742 | vpunpckhqdq xmm7,xmm2,xmm2 |
| 743 | vpclmulqdq xmm1,xmm1,xmm0,0x11 |
| 744 | vpxor xmm7,xmm7,xmm2 |
| 745 | vpxor xmm1,xmm1,xmm9 |
| 746 | vpclmulqdq xmm4,xmm4,xmm15,0x10 |
| 747 | vmovdqu xmm15,XMMWORD[((128-32))+r9] |
| 748 | vpxor xmm4,xmm4,xmm5 |
| 749 | |
; The running hash absorbs the sixth queued block before being multiplied.
| 750 | vpxor xmm8,xmm8,XMMWORD[112+rsp] |
| 751 | vpclmulqdq xmm5,xmm2,xmm3,0x00 |
| 752 | vmovdqu xmm0,XMMWORD[((112-32))+r9] |
| 753 | vpunpckhqdq xmm9,xmm8,xmm8 |
| 754 | vpxor xmm5,xmm5,xmm6 |
| 755 | vpclmulqdq xmm2,xmm2,xmm3,0x11 |
| 756 | vpxor xmm9,xmm9,xmm8 |
| 757 | vpxor xmm2,xmm2,xmm1 |
| 758 | vpclmulqdq xmm7,xmm7,xmm15,0x00 |
| 759 | vpxor xmm4,xmm7,xmm4 |
| 760 | |
; From here the multiplies for the final six blocks (xmm14 down to xmm10) are interleaved
; with combining and reducing the products of the six stack blocks.
| 761 | vpclmulqdq xmm6,xmm8,xmm0,0x00 |
| 762 | vmovdqu xmm3,XMMWORD[((0-32))+r9] |
| 763 | vpunpckhqdq xmm1,xmm14,xmm14 |
| 764 | vpclmulqdq xmm8,xmm8,xmm0,0x11 |
| 765 | vpxor xmm1,xmm1,xmm14 |
| 766 | vpxor xmm5,xmm6,xmm5 |
| 767 | vpclmulqdq xmm9,xmm9,xmm15,0x10 |
| 768 | vmovdqu xmm15,XMMWORD[((32-32))+r9] |
| 769 | vpxor xmm7,xmm8,xmm2 |
| 770 | vpxor xmm6,xmm9,xmm4 |
| 771 | |
| 772 | vmovdqu xmm0,XMMWORD[((16-32))+r9] |
| 773 | vpxor xmm9,xmm7,xmm5 |
| 774 | vpclmulqdq xmm4,xmm14,xmm3,0x00 |
| 775 | vpxor xmm6,xmm6,xmm9 |
| 776 | vpunpckhqdq xmm2,xmm13,xmm13 |
| 777 | vpclmulqdq xmm14,xmm14,xmm3,0x11 |
| 778 | vpxor xmm2,xmm2,xmm13 |
| 779 | vpslldq xmm9,xmm6,8 |
| 780 | vpclmulqdq xmm1,xmm1,xmm15,0x00 |
| 781 | vpxor xmm8,xmm5,xmm9 |
| 782 | vpsrldq xmm6,xmm6,8 |
| 783 | vpxor xmm7,xmm7,xmm6 |
| 784 | |
| 785 | vpclmulqdq xmm5,xmm13,xmm0,0x00 |
| 786 | vmovdqu xmm3,XMMWORD[((48-32))+r9] |
| 787 | vpxor xmm5,xmm5,xmm4 |
| 788 | vpunpckhqdq xmm9,xmm12,xmm12 |
| 789 | vpclmulqdq xmm13,xmm13,xmm0,0x11 |
| 790 | vpxor xmm9,xmm9,xmm12 |
| 791 | vpxor xmm13,xmm13,xmm14 |
| 792 | vpalignr xmm14,xmm8,xmm8,8 |
| 793 | vpclmulqdq xmm2,xmm2,xmm15,0x10 |
| 794 | vmovdqu xmm15,XMMWORD[((80-32))+r9] |
| 795 | vpxor xmm2,xmm2,xmm1 |
| 796 | |
| 797 | vpclmulqdq xmm4,xmm12,xmm3,0x00 |
| 798 | vmovdqu xmm0,XMMWORD[((64-32))+r9] |
| 799 | vpxor xmm4,xmm4,xmm5 |
| 800 | vpunpckhqdq xmm1,xmm11,xmm11 |
| 801 | vpclmulqdq xmm12,xmm12,xmm3,0x11 |
| 802 | vpxor xmm1,xmm1,xmm11 |
| 803 | vpxor xmm12,xmm12,xmm13 |
; XOR in the reversed first block of the final group that was parked at 16+rsp.
| 804 | vxorps xmm7,xmm7,XMMWORD[16+rsp] |
| 805 | vpclmulqdq xmm9,xmm9,xmm15,0x00 |
| 806 | vpxor xmm9,xmm9,xmm2 |
| 807 | |
; Reduction step: multiply by $L$poly ([16+r11]) and XOR with the half-swapped copy.
| 808 | vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 |
| 809 | vxorps xmm8,xmm8,xmm14 |
| 810 | |
| 811 | vpclmulqdq xmm5,xmm11,xmm0,0x00 |
| 812 | vmovdqu xmm3,XMMWORD[((96-32))+r9] |
| 813 | vpxor xmm5,xmm5,xmm4 |
| 814 | vpunpckhqdq xmm2,xmm10,xmm10 |
| 815 | vpclmulqdq xmm11,xmm11,xmm0,0x11 |
| 816 | vpxor xmm2,xmm2,xmm10 |
| 817 | vpalignr xmm14,xmm8,xmm8,8 |
| 818 | vpxor xmm11,xmm11,xmm12 |
| 819 | vpclmulqdq xmm1,xmm1,xmm15,0x10 |
| 820 | vmovdqu xmm15,XMMWORD[((128-32))+r9] |
| 821 | vpxor xmm1,xmm1,xmm9 |
| 822 | |
| 823 | vxorps xmm14,xmm14,xmm7 |
| 824 | vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 |
| 825 | vxorps xmm8,xmm8,xmm14 |
| 826 | |
| 827 | vpclmulqdq xmm4,xmm10,xmm3,0x00 |
| 828 | vmovdqu xmm0,XMMWORD[((112-32))+r9] |
| 829 | vpxor xmm4,xmm4,xmm5 |
| 830 | vpunpckhqdq xmm9,xmm8,xmm8 |
| 831 | vpclmulqdq xmm10,xmm10,xmm3,0x11 |
| 832 | vpxor xmm9,xmm9,xmm8 |
| 833 | vpxor xmm10,xmm10,xmm11 |
| 834 | vpclmulqdq xmm2,xmm2,xmm15,0x00 |
| 835 | vpxor xmm2,xmm2,xmm1 |
| 836 | |
| 837 | vpclmulqdq xmm5,xmm8,xmm0,0x00 |
| 838 | vpclmulqdq xmm7,xmm8,xmm0,0x11 |
| 839 | vpxor xmm5,xmm5,xmm4 |
| 840 | vpclmulqdq xmm6,xmm9,xmm15,0x10 |
| 841 | vpxor xmm7,xmm7,xmm10 |
| 842 | vpxor xmm6,xmm6,xmm2 |
| 843 | |
; Combine the low (xmm5), high (xmm7) and middle (xmm6) accumulators, then reduce twice
; with $L$poly in xmm3.
| 844 | vpxor xmm4,xmm7,xmm5 |
| 845 | vpxor xmm6,xmm6,xmm4 |
| 846 | vpslldq xmm1,xmm6,8 |
| 847 | vmovdqu xmm3,XMMWORD[16+r11] |
| 848 | vpsrldq xmm6,xmm6,8 |
| 849 | vpxor xmm8,xmm5,xmm1 |
| 850 | vpxor xmm7,xmm7,xmm6 |
| 851 | |
| 852 | vpalignr xmm2,xmm8,xmm8,8 |
| 853 | vpclmulqdq xmm8,xmm8,xmm3,0x10 |
| 854 | vpxor xmm8,xmm8,xmm2 |
| 855 | |
| 856 | vpalignr xmm2,xmm8,xmm8,8 |
| 857 | vpclmulqdq xmm8,xmm8,xmm3,0x10 |
| 858 | vpxor xmm2,xmm2,xmm7 |
| 859 | vpxor xmm8,xmm8,xmm2 |
; Reverse the hash back and store it at the original arg6 address (r9 was advanced by 64).
| 860 | vpshufb xmm8,xmm8,XMMWORD[r11] |
| 861 | vmovdqu XMMWORD[(-64)+r9],xmm8 |
| 862 | |
; Epilogue: restore xmm6..xmm15 and the pushed registers relative to rax, then rsp.
| 863 | vzeroupper |
| 864 | movaps xmm6,XMMWORD[((-216))+rax] |
| 865 | movaps xmm7,XMMWORD[((-200))+rax] |
| 866 | movaps xmm8,XMMWORD[((-184))+rax] |
| 867 | movaps xmm9,XMMWORD[((-168))+rax] |
| 868 | movaps xmm10,XMMWORD[((-152))+rax] |
| 869 | movaps xmm11,XMMWORD[((-136))+rax] |
| 870 | movaps xmm12,XMMWORD[((-120))+rax] |
| 871 | movaps xmm13,XMMWORD[((-104))+rax] |
| 872 | movaps xmm14,XMMWORD[((-88))+rax] |
| 873 | movaps xmm15,XMMWORD[((-72))+rax] |
| 874 | mov r15,QWORD[((-48))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 875 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 876 | mov r14,QWORD[((-40))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 877 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 878 | mov r13,QWORD[((-32))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 879 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 880 | mov r12,QWORD[((-24))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 881 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 882 | mov rbp,QWORD[((-16))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 883 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 884 | mov rbx,QWORD[((-8))+rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 885 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 886 | lea rsp,[rax] |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 887 | |
; Common exit (also the early-out target): rax = bytes processed; restore rdi/rsi from
; the home space.
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 888 | $L$gcm_enc_abort: |
| 889 | mov rax,r10 |
| 890 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 891 | mov rsi,QWORD[16+rsp] |
| 892 | DB 0F3h,0C3h ;repret |
Robert Sloan | d5c2215 | 2017-11-13 09:22:12 -0800 | [diff] [blame] | 893 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 894 | $L$SEH_end_aesni_gcm_encrypt: |
;-----------------------------------------------------------------------
; Constant pool for the AES-NI GCM routines (read-only, kept in .text).
;
; The five 16-byte constants are laid out back to back, so they sit at
; offsets 0, 16, 32, 48 and 64 from $L$bswap_mask.  Code in this file
; reaches them as [r11], [16+r11] and [32+r11]; r11 is presumably loaded
; with the address of $L$bswap_mask (the load is not in this part of the
; file -- confirm before touching the layout).  Do not reorder, resize
; or insert anything between the entries.
;-----------------------------------------------------------------------
ALIGN	64
$L$bswap_mask:
	; Byte-shuffle control 15..0: used as the vpshufb operand, it
	; reverses the byte order of a 128-bit value.
	DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
	; Only the top byte (0xc2) is set.  The value at this offset is the
	; vpclmulqdq operand in the two folding steps that finish the GHASH
	; computation.
	DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
	; 1 in byte 15.  Added with vpaddb to derive successive counter
	; blocks, i.e. an increment of the last byte of the block.
	DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
	; 2 in byte 0 (users are outside this part of the file).
	DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
	; 1 in byte 0 (users are outside this part of the file).
	DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
	; NUL-terminated ASCII identification string:
	; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
	DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
	DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
	DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
	DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN	64
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler -- Win64 language-specific exception handler that the
; .xdata records $L$SEH_gcm_dec_info and $L$SEH_gcm_enc_info register
; for aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; ABI:   Microsoft x64
; In:    r8  = CONTEXT *context            (register state at the fault)
;        r9  = DISPATCHER_CONTEXT *disp
;        (rcx / rdx, the first two handler arguments, are not read)
; Out:   eax = 1 (ExceptionContinueSearch)
; Saves: rsi, rdi, rbx, rbp, r12-r15 and RFLAGS are pushed on entry and
;        popped before returning.
; Stack: 9 pushes + 64 bytes keep rsp 16-byte aligned at the call; the
;        64 bytes are 32 bytes of shadow space plus four stack arguments.
;
; disp->HandlerData points at two image-relative offsets taken from the
; .xdata record: [0] = "body" label, [1] = "abort" label of the function.
;   Rip <  body          : frame base = context->Rax, nothing restored
;   Rip >= abort         : frame base = context->Rsp, nothing restored
;   body <= Rip < abort  : frame base = context->Rax; the non-volatile
;                          registers the function stored just below that
;                          base are copied back into *context.
; In every case rdi/rsi are reloaded from base+8 / base+16 (where the
; WIN64 prologue parks them), the patched CONTEXT is copied into
; disp->ContextRecord and RtlVirtualUnwind is called to unwind one frame.
;
; Numeric displacements are field offsets of the Win64 CONTEXT and
; DISPATCHER_CONTEXT structures; the field names are given in the
; comments.
;-----------------------------------------------------------------------
ALIGN	16
gcm_se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64				; shadow space + args 5..8

	mov	rax,QWORD[120+r8]		; context->Rax
	mov	rbx,QWORD[248+r8]		; context->Rip

	mov	rsi,QWORD[8+r9]			; disp->ImageBase
	mov	r11,QWORD[56+r9]		; disp->HandlerData

	mov	r10d,DWORD[r11]			; HandlerData[0]: body label offset
	lea	r10,[r10*1+rsi]			; -> absolute address
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail		; Rip before body label

	mov	rax,QWORD[152+r8]		; context->Rsp

	mov	r10d,DWORD[4+r11]		; HandlerData[1]: abort label offset
	lea	r10,[r10*1+rsi]			; -> absolute address
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail		; Rip at or past abort label

	mov	rax,QWORD[120+r8]		; context->Rax again: frame base

	; Reload the saved general-purpose registers from the six qwords
	; just below the frame base ...
	mov	r15,QWORD[((-48))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	rbx,QWORD[((-8))+rax]
	; ... and write them into the context record.
	mov	QWORD[240+r8],r15		; context->R15
	mov	QWORD[232+r8],r14		; context->R14
	mov	QWORD[224+r8],r13		; context->R13
	mov	QWORD[216+r8],r12		; context->R12
	mov	QWORD[160+r8],rbp		; context->Rbp
	mov	QWORD[144+r8],rbx		; context->Rbx

	; Copy the 160-byte xmm6..xmm15 save area into context->Xmm6...
	lea	rsi,[((-216))+rax]		; source: save area below the base
	lea	rdi,[512+r8]			; destination: &context->Xmm6
	mov	ecx,20				; 10 registers * 16 bytes / 8
	cld					; same bytes as the former
	rep movsq				;   DD 0xa548f3fc

$L$common_seh_tail:
	mov	rdi,QWORD[8+rax]		; rdi saved at base+8
	mov	rsi,QWORD[16+rax]		; rsi saved at base+16
	mov	QWORD[152+r8],rax		; context->Rsp = frame base
	mov	QWORD[168+r8],rsi		; context->Rsi
	mov	QWORD[176+r8],rdi		; context->Rdi

	; Publish the patched context: *disp->ContextRecord = *context.
	mov	rdi,QWORD[40+r9]		; disp->ContextRecord
	mov	rsi,r8
	mov	ecx,154				; 154 qwords = 1232 bytes (CONTEXT)
	cld					; same bytes as the former
	rep movsq				;   DD 0xa548f3fc

	; RtlVirtualUnwind(UNW_FLAG_NHANDLER, ImageBase, ControlPc,
	;                  FunctionEntry, ContextRecord, &HandlerData,
	;                  &EstablisherFrame, NULL)
	mov	rsi,r9
	xor	rcx,rcx				; arg1 = 0 (UNW_FLAG_NHANDLER)
	mov	rdx,QWORD[8+rsi]		; arg2 = disp->ImageBase
	mov	r8,QWORD[rsi]			; arg3 = disp->ControlPc
	mov	r9,QWORD[16+rsi]		; arg4 = disp->FunctionEntry
	mov	r10,QWORD[40+rsi]		; disp->ContextRecord
	lea	r11,[56+rsi]			; &disp->HandlerData
	lea	r12,[24+rsi]			; &disp->EstablisherFrame
	mov	QWORD[32+rsp],r10		; arg5
	mov	QWORD[40+rsp],r11		; arg6
	mov	QWORD[48+rsp],r12		; arg7
	mov	QWORD[56+rsp],rcx		; arg8 = NULL
	call	QWORD[rel __imp_RtlVirtualUnwind]

	mov	eax,1				; ExceptionContinueSearch
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 1002 | |
Steven Valdez | b0b45c6 | 2017-01-17 16:23:54 -0500 | [diff] [blame] | 1003 | |
;-----------------------------------------------------------------------
; Win64 structured-exception-handling tables.
;
; .pdata holds one function-table entry per covered function: three
; image-relative 32-bit values -- begin address, end address and the
; address of the function's unwind-info record in .xdata.
;-----------------------------------------------------------------------
section	.pdata rdata align=4
ALIGN	4
	; aesni_gcm_decrypt
	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
	DD	$L$SEH_gcm_dec_info wrt ..imagebase

	; aesni_gcm_encrypt
	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
	DD	$L$SEH_gcm_enc_info wrt ..imagebase

;-----------------------------------------------------------------------
; .xdata unwind-info records.  Each record is:
;   DB 9,0,0,0   header: version 1 with the "has exception handler" flag
;                (1 | 1<<3 = 9), prolog size 0, no unwind codes, no frame
;                register -- all unwinding is left to the handler
;   DD handler   image-relative address of gcm_se_handler
;   DD body,abort  handler data: the two image-relative label offsets
;                gcm_se_handler reads as HandlerData[0] and [1] to decide
;                whether the faulting Rip lies inside the function body
;-----------------------------------------------------------------------
section	.xdata rdata align=8
ALIGN	8
$L$SEH_gcm_dec_info:
	DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
	DB	9,0,0,0
	DD	gcm_se_handler wrt ..imagebase
	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase