Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 1 | #if defined(__i386__) |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 2 | .text |
/*
 * bn_mul_mont -- Montgomery multiplication for 32-bit x86 (AT&T syntax).
 *
 * All six arguments are read from the stack; after the four register
 * pushes below the first argument sits at 20(%esp).
 * NOTE(review): the roles below are read off how this code uses each slot;
 * presumably the C prototype is
 *   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the header.
 *
 *   arg0  result vector; written by .L017sub / .L018copy
 *   arg1  first multiplicand vector
 *   arg2  second multiplicand vector
 *   arg3  vector that is multiplied by the per-round factor m and finally
 *         subtracted in the tail (the modulus, presumably)
 *   arg4  pointer to a word; only its first word is loaded (called n0 below)
 *   arg5  word count num, compared as a signed value
 *
 * Returns %eax = 0 without touching memory when arg5 < 4, otherwise 1.
 * %ebp, %ebx, %esi and %edi are saved and restored; the MMX path executes
 * emms before reaching the shared tail.
 *
 * Private frame, offsets from the relocated %esp:
 *    0  squaring path only: num-1
 *    4  arg0
 *    8  arg1
 *   12  arg2 (integer paths reuse it as running pointer / outer index)
 *   16  arg3
 *   20  n0 (the word arg4 points to)
 *   24  caller's %esp, restored just before the register pops
 *   28  integer multiply path only: address just past the last word of arg2
 *   32  temporary vector tp[], at least num+2 words
 */
| 3 | .globl bn_mul_mont |
| 4 | .hidden bn_mul_mont |
| 5 | .type bn_mul_mont,@function |
| 6 | .align 16 |
| 7 | bn_mul_mont: |
| 8 | .L_bn_mul_mont_begin: |
| 9 | pushl %ebp |
| 10 | pushl %ebx |
| 11 | pushl %esi |
| 12 | pushl %edi |
/* %eax = 0 is the return value of the early exit taken when arg5 < 4. */
| 13 | xorl %eax,%eax |
| 14 | movl 40(%esp),%edi |
| 15 | cmpl $4,%edi |
| 16 | jl .L000just_leave |
/* %esi = address of the arg0 slot, %edx = address of the arg1 slot. */
| 17 | leal 20(%esp),%esi |
| 18 | leal 24(%esp),%edx |
/* %ebp = %esp - 32 - 4*(num+2): candidate base of the private frame. */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 19 | addl $2,%edi |
| 20 | negl %edi |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 21 | leal -32(%esp,%edi,4),%ebp |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 22 | negl %edi |
/*
 * Lower the frame base by (base - %edx) mod 2048, then by a further 2048
 * when bit 11 of (base XOR %edx) is clear, and round down to 64 bytes.
 * NOTE(review): looks like placement chosen to avoid cache aliasing between
 * the frame and the argument area -- confirm against the generator script.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 23 | movl %ebp,%eax |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 24 | subl %edx,%eax |
| 25 | andl $2047,%eax |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 26 | subl %eax,%ebp |
| 27 | xorl %ebp,%edx |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 28 | andl $2048,%edx |
| 29 | xorl $2048,%edx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 30 | subl %edx,%ebp |
| 31 | andl $-64,%ebp |
/*
 * Move %esp down to the new base in 4096-byte steps, loading one word at
 * every step so each intervening page is touched in descending order.
 * %edx keeps the caller's %esp for the epilogue.
 */
| 32 | movl %esp,%eax |
| 33 | subl %ebp,%eax |
| 34 | andl $-4096,%eax |
| 35 | movl %esp,%edx |
| 36 | leal (%ebp,%eax,1),%esp |
| 37 | movl (%esp),%eax |
| 38 | cmpl %ebp,%esp |
| 39 | ja .L001page_walk |
| 40 | jmp .L002page_walk_done |
| 41 | .align 16 |
| 42 | .L001page_walk: |
| 43 | leal -4096(%esp),%esp |
| 44 | movl (%esp),%eax |
| 45 | cmpl %ebp,%esp |
| 46 | ja .L001page_walk |
| 47 | .L002page_walk_done: |
/*
 * Copy arg0..arg3 and the word arg4 points to into frame slots 4..20,
 * set %ebx = num-1 and store the caller's %esp at 24(%esp).
 */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 48 | movl (%esi),%eax |
| 49 | movl 4(%esi),%ebx |
| 50 | movl 8(%esi),%ecx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 51 | movl 12(%esi),%ebp |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 52 | movl 16(%esi),%esi |
| 53 | movl (%esi),%esi |
| 54 | movl %eax,4(%esp) |
| 55 | movl %ebx,8(%esp) |
| 56 | movl %ecx,12(%esp) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 57 | movl %ebp,16(%esp) |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 58 | movl %esi,20(%esp) |
| 59 | leal -3(%edi),%ebx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 60 | movl %edx,24(%esp) |
/*
 * Position-independent lookup of OPENSSL_ia32cap_P: call/pop yields the
 * address of .L003PIC_me_up, the leal adds the link-time distance.
 * Bit 26 of its first word selects the MMX/pmuludq code below; when the
 * bit is clear, control goes to .L004non_sse2.
 */
| 61 | call .L003PIC_me_up |
| 62 | .L003PIC_me_up: |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 63 | popl %eax |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 64 | leal OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 65 | btl $26,(%eax) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 66 | jnc .L004non_sse2 |
/*
 * ---- pmuludq path ----
 * %mm7 = 0x00000000ffffffff (low-word mask), %esi = arg1, %edi = arg2,
 * %ebp = arg3, %edx = outer index i, %ecx = inner index j.
 * %mm4 = arg2[i]; %mm5 = m, the low word of (running low word * n0);
 * %mm2 / %mm3 carry the two 64-bit accumulators (arg1*arg2[i] and arg3*m).
 */
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 67 | movl $-1,%eax |
| 68 | movd %eax,%mm7 |
| 69 | movl 8(%esp),%esi |
| 70 | movl 12(%esp),%edi |
| 71 | movl 16(%esp),%ebp |
| 72 | xorl %edx,%edx |
| 73 | xorl %ecx,%ecx |
| 74 | movd (%edi),%mm4 |
| 75 | movd (%esi),%mm5 |
| 76 | movd (%ebp),%mm3 |
| 77 | pmuludq %mm4,%mm5 |
| 78 | movq %mm5,%mm2 |
| 79 | movq %mm5,%mm0 |
| 80 | pand %mm7,%mm0 |
| 81 | pmuludq 20(%esp),%mm5 |
| 82 | pmuludq %mm5,%mm3 |
| 83 | paddq %mm0,%mm3 |
| 84 | movd 4(%ebp),%mm1 |
| 85 | movd 4(%esi),%mm0 |
| 86 | psrlq $32,%mm2 |
| 87 | psrlq $32,%mm3 |
| 88 | incl %ecx |
| 89 | .align 16 |
/*
 * First round (i = 0): tp[] is only written here, never read. Each result
 * word lands one slot lower (28(%esp,%ecx,4) is tp[j-1]).
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 90 | .L0051st: |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 91 | pmuludq %mm4,%mm0 |
| 92 | pmuludq %mm5,%mm1 |
| 93 | paddq %mm0,%mm2 |
| 94 | paddq %mm1,%mm3 |
| 95 | movq %mm2,%mm0 |
| 96 | pand %mm7,%mm0 |
| 97 | movd 4(%ebp,%ecx,4),%mm1 |
| 98 | paddq %mm0,%mm3 |
| 99 | movd 4(%esi,%ecx,4),%mm0 |
| 100 | psrlq $32,%mm2 |
| 101 | movd %mm3,28(%esp,%ecx,4) |
| 102 | psrlq $32,%mm3 |
| 103 | leal 1(%ecx),%ecx |
| 104 | cmpl %ebx,%ecx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 105 | jl .L0051st |
/* Last column of the first round; the 64-bit store fills tp[num-1], tp[num]. */
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 106 | pmuludq %mm4,%mm0 |
| 107 | pmuludq %mm5,%mm1 |
| 108 | paddq %mm0,%mm2 |
| 109 | paddq %mm1,%mm3 |
| 110 | movq %mm2,%mm0 |
| 111 | pand %mm7,%mm0 |
| 112 | paddq %mm0,%mm3 |
| 113 | movd %mm3,28(%esp,%ecx,4) |
| 114 | psrlq $32,%mm2 |
| 115 | psrlq $32,%mm3 |
| 116 | paddq %mm2,%mm3 |
| 117 | movq %mm3,32(%esp,%ebx,4) |
| 118 | incl %edx |
/*
 * Rounds i = 1 .. num-1: same computation, but the previous tp[] words
 * (loaded into %mm6) are added in as well.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 119 | .L006outer: |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 120 | xorl %ecx,%ecx |
| 121 | movd (%edi,%edx,4),%mm4 |
| 122 | movd (%esi),%mm5 |
| 123 | movd 32(%esp),%mm6 |
| 124 | movd (%ebp),%mm3 |
| 125 | pmuludq %mm4,%mm5 |
| 126 | paddq %mm6,%mm5 |
| 127 | movq %mm5,%mm0 |
| 128 | movq %mm5,%mm2 |
| 129 | pand %mm7,%mm0 |
| 130 | pmuludq 20(%esp),%mm5 |
| 131 | pmuludq %mm5,%mm3 |
| 132 | paddq %mm0,%mm3 |
| 133 | movd 36(%esp),%mm6 |
| 134 | movd 4(%ebp),%mm1 |
| 135 | movd 4(%esi),%mm0 |
| 136 | psrlq $32,%mm2 |
| 137 | psrlq $32,%mm3 |
| 138 | paddq %mm6,%mm2 |
| 139 | incl %ecx |
| 140 | decl %ebx |
/*
 * %ebx counts down to zero here; the jnz consumes the flags of decl because
 * the leal in between does not modify flags.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 141 | .L007inner: |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 142 | pmuludq %mm4,%mm0 |
| 143 | pmuludq %mm5,%mm1 |
| 144 | paddq %mm0,%mm2 |
| 145 | paddq %mm1,%mm3 |
| 146 | movq %mm2,%mm0 |
| 147 | movd 36(%esp,%ecx,4),%mm6 |
| 148 | pand %mm7,%mm0 |
| 149 | movd 4(%ebp,%ecx,4),%mm1 |
| 150 | paddq %mm0,%mm3 |
| 151 | movd 4(%esi,%ecx,4),%mm0 |
| 152 | psrlq $32,%mm2 |
| 153 | movd %mm3,28(%esp,%ecx,4) |
| 154 | psrlq $32,%mm3 |
| 155 | paddq %mm6,%mm2 |
| 156 | decl %ebx |
| 157 | leal 1(%ecx),%ecx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 158 | jnz .L007inner |
/* Restore %ebx = num-1 from the inner index, then finish the round. */
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 159 | movl %ecx,%ebx |
| 160 | pmuludq %mm4,%mm0 |
| 161 | pmuludq %mm5,%mm1 |
| 162 | paddq %mm0,%mm2 |
| 163 | paddq %mm1,%mm3 |
| 164 | movq %mm2,%mm0 |
| 165 | pand %mm7,%mm0 |
| 166 | paddq %mm0,%mm3 |
| 167 | movd %mm3,28(%esp,%ecx,4) |
| 168 | psrlq $32,%mm2 |
| 169 | psrlq $32,%mm3 |
| 170 | movd 36(%esp,%ebx,4),%mm6 |
| 171 | paddq %mm2,%mm3 |
| 172 | paddq %mm6,%mm3 |
| 173 | movq %mm3,32(%esp,%ebx,4) |
| 174 | leal 1(%edx),%edx |
| 175 | cmpl %ebx,%edx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 176 | jle .L006outer |
/* Leave MMX state before the integer-only tail. */
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 177 | emms |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 178 | jmp .L008common_tail |
Adam Langley | e9ada86 | 2015-05-11 17:20:37 -0700 | [diff] [blame] | 179 | .align 16 |
/*
 * ---- integer path ----
 * %ebp becomes (num & 1) | (arg1 - arg2), which is zero only when num is
 * even and both multiplicands are the same pointer; in that case the
 * dedicated squaring code at .L009bn_sqr_mont is used. The jz tests the
 * flags of the orl: the movl between them leaves flags untouched.
 * %eax = address just past arg2[num-1], kept at 28(%esp) as loop bound.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 180 | .L004non_sse2: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 181 | movl 8(%esp),%esi |
| 182 | leal 1(%ebx),%ebp |
| 183 | movl 12(%esp),%edi |
| 184 | xorl %ecx,%ecx |
| 185 | movl %esi,%edx |
| 186 | andl $1,%ebp |
| 187 | subl %edi,%edx |
| 188 | leal 4(%edi,%ebx,4),%eax |
| 189 | orl %edx,%ebp |
| 190 | movl (%edi),%edi |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 191 | jz .L009bn_sqr_mont |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 192 | movl %eax,28(%esp) |
| 193 | movl (%esi),%eax |
| 194 | xorl %edx,%edx |
| 195 | .align 16 |
/* tp[j] = low word of arg1[j]*arg2[0] + carry, for j = 0 .. num-2. */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 196 | .L010mull: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 197 | movl %edx,%ebp |
| 198 | mull %edi |
| 199 | addl %eax,%ebp |
| 200 | leal 1(%ecx),%ecx |
| 201 | adcl $0,%edx |
| 202 | movl (%esi,%ecx,4),%eax |
| 203 | cmpl %ebx,%ecx |
| 204 | movl %ebp,28(%esp,%ecx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 205 | jl .L010mull |
/*
 * Last column: store tp[num-1], tp[num] and zero tp[num+1]. Then switch to
 * the reduction operands: %edi = m = n0 * tp[0] (low word), %esi = arg3.
 * The addl of tp[0] below is executed only for its carry; %eax is reloaded
 * with arg3[1] right after.
 */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 206 | movl %edx,%ebp |
| 207 | mull %edi |
| 208 | movl 20(%esp),%edi |
| 209 | addl %ebp,%eax |
| 210 | movl 16(%esp),%esi |
| 211 | adcl $0,%edx |
| 212 | imull 32(%esp),%edi |
| 213 | movl %eax,32(%esp,%ebx,4) |
| 214 | xorl %ecx,%ecx |
| 215 | movl %edx,36(%esp,%ebx,4) |
| 216 | movl %ecx,40(%esp,%ebx,4) |
| 217 | movl (%esi),%eax |
| 218 | mull %edi |
| 219 | addl 32(%esp),%eax |
| 220 | movl 4(%esi),%eax |
| 221 | adcl $0,%edx |
| 222 | incl %ecx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 223 | jmp .L0112ndmadd |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 224 | .align 16 |
/* Later rounds: tp[j] += arg1[j] * arg2[i], with %edi = arg2[i]. */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 225 | .L0121stmadd: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 226 | movl %edx,%ebp |
| 227 | mull %edi |
| 228 | addl 32(%esp,%ecx,4),%ebp |
| 229 | leal 1(%ecx),%ecx |
| 230 | adcl $0,%edx |
| 231 | addl %eax,%ebp |
| 232 | movl (%esi,%ecx,4),%eax |
| 233 | adcl $0,%edx |
| 234 | cmpl %ebx,%ecx |
| 235 | movl %ebp,28(%esp,%ecx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 236 | jl .L0121stmadd |
/* Final column of the multiply-add, carry into tp[num] / tp[num+1], then
   load m = n0 * tp[0] and arg3 for the reduction pass. */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 237 | movl %edx,%ebp |
| 238 | mull %edi |
| 239 | addl 32(%esp,%ebx,4),%eax |
| 240 | movl 20(%esp),%edi |
| 241 | adcl $0,%edx |
| 242 | movl 16(%esp),%esi |
| 243 | addl %eax,%ebp |
| 244 | adcl $0,%edx |
| 245 | imull 32(%esp),%edi |
| 246 | xorl %ecx,%ecx |
| 247 | addl 36(%esp,%ebx,4),%edx |
| 248 | movl %ebp,32(%esp,%ebx,4) |
| 249 | adcl $0,%ecx |
| 250 | movl (%esi),%eax |
| 251 | movl %edx,36(%esp,%ebx,4) |
| 252 | movl %ecx,40(%esp,%ebx,4) |
| 253 | mull %edi |
| 254 | addl 32(%esp),%eax |
| 255 | movl 4(%esi),%eax |
| 256 | adcl $0,%edx |
| 257 | movl $1,%ecx |
| 258 | .align 16 |
/*
 * Reduction pass: tp[j-1] = tp[j] + arg3[j]*m + carry. The store offset 24
 * (after %ecx was incremented) is what shifts the vector down one word.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 259 | .L0112ndmadd: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 260 | movl %edx,%ebp |
| 261 | mull %edi |
| 262 | addl 32(%esp,%ecx,4),%ebp |
| 263 | leal 1(%ecx),%ecx |
| 264 | adcl $0,%edx |
| 265 | addl %eax,%ebp |
| 266 | movl (%esi,%ecx,4),%eax |
| 267 | adcl $0,%edx |
| 268 | cmpl %ebx,%ecx |
| 269 | movl %ebp,24(%esp,%ecx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 270 | jl .L0112ndmadd |
/*
 * Finish the reduction, fold the two top words, then advance the arg2
 * pointer kept at 12(%esp). When it reaches the bound saved at 28(%esp)
 * all rounds are done and the shared tail takes over.
 */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 271 | movl %edx,%ebp |
| 272 | mull %edi |
| 273 | addl 32(%esp,%ebx,4),%ebp |
| 274 | adcl $0,%edx |
| 275 | addl %eax,%ebp |
| 276 | adcl $0,%edx |
| 277 | movl %ebp,28(%esp,%ebx,4) |
| 278 | xorl %eax,%eax |
| 279 | movl 12(%esp),%ecx |
| 280 | addl 36(%esp,%ebx,4),%edx |
| 281 | adcl 40(%esp,%ebx,4),%eax |
| 282 | leal 4(%ecx),%ecx |
| 283 | movl %edx,32(%esp,%ebx,4) |
| 284 | cmpl 28(%esp),%ecx |
| 285 | movl %eax,36(%esp,%ebx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 286 | je .L008common_tail |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 287 | movl (%ecx),%edi |
| 288 | movl 8(%esp),%esi |
| 289 | movl %ecx,12(%esp) |
| 290 | xorl %ecx,%ecx |
| 291 | xorl %edx,%edx |
| 292 | movl (%esi),%eax |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 293 | jmp .L0121stmadd |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 294 | .align 16 |
/*
 * ---- squaring path (arg1 == arg2, num even) ----
 * (%esp) = num-1, 12(%esp) = outer index i (starts at 0; %ecx is zero on
 * entry). %edi = arg1[i]. Cross products are doubled on the fly: %ebx holds
 * the bit shifted out of the previous doubled word.
 * NOTE(review): the exact carry bookkeeping is intricate; the comments in
 * this path describe data flow only.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 295 | .L009bn_sqr_mont: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 296 | movl %ebx,(%esp) |
| 297 | movl %ecx,12(%esp) |
| 298 | movl %edi,%eax |
| 299 | mull %edi |
| 300 | movl %eax,32(%esp) |
| 301 | movl %edx,%ebx |
| 302 | shrl $1,%edx |
| 303 | andl $1,%ebx |
| 304 | incl %ecx |
| 305 | .align 16 |
/* First row: tp[j] = 2*(arg1[j]*arg1[0] + carry) + previous shifted-out bit. */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 306 | .L013sqr: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 307 | movl (%esi,%ecx,4),%eax |
| 308 | movl %edx,%ebp |
| 309 | mull %edi |
| 310 | addl %ebp,%eax |
| 311 | leal 1(%ecx),%ecx |
| 312 | adcl $0,%edx |
| 313 | leal (%ebx,%eax,2),%ebp |
| 314 | shrl $31,%eax |
| 315 | cmpl (%esp),%ecx |
| 316 | movl %eax,%ebx |
| 317 | movl %ebp,28(%esp,%ecx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 318 | jl .L013sqr |
/* Last column of the first row; then %edi = m = n0 * tp[0], %esi = arg3,
   %ebx = num-1 and the reduction starts at j = 1. */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 319 | movl (%esi,%ecx,4),%eax |
| 320 | movl %edx,%ebp |
| 321 | mull %edi |
| 322 | addl %ebp,%eax |
| 323 | movl 20(%esp),%edi |
| 324 | adcl $0,%edx |
| 325 | movl 16(%esp),%esi |
| 326 | leal (%ebx,%eax,2),%ebp |
| 327 | imull 32(%esp),%edi |
| 328 | shrl $31,%eax |
| 329 | movl %ebp,32(%esp,%ecx,4) |
| 330 | leal (%eax,%edx,2),%ebp |
| 331 | movl (%esi),%eax |
| 332 | shrl $31,%edx |
| 333 | movl %ebp,36(%esp,%ecx,4) |
| 334 | movl %edx,40(%esp,%ecx,4) |
| 335 | mull %edi |
| 336 | addl 32(%esp),%eax |
| 337 | movl %ecx,%ebx |
| 338 | adcl $0,%edx |
| 339 | movl 4(%esi),%eax |
| 340 | movl $1,%ecx |
| 341 | .align 16 |
/*
 * Reduction pass for the squaring path, two words per iteration:
 * tp[j-1] = tp[j] + arg3[j]*m + carry (again shifting down one word).
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 342 | .L0143rdmadd: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 343 | movl %edx,%ebp |
| 344 | mull %edi |
| 345 | addl 32(%esp,%ecx,4),%ebp |
| 346 | adcl $0,%edx |
| 347 | addl %eax,%ebp |
| 348 | movl 4(%esi,%ecx,4),%eax |
| 349 | adcl $0,%edx |
| 350 | movl %ebp,28(%esp,%ecx,4) |
| 351 | movl %edx,%ebp |
| 352 | mull %edi |
| 353 | addl 36(%esp,%ecx,4),%ebp |
| 354 | leal 2(%ecx),%ecx |
| 355 | adcl $0,%edx |
| 356 | addl %eax,%ebp |
| 357 | movl (%esi,%ecx,4),%eax |
| 358 | adcl $0,%edx |
| 359 | cmpl %ebx,%ecx |
| 360 | movl %ebp,24(%esp,%ecx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 361 | jl .L0143rdmadd |
/* Finish the reduction and fold the top words; the outer index i from
   12(%esp) is compared with num-1 to detect the last round. */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 362 | movl %edx,%ebp |
| 363 | mull %edi |
| 364 | addl 32(%esp,%ebx,4),%ebp |
| 365 | adcl $0,%edx |
| 366 | addl %eax,%ebp |
| 367 | adcl $0,%edx |
| 368 | movl %ebp,28(%esp,%ebx,4) |
| 369 | movl 12(%esp),%ecx |
| 370 | xorl %eax,%eax |
| 371 | movl 8(%esp),%esi |
| 372 | addl 36(%esp,%ebx,4),%edx |
| 373 | adcl 40(%esp,%ebx,4),%eax |
| 374 | movl %edx,32(%esp,%ebx,4) |
| 375 | cmpl %ebx,%ecx |
| 376 | movl %eax,36(%esp,%ebx,4) |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 377 | je .L008common_tail |
/*
 * Next row: i += 1, %edi = arg1[i]; add the square arg1[i]^2 into tp[i].
 * When i == num-1 there are no cross products left (.L015sqrlast); the je
 * uses the flags of the cmpl, which the following leal preserves.
 */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 378 | movl 4(%esi,%ecx,4),%edi |
| 379 | leal 1(%ecx),%ecx |
| 380 | movl %edi,%eax |
| 381 | movl %ecx,12(%esp) |
| 382 | mull %edi |
| 383 | addl 32(%esp,%ecx,4),%eax |
| 384 | adcl $0,%edx |
| 385 | movl %eax,32(%esp,%ecx,4) |
| 386 | xorl %ebp,%ebp |
| 387 | cmpl %ebx,%ecx |
| 388 | leal 1(%ecx),%ecx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 389 | je .L015sqrlast |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 390 | movl %edx,%ebx |
| 391 | shrl $1,%edx |
| 392 | andl $1,%ebx |
| 393 | .align 16 |
/* tp[j] += 2*(arg1[j]*arg1[i] + carry) + pending bit, for j = i+1 .. num-1. */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 394 | .L016sqradd: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 395 | movl (%esi,%ecx,4),%eax |
| 396 | movl %edx,%ebp |
| 397 | mull %edi |
| 398 | addl %ebp,%eax |
| 399 | leal (%eax,%eax,1),%ebp |
| 400 | adcl $0,%edx |
| 401 | shrl $31,%eax |
| 402 | addl 32(%esp,%ecx,4),%ebp |
| 403 | leal 1(%ecx),%ecx |
| 404 | adcl $0,%eax |
| 405 | addl %ebx,%ebp |
| 406 | adcl $0,%eax |
| 407 | cmpl (%esp),%ecx |
| 408 | movl %ebp,28(%esp,%ecx,4) |
| 409 | movl %eax,%ebx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 410 | jle .L016sqradd |
/* Double the final high word: %edx = low part, %ebp = bit carried out. */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 411 | movl %edx,%ebp |
| 412 | addl %edx,%edx |
| 413 | shrl $31,%ebp |
| 414 | addl %ebx,%edx |
| 415 | adcl $0,%ebp |
/* Store the two top words, reload m = n0 * tp[0] and arg3, set
   %ebx = num-1, and re-enter the reduction pass at j = 1. */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 416 | .L015sqrlast: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 417 | movl 20(%esp),%edi |
| 418 | movl 16(%esp),%esi |
| 419 | imull 32(%esp),%edi |
| 420 | addl 32(%esp,%ecx,4),%edx |
| 421 | movl (%esi),%eax |
| 422 | adcl $0,%ebp |
| 423 | movl %edx,32(%esp,%ecx,4) |
| 424 | movl %ebp,36(%esp,%ecx,4) |
| 425 | mull %edi |
| 426 | addl 32(%esp),%eax |
| 427 | leal -1(%ecx),%ebx |
| 428 | adcl $0,%edx |
| 429 | movl $1,%ecx |
| 430 | movl 4(%esi),%eax |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 431 | jmp .L0143rdmadd |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 432 | .align 16 |
/*
 * ---- shared tail (all three paths arrive with %ebx = num-1) ----
 * %ebp = arg3, %edi = arg0, %esi = &tp[0]. The xorl clears CF so the first
 * sbbl starts without a borrow.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 433 | .L008common_tail: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 434 | movl 16(%esp),%ebp |
| 435 | movl 4(%esp),%edi |
| 436 | leal 32(%esp),%esi |
| 437 | movl (%esi),%eax |
| 438 | movl %ebx,%ecx |
| 439 | xorl %edx,%edx |
| 440 | .align 16 |
/*
 * arg0[i] = tp[i] - arg3[i] - borrow for i = 0 .. num-1. The borrow lives
 * in CF across iterations: decl, movl and leal do not modify CF. The jge
 * tests the sign of the decl result, so the loop exits with %ecx = -1.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 441 | .L017sub: |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 442 | sbbl (%ebp,%edx,4),%eax |
| 443 | movl %eax,(%edi,%edx,4) |
| 444 | decl %ecx |
| 445 | movl 4(%esi,%edx,4),%eax |
| 446 | leal 1(%edx),%edx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 447 | jge .L017sub |
/*
 * %eax = tp[num] minus the final borrow, used as a selection mask
 * (presumably either 0 or all ones -- verify the bound on tp[num]).
 * %esi = (tp & mask) | (arg0 & ~mask): copy source chosen without a branch.
 */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 448 | sbbl $0,%eax |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 449 | andl %eax,%esi |
| 450 | notl %eax |
| 451 | movl %edi,%ebp |
| 452 | andl %eax,%ebp |
| 453 | orl %ebp,%esi |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 454 | .align 16 |
/*
 * Copy the selected vector into arg0 from index num-1 down to 0 and
 * overwrite each tp[] word with %ecx (the -1 left by the loop above), so
 * the intermediate value does not stay on the stack.
 */
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 455 | .L018copy: |
| 456 | movl (%esi,%ebx,4),%eax |
| 457 | movl %eax,(%edi,%ebx,4) |
| 458 | movl %ecx,32(%esp,%ebx,4) |
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 459 | decl %ebx |
Robert Sloan | a94fe05 | 2017-02-21 08:49:28 -0800 | [diff] [blame] | 460 | jge .L018copy |
/* Restore the caller's %esp saved in the frame and report success. */
Adam Langley | d9e397b | 2015-01-22 14:27:53 -0800 | [diff] [blame] | 461 | movl 24(%esp),%esp |
| 462 | movl $1,%eax |
/* Common exit; the early-out for arg5 < 4 arrives here with %eax = 0. */
| 463 | .L000just_leave: |
| 464 | popl %edi |
| 465 | popl %esi |
| 466 | popl %ebx |
| 467 | popl %ebp |
| 468 | ret |
| 469 | .size bn_mul_mont,.-.L_bn_mul_mont_begin |
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 * No label is attached to it and no instruction above references it.
 */
| 470 | .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 |
| 471 | .byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 |
| 472 | .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 |
| 473 | .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 |
| 474 | .byte 111,114,103,62,0 |
| 475 | #endif |