blob: dd93341d8f029ca38b205f8f745a9afea0dcf983 [file] [log] [blame]
Adam Langleye9ada862015-05-11 17:20:37 -07001default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
Adam Langleyd9e397b2015-01-22 14:27:53 -08006
Adam Langleyd9e397b2015-01-22 14:27:53 -08007
Adam Langleye9ada862015-05-11 17:20:37 -07008EXTERN OPENSSL_ia32cap_P
9
10global bn_mul_mont
Adam Langleyd9e397b2015-01-22 14:27:53 -080011
12ALIGN 16
;-----------------------------------------------------------------------
; bn_mul_mont -- public entry point: dispatcher plus the generic
; one-word-per-step multiply-and-reduce path.  (The identification string
; stored after the last routine of this file decodes to "Montgomery
; Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".)
;
; ABI:  entered with Win64 register assignment; the prologue moves
;       rcx, rdx, r8, r9, [40+rsp], [48+rsp] into rdi, rsi, rdx, rcx, r8, r9
;       after parking the caller's rdi/rsi at [8+rsp]/[16+rsp].
; Register roles after the shuffle, as used by the code below:
;   rdi = destination vector (written in $L$sub / $L$copy)
;   rsi = first source vector
;   rdx = second source vector (copied to r12)
;   rcx = vector that is multiplied by rbp each pass and subtracted at the end
;   r8  = pointer to a single qword, dereferenced at $L$mul_body
;   r9d = number of 64-bit words per vector (upper half of r9 is discarded)
; Out:  rax = 1
; Callee-saved rbx, rbp, r12-r15 are pushed and restored; rdi/rsi are
; restored from the slots written by the prologue.
; NOTE(review): presumably rcx is the modulus and [r8] is the usual
;       Montgomery constant n0 -- confirm against the C prototype.
; NOTE(review): in this copy most lines carry blame-view residue
;       (author/date/line number) ahead of the instruction.
;-----------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -070013bn_mul_mont:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -080016 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -070017$L$SEH_begin_bn_mul_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -080018 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -070022 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -080024
25
Robert Sloana94fe052017-02-21 08:49:28 -080026
; Writing r9d zero-extends into r9, so only the low 32 bits of the count
; are used.  rax keeps the entry rsp for the epilogue.
27 mov r9d,r9d
28 mov rax,rsp
29
; Path selection on the word count (r9d) and the operand pointers:
;   count not a multiple of 4, or count < 8      -> $L$mul_enter (this routine)
;   rdx != rsi (different source vectors)        -> $L$mul4x_enter
;   rdx == rsi and count a multiple of 8         -> $L$sqr8x_enter
;   rdx == rsi otherwise                         -> $L$mul4x_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080030 test r9d,3
Adam Langleye9ada862015-05-11 17:20:37 -070031 jnz NEAR $L$mul_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080032 cmp r9d,8
Adam Langleye9ada862015-05-11 17:20:37 -070033 jb NEAR $L$mul_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080034 cmp rdx,rsi
Adam Langleye9ada862015-05-11 17:20:37 -070035 jne NEAR $L$mul4x_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080036 test r9d,7
Adam Langleye9ada862015-05-11 17:20:37 -070037 jz NEAR $L$sqr8x_enter
38 jmp NEAR $L$mul4x_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080039
40ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -070041$L$mul_enter:
; Save the six callee-saved GPRs; the epilogue reloads them from
; [-48..-8] relative to the entry rsp held in rax.
Adam Langleyd9e397b2015-01-22 14:27:53 -080042 push rbx
Robert Sloana94fe052017-02-21 08:49:28 -080043
Adam Langleyd9e397b2015-01-22 14:27:53 -080044 push rbp
Robert Sloana94fe052017-02-21 08:49:28 -080045
Adam Langleyd9e397b2015-01-22 14:27:53 -080046 push r12
Robert Sloana94fe052017-02-21 08:49:28 -080047
Adam Langleyd9e397b2015-01-22 14:27:53 -080048 push r13
Robert Sloana94fe052017-02-21 08:49:28 -080049
Adam Langleyd9e397b2015-01-22 14:27:53 -080050 push r14
Robert Sloana94fe052017-02-21 08:49:28 -080051
Adam Langleyd9e397b2015-01-22 14:27:53 -080052 push r15
53
Adam Langleyd9e397b2015-01-22 14:27:53 -080054
; Frame: r10 = (rsp - 16 - 8*count) rounded down to a 1024-byte boundary.
; r9 is negated only to form the address and then negated back.
Robert Sloana94fe052017-02-21 08:49:28 -080055 neg r9
56 mov r11,rsp
57 lea r10,[((-16))+r9*8+rsp]
58 neg r9
59 and r10,-1024
60
61
62
63
64
65
66
67
68
; Move rsp down to r10 in steps of at most 4096 bytes, reading one qword
; at each step so every intervening page is touched in order.
69 sub r11,r10
70 and r11,-4096
71 lea rsp,[r11*1+r10]
72 mov r11,QWORD[rsp]
73 cmp rsp,r10
74 ja NEAR $L$mul_page_walk
75 jmp NEAR $L$mul_page_walk_done
76
77ALIGN 16
78$L$mul_page_walk:
79 lea rsp,[((-4096))+rsp]
80 mov r11,QWORD[rsp]
81 cmp rsp,r10
82 ja NEAR $L$mul_page_walk
83$L$mul_page_walk_done:
84
; Frame layout from here on: [rsp .. rsp+8*count) is the temporary vector,
; [r9*8+rsp] its extra top word, [8+r9*8+rsp] the entry rsp (from rax).
85 mov QWORD[8+r9*8+rsp],rax
86
Adam Langleye9ada862015-05-11 17:20:37 -070087$L$mul_body:
; r12 = second source vector (rdx is clobbered by mul); r8 = the qword r8
; pointed to; rbx = word 0 of r12; rax = word 0 of rsi.
Adam Langleyd9e397b2015-01-22 14:27:53 -080088 mov r12,rdx
Adam Langleye9ada862015-05-11 17:20:37 -070089 mov r8,QWORD[r8]
90 mov rbx,QWORD[r12]
91 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -080092
; r14 = outer index (word of r12), r15 = inner index (word of rsi/rcx).
93 xor r14,r14
94 xor r15,r15
95
; rbp = r8 * low64(rsi[0]*rbx): the multiplier applied to the rcx vector
; during this pass.  Adding rcx[0]*rbp to r10 is done only for its carry.
96 mov rbp,r8
97 mul rbx
98 mov r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -070099 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800100
101 imul rbp,r10
102 mov r11,rdx
103
104 mul rbp
105 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700106 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800107 adc rdx,0
108 mov r13,rdx
109
Adam Langleye9ada862015-05-11 17:20:37 -0700110 lea r15,[1+r15]
111 jmp NEAR $L$1st_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -0800112
113ALIGN 16
; First pass (outer index 0): for each inner index, combine rsi[i]*rbx
; (carried in r10/r11) with rcx[i]*rbp (carried in r13) and store the sum
; into the temporary vector at [-16+r15*8+rsp].
Adam Langleye9ada862015-05-11 17:20:37 -0700114$L$1st:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800115 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700116 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800117 adc rdx,0
118 add r13,r11
119 mov r11,r10
120 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700121 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800122 mov r13,rdx
123
Adam Langleye9ada862015-05-11 17:20:37 -0700124$L$1st_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800125 mul rbx
126 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700127 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800128 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700129 lea r15,[1+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800130 mov r10,rdx
131
132 mul rbp
133 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700134 jne NEAR $L$1st
Adam Langleyd9e397b2015-01-22 14:27:53 -0800135
; Tail of the first pass: flush the last product, then write the final sum
; and its carry into the two top words of the temporary vector.  rax is
; reloaded with rsi[0] for the next pass.
136 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700137 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800138 adc rdx,0
139 add r13,r11
140 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700141 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800142 mov r13,rdx
143 mov r11,r10
144
145 xor rdx,rdx
146 add r13,r11
147 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700148 mov QWORD[((-8))+r9*8+rsp],r13
149 mov QWORD[r9*8+rsp],rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800150
Adam Langleye9ada862015-05-11 17:20:37 -0700151 lea r14,[1+r14]
152 jmp NEAR $L$outer
Adam Langleyd9e397b2015-01-22 14:27:53 -0800153ALIGN 16
; Remaining passes (outer index r14 = 1 .. count-1): same as the first pass
; but the previous contents of the temporary vector are added in (loaded
; into r10), and each result is stored one word lower than it was read.
Adam Langleye9ada862015-05-11 17:20:37 -0700154$L$outer:
155 mov rbx,QWORD[r14*8+r12]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800156 xor r15,r15
157 mov rbp,r8
Adam Langleye9ada862015-05-11 17:20:37 -0700158 mov r10,QWORD[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800159 mul rbx
160 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700161 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800162 adc rdx,0
163
; rbp = r8 * low64(temp[0] + rsi[0]*rbx) for this pass.
164 imul rbp,r10
165 mov r11,rdx
166
167 mul rbp
168 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700169 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800170 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700171 mov r10,QWORD[8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800172 mov r13,rdx
173
Adam Langleye9ada862015-05-11 17:20:37 -0700174 lea r15,[1+r15]
175 jmp NEAR $L$inner_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -0800176
177ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700178$L$inner:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800179 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700180 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800181 adc rdx,0
182 add r13,r10
Adam Langleye9ada862015-05-11 17:20:37 -0700183 mov r10,QWORD[r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800184 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700185 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800186 mov r13,rdx
187
Adam Langleye9ada862015-05-11 17:20:37 -0700188$L$inner_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800189 mul rbx
190 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700191 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800192 adc rdx,0
193 add r10,r11
194 mov r11,rdx
195 adc r11,0
Adam Langleye9ada862015-05-11 17:20:37 -0700196 lea r15,[1+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800197
198 mul rbp
199 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700200 jne NEAR $L$inner
Adam Langleyd9e397b2015-01-22 14:27:53 -0800201
; Tail of the pass: r10 is loaded with the old top word [r15*8+rsp]
; (r15 == r9 here) and folded into the new top two words.
202 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700203 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800204 adc rdx,0
205 add r13,r10
Adam Langleye9ada862015-05-11 17:20:37 -0700206 mov r10,QWORD[r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800207 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700208 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800209 mov r13,rdx
210
211 xor rdx,rdx
212 add r13,r11
213 adc rdx,0
214 add r13,r10
215 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700216 mov QWORD[((-8))+r9*8+rsp],r13
217 mov QWORD[r9*8+rsp],rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800218
Adam Langleye9ada862015-05-11 17:20:37 -0700219 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800220 cmp r14,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700221 jb NEAR $L$outer
Adam Langleyd9e397b2015-01-22 14:27:53 -0800222
; Final subtraction.  The xor clears CF, and the mov/lea/jmp that follow do
; not touch flags, so the first sbb starts with no borrow.  rsi now points
; at the temporary vector; r15 counts the words left.
223 xor r14,r14
Adam Langleye9ada862015-05-11 17:20:37 -0700224 mov rax,QWORD[rsp]
225 lea rsi,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800226 mov r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700227 jmp NEAR $L$sub
Adam Langleyd9e397b2015-01-22 14:27:53 -0800228ALIGN 16
; rdi[i] = temp[i] - rcx[i] - borrow.  lea and dec are used for the index
; and counter because they leave CF (the running borrow) intact.
Robert Sloan8ff03552017-06-14 12:40:58 -0700229$L$sub:
230 sbb rax,QWORD[r14*8+rcx]
Adam Langleye9ada862015-05-11 17:20:37 -0700231 mov QWORD[r14*8+rdi],rax
232 mov rax,QWORD[8+r14*8+rsi]
233 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800234 dec r15
Adam Langleye9ada862015-05-11 17:20:37 -0700235 jnz NEAR $L$sub
Adam Langleyd9e397b2015-01-22 14:27:53 -0800236
; rax holds the top word of the temporary vector; subtracting the final
; borrow turns it into a mask.  The and/not/and/or sequence then sets rsi
; to either the temporary vector (mask bits set) or rdi (mask bits clear)
; without a branch.
; NOTE(review): this relies on rax being exactly 0 or all-ones after the
; sbb -- confirm the top word is always 0 or 1 at this point.
237 sbb rax,0
238 xor r14,r14
Robert Sloana94fe052017-02-21 08:49:28 -0800239 and rsi,rax
240 not rax
241 mov rcx,rdi
242 and rcx,rax
Adam Langleyd9e397b2015-01-22 14:27:53 -0800243 mov r15,r9
Robert Sloana94fe052017-02-21 08:49:28 -0800244 or rsi,rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800245ALIGN 16
; Copy the selected vector into rdi, overwriting each word of the
; temporary vector (with the loop index r14) as it goes.
Adam Langleye9ada862015-05-11 17:20:37 -0700246$L$copy:
Robert Sloana94fe052017-02-21 08:49:28 -0800247 mov rax,QWORD[r14*8+rsi]
Adam Langleye9ada862015-05-11 17:20:37 -0700248 mov QWORD[r14*8+rsp],r14
Robert Sloana94fe052017-02-21 08:49:28 -0800249 mov QWORD[r14*8+rdi],rax
Adam Langleye9ada862015-05-11 17:20:37 -0700250 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800251 sub r15,1
Adam Langleye9ada862015-05-11 17:20:37 -0700252 jnz NEAR $L$copy
Adam Langleyd9e397b2015-01-22 14:27:53 -0800253
; Epilogue: rsi = entry rsp saved at $L$mul_page_walk_done; return value 1;
; reload the six pushed registers from just below it and restore rsp.
Adam Langleye9ada862015-05-11 17:20:37 -0700254 mov rsi,QWORD[8+r9*8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800255
Adam Langleyd9e397b2015-01-22 14:27:53 -0800256 mov rax,1
Robert Sloana94fe052017-02-21 08:49:28 -0800257 mov r15,QWORD[((-48))+rsi]
258
259 mov r14,QWORD[((-40))+rsi]
260
261 mov r13,QWORD[((-32))+rsi]
262
263 mov r12,QWORD[((-24))+rsi]
264
265 mov rbp,QWORD[((-16))+rsi]
266
267 mov rbx,QWORD[((-8))+rsi]
268
269 lea rsp,[rsi]
270
Adam Langleye9ada862015-05-11 17:20:37 -0700271$L$mul_epilogue:
; Restore the caller's rdi/rsi from the slots written by the prologue.
; The two DB bytes encode "rep ret".
272 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
273 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800274 DB 0F3h,0C3h ;repret
Robert Sloana94fe052017-02-21 08:49:28 -0800275
Adam Langleye9ada862015-05-11 17:20:37 -0700276$L$SEH_end_bn_mul_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800277
278ALIGN 16
;-----------------------------------------------------------------------
; bn_mul4x_mont -- same multiply-and-reduce computation as the generic
; path in bn_mul_mont, with the inner loops unrolled to four words per
; iteration.  bn_mul_mont jumps straight to $L$mul4x_enter, so the Win64
; argument shuffle below only runs when this label itself is called.
;
; Register roles at $L$mul4x_enter:
;   rdi = destination vector (parked on the frame; rdi is reused as an
;         accumulator in the loops)
;   rsi = first source vector, rdx = second source vector (copied to r12)
;   rcx = vector multiplied by rbp each pass and subtracted at the end
;   r8  = pointer to a single qword, dereferenced at $L$mul4x_body
;   r9  = number of 64-bit words per vector
;   rax = rsp as it was before the pushes
; Out:  rax = 1
; NOTE(review): the loop structure assumes the word count is a multiple of
;       4 and at least 8, which is what the dispatcher in bn_mul_mont
;       enforces -- confirm no other caller exists.
;-----------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -0700279bn_mul4x_mont:
280 mov QWORD[8+rsp],rdi ;WIN64 prologue
281 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800282 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -0700283$L$SEH_begin_bn_mul4x_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800284 mov rdi,rcx
285 mov rsi,rdx
286 mov rdx,r8
287 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700288 mov r8,QWORD[40+rsp]
289 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800290
291
Adam Langleyd9e397b2015-01-22 14:27:53 -0800292
; Zero-extend the 32-bit count; keep the entry rsp in rax.
293 mov r9d,r9d
Robert Sloana94fe052017-02-21 08:49:28 -0800294 mov rax,rsp
Adam Langleyd9e397b2015-01-22 14:27:53 -0800295
Robert Sloana94fe052017-02-21 08:49:28 -0800296$L$mul4x_enter:
297 push rbx
298
299 push rbp
300
301 push r12
302
303 push r13
304
305 push r14
306
307 push r15
308
309
; Frame: r10 = (rsp - 32 - 8*count) rounded down to a 1024-byte boundary.
310 neg r9
311 mov r11,rsp
312 lea r10,[((-32))+r9*8+rsp]
313 neg r9
314 and r10,-1024
315
; Step rsp down to r10 at most 4096 bytes at a time, reading one qword at
; each step so every intervening page is touched in order.
316 sub r11,r10
317 and r11,-4096
318 lea rsp,[r11*1+r10]
319 mov r11,QWORD[rsp]
320 cmp rsp,r10
321 ja NEAR $L$mul4x_page_walk
322 jmp NEAR $L$mul4x_page_walk_done
323
324$L$mul4x_page_walk:
325 lea rsp,[((-4096))+rsp]
326 mov r11,QWORD[rsp]
327 cmp rsp,r10
328 ja NEAR $L$mul4x_page_walk
329$L$mul4x_page_walk_done:
330
; Frame layout: [rsp .. rsp+8*count) temporary vector, [r9*8+rsp] its top
; word, [8+r9*8+rsp] entry rsp, [16+r9*8+rsp] destination pointer.
331 mov QWORD[8+r9*8+rsp],rax
332
Adam Langleye9ada862015-05-11 17:20:37 -0700333$L$mul4x_body:
334 mov QWORD[16+r9*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800335 mov r12,rdx
Adam Langleye9ada862015-05-11 17:20:37 -0700336 mov r8,QWORD[r8]
337 mov rbx,QWORD[r12]
338 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800339
; r14 = outer index (word of r12), r15 = inner index.
340 xor r14,r14
341 xor r15,r15
342
; rbp = r8 * low64(rsi[0]*rbx): multiplier for the rcx vector in this pass.
343 mov rbp,r8
344 mul rbx
345 mov r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700346 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800347
348 imul rbp,r10
349 mov r11,rdx
350
351 mul rbp
352 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700353 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800354 adc rdx,0
355 mov rdi,rdx
356
357 mul rbx
358 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700359 mov rax,QWORD[8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800360 adc rdx,0
361 mov r10,rdx
362
363 mul rbp
364 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700365 mov rax,QWORD[16+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800366 adc rdx,0
367 add rdi,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700368 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800369 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700370 mov QWORD[rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800371 mov r13,rdx
Adam Langleye9ada862015-05-11 17:20:37 -0700372 jmp NEAR $L$1st4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800373ALIGN 16
; First pass, four words per iteration.  The rsi[i]*rbx products alternate
; between r10 and r11 as carry, the rcx[i]*rbp products alternate between
; r13 and rdi; each finished word is stored into the temporary vector.
Adam Langleye9ada862015-05-11 17:20:37 -0700374$L$1st4x:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800375 mul rbx
376 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700377 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800378 adc rdx,0
379 mov r11,rdx
380
381 mul rbp
382 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700383 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800384 adc rdx,0
385 add r13,r10
386 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700387 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800388 mov rdi,rdx
389
390 mul rbx
391 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700392 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800393 adc rdx,0
394 mov r10,rdx
395
396 mul rbp
397 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700398 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800399 adc rdx,0
400 add rdi,r11
401 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700402 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800403 mov r13,rdx
404
405 mul rbx
406 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700407 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800408 adc rdx,0
409 mov r11,rdx
410
411 mul rbp
412 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700413 mov rax,QWORD[8+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800414 adc rdx,0
415 add r13,r10
416 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700417 mov QWORD[((-8))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800418 mov rdi,rdx
419
420 mul rbx
421 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700422 mov rax,QWORD[8+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800423 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700424 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800425 mov r10,rdx
426
427 mul rbp
428 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700429 mov rax,QWORD[((-16))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800430 adc rdx,0
431 add rdi,r11
432 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700433 mov QWORD[((-32))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800434 mov r13,rdx
435 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700436 jb NEAR $L$1st4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800437
; Tail of the first pass: the last two words, then the top two words of
; the temporary vector.  rax is reloaded with rsi[0] for the next pass.
438 mul rbx
439 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700440 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800441 adc rdx,0
442 mov r11,rdx
443
444 mul rbp
445 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700446 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800447 adc rdx,0
448 add r13,r10
449 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700450 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800451 mov rdi,rdx
452
453 mul rbx
454 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700455 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800456 adc rdx,0
457 mov r10,rdx
458
459 mul rbp
460 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700461 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800462 adc rdx,0
463 add rdi,r11
464 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700465 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800466 mov r13,rdx
467
468 xor rdi,rdi
469 add r13,r10
470 adc rdi,0
Adam Langleye9ada862015-05-11 17:20:37 -0700471 mov QWORD[((-8))+r15*8+rsp],r13
472 mov QWORD[r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800473
Adam Langleye9ada862015-05-11 17:20:37 -0700474 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800475ALIGN 4
; Remaining passes (r14 = 1 .. count-1): as above, but each rsi[i]*rbx
; partial sum also adds the corresponding word already in the temporary
; vector, and results are stored one word lower than they were read.
Adam Langleye9ada862015-05-11 17:20:37 -0700476$L$outer4x:
477 mov rbx,QWORD[r14*8+r12]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800478 xor r15,r15
Adam Langleye9ada862015-05-11 17:20:37 -0700479 mov r10,QWORD[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800480 mov rbp,r8
481 mul rbx
482 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700483 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800484 adc rdx,0
485
; rbp = r8 * low64(temp[0] + rsi[0]*rbx) for this pass.
486 imul rbp,r10
487 mov r11,rdx
488
489 mul rbp
490 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700491 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800492 adc rdx,0
493 mov rdi,rdx
494
495 mul rbx
496 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700497 mov rax,QWORD[8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800498 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700499 add r11,QWORD[8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800500 adc rdx,0
501 mov r10,rdx
502
503 mul rbp
504 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700505 mov rax,QWORD[16+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800506 adc rdx,0
507 add rdi,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700508 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800509 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700510 mov QWORD[rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800511 mov r13,rdx
Adam Langleye9ada862015-05-11 17:20:37 -0700512 jmp NEAR $L$inner4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800513ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700514$L$inner4x:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800515 mul rbx
516 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700517 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800518 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700519 add r10,QWORD[((-16))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800520 adc rdx,0
521 mov r11,rdx
522
523 mul rbp
524 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700525 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800526 adc rdx,0
527 add r13,r10
528 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700529 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800530 mov rdi,rdx
531
532 mul rbx
533 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700534 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800535 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700536 add r11,QWORD[((-8))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800537 adc rdx,0
538 mov r10,rdx
539
540 mul rbp
541 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700542 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800543 adc rdx,0
544 add rdi,r11
545 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700546 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800547 mov r13,rdx
548
549 mul rbx
550 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700551 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800552 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700553 add r10,QWORD[r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800554 adc rdx,0
555 mov r11,rdx
556
557 mul rbp
558 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700559 mov rax,QWORD[8+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800560 adc rdx,0
561 add r13,r10
562 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700563 mov QWORD[((-8))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800564 mov rdi,rdx
565
566 mul rbx
567 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700568 mov rax,QWORD[8+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800569 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700570 add r11,QWORD[8+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800571 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700572 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800573 mov r10,rdx
574
575 mul rbp
576 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700577 mov rax,QWORD[((-16))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800578 adc rdx,0
579 add rdi,r11
580 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700581 mov QWORD[((-32))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800582 mov r13,rdx
583 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700584 jb NEAR $L$inner4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800585
; Tail of the pass.  The outer index r14 is advanced here (lea, so flags
; are untouched) and tested at the bottom of the block.
586 mul rbx
587 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700588 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800589 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700590 add r10,QWORD[((-16))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800591 adc rdx,0
592 mov r11,rdx
593
594 mul rbp
595 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700596 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800597 adc rdx,0
598 add r13,r10
599 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700600 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800601 mov rdi,rdx
602
603 mul rbx
604 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700605 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800606 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700607 add r11,QWORD[((-8))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800608 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700609 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800610 mov r10,rdx
611
612 mul rbp
613 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700614 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800615 adc rdx,0
616 add rdi,r11
617 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700618 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800619 mov r13,rdx
620
; Fold the old top word [r9*8+rsp] into the new top two words.
621 xor rdi,rdi
622 add r13,r10
623 adc rdi,0
Adam Langleye9ada862015-05-11 17:20:37 -0700624 add r13,QWORD[r9*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800625 adc rdi,0
Adam Langleye9ada862015-05-11 17:20:37 -0700626 mov QWORD[((-8))+r15*8+rsp],r13
627 mov QWORD[r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800628
629 cmp r14,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700630 jb NEAR $L$outer4x
; Final subtraction, four words per iteration.  rdi = destination pointer
; parked at $L$mul4x_body; r15 = (count-4)/4 loop iterations; xmm0 = 0 is
; used later to wipe the temporary vector; rsi = temporary vector.
; "sub rax,[rcx]" starts the borrow chain; everything between the sbb
; instructions (mov, lea, dec, jmp/jnz) leaves CF untouched.
631 mov rdi,QWORD[16+r9*8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800632 lea r15,[((-4))+r9]
Adam Langleye9ada862015-05-11 17:20:37 -0700633 mov rax,QWORD[rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800634 pxor xmm0,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -0700635 mov rdx,QWORD[8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800636 shr r15,2
Adam Langleye9ada862015-05-11 17:20:37 -0700637 lea rsi,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800638 xor r14,r14
639
Adam Langleye9ada862015-05-11 17:20:37 -0700640 sub rax,QWORD[rcx]
641 mov rbx,QWORD[16+rsi]
642 mov rbp,QWORD[24+rsi]
643 sbb rdx,QWORD[8+rcx]
Adam Langleye9ada862015-05-11 17:20:37 -0700644 jmp NEAR $L$sub4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800645ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700646$L$sub4x:
647 mov QWORD[r14*8+rdi],rax
648 mov QWORD[8+r14*8+rdi],rdx
649 sbb rbx,QWORD[16+r14*8+rcx]
650 mov rax,QWORD[32+r14*8+rsi]
651 mov rdx,QWORD[40+r14*8+rsi]
652 sbb rbp,QWORD[24+r14*8+rcx]
653 mov QWORD[16+r14*8+rdi],rbx
654 mov QWORD[24+r14*8+rdi],rbp
655 sbb rax,QWORD[32+r14*8+rcx]
656 mov rbx,QWORD[48+r14*8+rsi]
657 mov rbp,QWORD[56+r14*8+rsi]
658 sbb rdx,QWORD[40+r14*8+rcx]
659 lea r14,[4+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800660 dec r15
Adam Langleye9ada862015-05-11 17:20:37 -0700661 jnz NEAR $L$sub4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800662
; Last four words.  rax is then reloaded from [32+r14*8+rsi] (r14 is
; count-4 here, so this is the top word of the temporary vector) and
; "sbb rax,0" consumes the final borrow to form the selection mask.
Adam Langleye9ada862015-05-11 17:20:37 -0700663 mov QWORD[r14*8+rdi],rax
664 mov rax,QWORD[32+r14*8+rsi]
665 sbb rbx,QWORD[16+r14*8+rcx]
666 mov QWORD[8+r14*8+rdi],rdx
667 sbb rbp,QWORD[24+r14*8+rcx]
668 mov QWORD[16+r14*8+rdi],rbx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800669
670 sbb rax,0
Adam Langleye9ada862015-05-11 17:20:37 -0700671 mov QWORD[24+r14*8+rdi],rbp
; Branch-free select: rsi = temporary vector when the mask bits are set,
; rdi when they are clear.  r15 is again (count-4)/4.
; NOTE(review): relies on rax being exactly 0 or all-ones -- confirm.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800672 xor r14,r14
Robert Sloana94fe052017-02-21 08:49:28 -0800673 and rsi,rax
674 not rax
675 mov rcx,rdi
676 and rcx,rax
677 lea r15,[((-4))+r9]
678 or rsi,rcx
679 shr r15,2
Adam Langleyd9e397b2015-01-22 14:27:53 -0800680
; Copy the selected vector to rdi 16 bytes at a time while zeroing the
; temporary vector with xmm0.  r14 is a byte offset in this loop (scale 1,
; step 32).  Stores to rsp use movdqa; loads/stores via rsi/rdi use movdqu.
Robert Sloana94fe052017-02-21 08:49:28 -0800681 movdqu xmm1,XMMWORD[rsi]
682 movdqa XMMWORD[rsp],xmm0
683 movdqu XMMWORD[rdi],xmm1
Adam Langleye9ada862015-05-11 17:20:37 -0700684 jmp NEAR $L$copy4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800685ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700686$L$copy4x:
Robert Sloana94fe052017-02-21 08:49:28 -0800687 movdqu xmm2,XMMWORD[16+r14*1+rsi]
688 movdqu xmm1,XMMWORD[32+r14*1+rsi]
689 movdqa XMMWORD[16+r14*1+rsp],xmm0
690 movdqu XMMWORD[16+r14*1+rdi],xmm2
691 movdqa XMMWORD[32+r14*1+rsp],xmm0
692 movdqu XMMWORD[32+r14*1+rdi],xmm1
Adam Langleye9ada862015-05-11 17:20:37 -0700693 lea r14,[32+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800694 dec r15
Adam Langleye9ada862015-05-11 17:20:37 -0700695 jnz NEAR $L$copy4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800696
; Final 16 bytes, then the epilogue: rsi = saved entry rsp, rax = 1,
; reload the six pushed registers and restore rsp.
Robert Sloana94fe052017-02-21 08:49:28 -0800697 movdqu xmm2,XMMWORD[16+r14*1+rsi]
698 movdqa XMMWORD[16+r14*1+rsp],xmm0
699 movdqu XMMWORD[16+r14*1+rdi],xmm2
Adam Langleye9ada862015-05-11 17:20:37 -0700700 mov rsi,QWORD[8+r9*8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800701
Adam Langleyd9e397b2015-01-22 14:27:53 -0800702 mov rax,1
Robert Sloana94fe052017-02-21 08:49:28 -0800703 mov r15,QWORD[((-48))+rsi]
704
705 mov r14,QWORD[((-40))+rsi]
706
707 mov r13,QWORD[((-32))+rsi]
708
709 mov r12,QWORD[((-24))+rsi]
710
711 mov rbp,QWORD[((-16))+rsi]
712
713 mov rbx,QWORD[((-8))+rsi]
714
715 lea rsp,[rsi]
716
Adam Langleye9ada862015-05-11 17:20:37 -0700717$L$mul4x_epilogue:
; Restore the caller's rdi/rsi from the prologue's slots; DB = "rep ret".
718 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
719 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800720 DB 0F3h,0C3h ;repret
Robert Sloana94fe052017-02-21 08:49:28 -0800721
Adam Langleye9ada862015-05-11 17:20:37 -0700722$L$SEH_end_bn_mul4x_mont:
723EXTERN bn_sqr8x_internal
Adam Langleyd9e397b2015-01-22 14:27:53 -0800724
725
726ALIGN 32
;-----------------------------------------------------------------------
; bn_sqr8x_mont -- wrapper around the external bn_sqr8x_internal.  It sets
; up a stack frame, calls the helper, then performs the final subtraction
; and a branch-free conditional copy into the destination.  bn_mul_mont
; jumps to $L$sqr8x_enter when both source pointers are equal and the word
; count is a multiple of 8.
;
; Register roles at $L$sqr8x_enter:
;   rdi = destination vector, rsi = source vector
;   rcx = second vector (passed on in xmm2), r8 = pointer to a single qword
;   r9d = number of 64-bit words, rax = rsp as it was before the pushes
; Out:  rax = 1
; NOTE(review): everything said below about register contents right after
;       the call depends on bn_sqr8x_internal, which is not visible here.
;-----------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -0700727bn_sqr8x_mont:
728 mov QWORD[8+rsp],rdi ;WIN64 prologue
729 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800730 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -0700731$L$SEH_begin_bn_sqr8x_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800732 mov rdi,rcx
733 mov rsi,rdx
734 mov rdx,r8
735 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700736 mov r8,QWORD[40+rsp]
737 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800738
739
Robert Sloana94fe052017-02-21 08:49:28 -0800740
Adam Langleyd9e397b2015-01-22 14:27:53 -0800741 mov rax,rsp
Robert Sloana94fe052017-02-21 08:49:28 -0800742
743$L$sqr8x_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800744 push rbx
Robert Sloana94fe052017-02-21 08:49:28 -0800745
Adam Langleyd9e397b2015-01-22 14:27:53 -0800746 push rbp
Robert Sloana94fe052017-02-21 08:49:28 -0800747
Adam Langleyd9e397b2015-01-22 14:27:53 -0800748 push r12
Robert Sloana94fe052017-02-21 08:49:28 -0800749
Adam Langleyd9e397b2015-01-22 14:27:53 -0800750 push r13
Robert Sloana94fe052017-02-21 08:49:28 -0800751
Adam Langleyd9e397b2015-01-22 14:27:53 -0800752 push r14
Robert Sloana94fe052017-02-21 08:49:28 -0800753
Adam Langleyd9e397b2015-01-22 14:27:53 -0800754 push r15
755
Robert Sloana94fe052017-02-21 08:49:28 -0800756$L$sqr8x_prologue:
757
; The 32-bit moves/shifts zero-extend, so after these four instructions
; r10 = 32*count and r9 = -(8*count).
Adam Langleyd9e397b2015-01-22 14:27:53 -0800758 mov r10d,r9d
759 shl r9d,3
760 shl r10,3+2
761 neg r9
762
763
764
765
766
767
; Choose the frame address (in rbp).  r11 = (rsp - 64 - 16*count - rsi)
; mod 4096, i.e. the distance modulo a page between the candidate frame and
; the source vector.  If 32*count < r11 the $L$sqr8x_sp_alt adjustment is
; used, otherwise the frame is simply moved down by r11.  r8 is replaced by
; the qword it points to.
; NOTE(review): presumably this keeps the frame from aliasing the input
; modulo 4096 -- confirm against the generator script's comments.
David Benjamin4969cc92016-04-22 15:02:23 -0400768 lea r11,[((-64))+r9*2+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800769 mov rbp,rsp
Adam Langleye9ada862015-05-11 17:20:37 -0700770 mov r8,QWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800771 sub r11,rsi
772 and r11,4095
773 cmp r10,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700774 jb NEAR $L$sqr8x_sp_alt
Robert Sloana94fe052017-02-21 08:49:28 -0800775 sub rbp,r11
776 lea rbp,[((-64))+r9*2+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -0700777 jmp NEAR $L$sqr8x_sp_done
Adam Langleyd9e397b2015-01-22 14:27:53 -0800778
779ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -0700780$L$sqr8x_sp_alt:
; r11 = max(r11 - (4096 - 64 - 16*count), 0): cmovc zeroes r11 when the
; subtraction borrowed ("mov r10,0" is used because it leaves CF intact).
David Benjamin4969cc92016-04-22 15:02:23 -0400781 lea r10,[((4096-64))+r9*2]
Robert Sloana94fe052017-02-21 08:49:28 -0800782 lea rbp,[((-64))+r9*2+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800783 sub r11,r10
784 mov r10,0
785 cmovc r11,r10
Robert Sloana94fe052017-02-21 08:49:28 -0800786 sub rbp,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700787$L$sqr8x_sp_done:
; Align the frame to 64 bytes, then step rsp down to rbp at most 4096
; bytes at a time, reading one qword at each step.
Robert Sloana94fe052017-02-21 08:49:28 -0800788 and rbp,-64
789 mov r11,rsp
790 sub r11,rbp
791 and r11,-4096
792 lea rsp,[rbp*1+r11]
793 mov r10,QWORD[rsp]
794 cmp rsp,rbp
795 ja NEAR $L$sqr8x_page_walk
796 jmp NEAR $L$sqr8x_page_walk_done
797
798ALIGN 16
799$L$sqr8x_page_walk:
800 lea rsp,[((-4096))+rsp]
801 mov r10,QWORD[rsp]
802 cmp rsp,rbp
803 ja NEAR $L$sqr8x_page_walk
804$L$sqr8x_page_walk_done:
805
; r10 = -(8*count), r9 = 8*count.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800806 mov r10,r9
807 neg r9
808
; Frame slots: [32+rsp] = the qword loaded from [r8], [40+rsp] = entry rsp.
Adam Langleye9ada862015-05-11 17:20:37 -0700809 mov QWORD[32+rsp],r8
810 mov QWORD[40+rsp],rax
Robert Sloana94fe052017-02-21 08:49:28 -0800811
Adam Langleye9ada862015-05-11 17:20:37 -0700812$L$sqr8x_body:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800813
; The DB sequences are hand-encoded movq instructions (66 REX.W 0F 6E /r):
;   102,72,15,110,209 = movq xmm2,rcx
;   102,72,15,110,207 = movq xmm1,rdi
;   102,73,15,110,218 = movq xmm3,r10
; so rcx, the destination pointer and -(8*count) are parked in xmm
; registers across the call, and xmm0 is zeroed.
David Benjamin4969cc92016-04-22 15:02:23 -0400814DB 102,72,15,110,209
Adam Langleyd9e397b2015-01-22 14:27:53 -0800815 pxor xmm0,xmm0
816DB 102,72,15,110,207
817DB 102,73,15,110,218
818 call bn_sqr8x_internal
819
David Benjamin4969cc92016-04-22 15:02:23 -0400820
821
822
; rbx = rdi + r9 is the start of the vector to subtract from; rdi is then
; reloaded with the destination pointer (DB = movq rdi,xmm1); rcx = r9/32
; is the loop counter and rdx keeps a copy of r9.
; NOTE(review): the loop below counts rcx *up* to zero and reads rbp and
; rax without setting them, so bn_sqr8x_internal presumably returns with
; r9 negative (-8*count), rbp pointing at the vector to subtract, rdi
; pointing into the frame and rax holding the top carry -- confirm.
; NOTE(review): sar leaves CF = last bit shifted out; the first sbb uses it
; as the incoming borrow, so r9 must be a multiple of 32 here -- confirm.
823 lea rbx,[r9*1+rdi]
824 mov rcx,r9
825 mov rdx,r9
826DB 102,72,15,126,207
827 sar rcx,3+2
828 jmp NEAR $L$sqr8x_sub
Adam Langleyd9e397b2015-01-22 14:27:53 -0800829
830ALIGN 32
; rdi[0..3] = rbx[0..3] - rbp[0..3] with borrow, four words per iteration.
; lea, mov and inc leave CF untouched, so the borrow chains across
; iterations.
David Benjamin4969cc92016-04-22 15:02:23 -0400831$L$sqr8x_sub:
832 mov r12,QWORD[rbx]
833 mov r13,QWORD[8+rbx]
834 mov r14,QWORD[16+rbx]
835 mov r15,QWORD[24+rbx]
836 lea rbx,[32+rbx]
837 sbb r12,QWORD[rbp]
838 sbb r13,QWORD[8+rbp]
839 sbb r14,QWORD[16+rbp]
840 sbb r15,QWORD[24+rbp]
841 lea rbp,[32+rbp]
842 mov QWORD[rdi],r12
843 mov QWORD[8+rdi],r13
844 mov QWORD[16+rdi],r14
845 mov QWORD[24+rdi],r15
846 lea rdi,[32+rdi]
847 inc rcx
848 jnz NEAR $L$sqr8x_sub
849
; rax minus the final borrow becomes the selection mask.  rbx and rdi are
; moved by r9 bytes (lea keeps flags), which rewinds them to the start of
; their vectors if r9 is negative as assumed above.  The mask is broadcast
; to all four dwords of xmm1 (DB = movq xmm1,rax, then pshufd with 0);
; xmm0 = 0; rsi = entry rsp saved in the frame, used by the epilogue.
850 sbb rax,0
851 lea rbx,[r9*1+rbx]
852 lea rdi,[r9*1+rdi]
853
854DB 102,72,15,110,200
855 pxor xmm0,xmm0
856 pshufd xmm1,xmm1,0
857 mov rsi,QWORD[40+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800858
David Benjamin4969cc92016-04-22 15:02:23 -0400859 jmp NEAR $L$sqr8x_cond_copy
860
861ALIGN 32
; Per 32 bytes: load the unsubtracted words from rbx (xmm2/xmm3) and the
; subtracted words from rdi (xmm4/xmm5); zero the words just read at rbx
; and the matching words rdx bytes away; then
;   rdi = (rbx_words AND mask) OR (rdi_words AND (mask == 0))
; where pcmpeqd against zero produces the complementary mask in xmm0,
; which is cleared again afterwards.  No data-dependent branch is taken.
; The loop runs until r9, stepped by 32, reaches zero.
862$L$sqr8x_cond_copy:
863 movdqa xmm2,XMMWORD[rbx]
864 movdqa xmm3,XMMWORD[16+rbx]
865 lea rbx,[32+rbx]
866 movdqu xmm4,XMMWORD[rdi]
867 movdqu xmm5,XMMWORD[16+rdi]
868 lea rdi,[32+rdi]
869 movdqa XMMWORD[(-32)+rbx],xmm0
870 movdqa XMMWORD[(-16)+rbx],xmm0
871 movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0
872 movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
873 pcmpeqd xmm0,xmm1
874 pand xmm2,xmm1
875 pand xmm3,xmm1
876 pand xmm4,xmm0
877 pand xmm5,xmm0
878 pxor xmm0,xmm0
879 por xmm4,xmm2
880 por xmm5,xmm3
881 movdqu XMMWORD[(-32)+rdi],xmm4
882 movdqu XMMWORD[(-16)+rdi],xmm5
883 add r9,32
884 jnz NEAR $L$sqr8x_cond_copy
Adam Langleyd9e397b2015-01-22 14:27:53 -0800885
; Epilogue: return 1, reload the six pushed registers from just below the
; saved entry rsp (in rsi), and restore rsp.
886 mov rax,1
Adam Langleye9ada862015-05-11 17:20:37 -0700887 mov r15,QWORD[((-48))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800888
Adam Langleye9ada862015-05-11 17:20:37 -0700889 mov r14,QWORD[((-40))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800890
Adam Langleye9ada862015-05-11 17:20:37 -0700891 mov r13,QWORD[((-32))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800892
Adam Langleye9ada862015-05-11 17:20:37 -0700893 mov r12,QWORD[((-24))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800894
Adam Langleye9ada862015-05-11 17:20:37 -0700895 mov rbp,QWORD[((-16))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800896
Adam Langleye9ada862015-05-11 17:20:37 -0700897 mov rbx,QWORD[((-8))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800898
Adam Langleye9ada862015-05-11 17:20:37 -0700899 lea rsp,[rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800900
Adam Langleye9ada862015-05-11 17:20:37 -0700901$L$sqr8x_epilogue:
; Restore the caller's rdi/rsi from the prologue's slots; DB = "rep ret".
902 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
903 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800904 DB 0F3h,0C3h ;repret
Robert Sloana94fe052017-02-21 08:49:28 -0800905
Adam Langleye9ada862015-05-11 17:20:37 -0700906$L$SEH_end_bn_sqr8x_mont:
; NUL-terminated identification string embedded in .text after the last
; function. Same bytes as the decimal list it replaces.
DB	"Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
ALIGN	16
Adam Langleye9ada862015-05-11 17:20:37 -0700913EXTERN __imp_RtlVirtualUnwind
Adam Langleyd9e397b2015-01-22 14:27:53 -0800914
;-----------------------------------------------------------------------
; mul_handler
;
; Win64 language-specific exception handler named by the unwind info of
; bn_mul_mont and bn_mul4x_mont.
;
; In:   r8 = CONTEXT of the interrupted code
;       r9 = DISPATCHER_CONTEXT
;       (field names in the comments follow the documented Win64 layouts)
;
; HandlerData[0] = RVA of the "body" label (end of prologue)
; HandlerData[1] = RVA of the epilogue label
;
; Picks the value in rax that the shared tail treats as the function's
; entry stack pointer, then continues in the shared tail located under
; sqr_handler, which also holds the matching pops and the return.
;-----------------------------------------------------------------------
ALIGN	16
mul_handler:
	; Save everything the handler and the shared tail clobber.
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp, 64				; shadow space + 4 stack args for the call in the tail

	mov	rax, qword [r8 + 120]		; context->Rax
	mov	rbx, qword [r8 + 248]		; context->Rip

	mov	rsi, qword [r9 + 8]		; disp->ImageBase
	mov	r11, qword [r9 + 56]		; disp->HandlerData

	; Fault before the body label: frame not built yet; rax still holds
	; the entry rsp (the function starts with "mov rax,rsp").
	mov	r10d, dword [r11]		; HandlerData[0]
	lea	r10, [rsi + r10*1]		; absolute address of body label
	cmp	rbx, r10
	jb	NEAR $L$common_seh_tail

	mov	rax, qword [r8 + 152]		; context->Rsp

	; Fault at or after the epilogue label: use context->Rsp as is.
	mov	r10d, dword [r11 + 4]		; HandlerData[1]
	lea	r10, [rsi + r10*1]		; absolute address of epilogue label
	cmp	rbx, r10
	jae	NEAR $L$common_seh_tail

	; Fault inside the body: fetch the saved stack pointer from
	; [Rsp + R9*8 + 8]. R9 is presumably the limb count "num"; the body
	; that stores this slot is outside this view -- TODO confirm.
	mov	r10, qword [r8 + 192]		; context->R9
	mov	rax, qword [rax + r10*8 + 8]

	jmp	NEAR $L$common_pop_regs
Adam Langleye9ada862015-05-11 17:20:37 -0700950
Adam Langleyd9e397b2015-01-22 14:27:53 -0800951
952
;-----------------------------------------------------------------------
; sqr_handler
;
; Win64 language-specific exception handler named by the unwind info of
; bn_sqr8x_mont. Its tail ($L$common_pop_regs / $L$common_seh_tail) is
; shared with mul_handler.
;
; In:   r8 = CONTEXT of the interrupted code
;       r9 = DISPATCHER_CONTEXT
;       (field names in the comments follow the documented Win64 layouts)
; Out:  eax = 1 (ExceptionContinueSearch)
;
; HandlerData[0] = RVA of $L$sqr8x_prologue
; HandlerData[1] = RVA of $L$sqr8x_body
; HandlerData[2] = RVA of $L$sqr8x_epilogue
;-----------------------------------------------------------------------
ALIGN	16
sqr_handler:
	; Save everything the handler clobbers.
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp, 64				; shadow space + 4 stack args for the call below

	mov	rax, qword [r8 + 120]		; context->Rax
	mov	rbx, qword [r8 + 248]		; context->Rip

	mov	rsi, qword [r9 + 8]		; disp->ImageBase
	mov	r11, qword [r9 + 56]		; disp->HandlerData

	; Rip before the prologue label: nothing to pop, rax = context->Rax.
	mov	r10d, dword [r11]		; HandlerData[0]
	lea	r10, [rsi + r10*1]
	cmp	rbx, r10
	jb	NEAR $L$common_seh_tail

	; Rip between prologue and body labels: registers are reloaded
	; relative to context->Rax.
	mov	r10d, dword [r11 + 4]		; HandlerData[1]
	lea	r10, [rsi + r10*1]
	cmp	rbx, r10
	jb	NEAR $L$common_pop_regs

	mov	rax, qword [r8 + 152]		; context->Rsp

	; Rip at or after the epilogue label: use context->Rsp as is.
	mov	r10d, dword [r11 + 8]		; HandlerData[2]
	lea	r10, [rsi + r10*1]
	cmp	rbx, r10
	jae	NEAR $L$common_seh_tail

	; Inside the body: the function stored its original stack pointer
	; at [rsp+40] just before $L$sqr8x_body.
	mov	rax, qword [rax + 40]

$L$common_pop_regs:
	; rax = stack pointer value with the six saved registers just below it.
	mov	rbx, qword [rax - 8]
	mov	rbp, qword [rax - 16]
	mov	r12, qword [rax - 24]
	mov	r13, qword [rax - 32]
	mov	r14, qword [rax - 40]
	mov	r15, qword [rax - 48]
	mov	qword [r8 + 144], rbx		; context->Rbx
	mov	qword [r8 + 160], rbp		; context->Rbp
	mov	qword [r8 + 216], r12		; context->R12
	mov	qword [r8 + 224], r13		; context->R13
	mov	qword [r8 + 232], r14		; context->R14
	mov	qword [r8 + 240], r15		; context->R15

$L$common_seh_tail:
	; rdi/rsi were spilled to [rsp+8]/[rsp+16] by the WIN64 prologue.
	mov	rdi, qword [rax + 8]
	mov	rsi, qword [rax + 16]
	mov	qword [r8 + 152], rax		; context->Rsp
	mov	qword [r8 + 168], rsi		; context->Rsi
	mov	qword [r8 + 176], rdi		; context->Rdi

	; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
	mov	rdi, qword [r9 + 40]		; disp->ContextRecord
	mov	rsi, r8
	mov	ecx, 154
	cld					; same bytes as the original DD 0xa548f3fc
	rep movsq

	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
	mov	rsi, r9
	xor	rcx, rcx			; arg1 = 0 (UNW_FLAG_NHANDLER)
	mov	rdx, qword [rsi + 8]		; arg2 = disp->ImageBase
	mov	r8, qword [rsi]			; arg3 = disp->ControlPc
	mov	r9, qword [rsi + 16]		; arg4 = disp->FunctionEntry
	mov	r10, qword [rsi + 40]		; disp->ContextRecord
	lea	r11, [rsi + 56]			; &disp->HandlerData
	lea	r12, [rsi + 24]			; &disp->EstablisherFrame
	mov	qword [rsp + 32], r10		; arg5
	mov	qword [rsp + 40], r11		; arg6
	mov	qword [rsp + 48], r12		; arg7
	mov	qword [rsp + 56], rcx		; arg8 = NULL
	call	qword [rel __imp_RtlVirtualUnwind]

	mov	eax, 1				; ExceptionContinueSearch
	add	rsp, 64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h			; repret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001043
Adam Langleye9ada862015-05-11 17:20:37 -07001044
; Win64 function table: one entry per function, each entry being three
; image-relative addresses -- begin, end, unwind info.
section	.pdata rdata align=4
ALIGN	4
	; bn_mul_mont
	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase, $L$SEH_end_bn_mul_mont wrt ..imagebase, $L$SEH_info_bn_mul_mont wrt ..imagebase

	; bn_mul4x_mont
	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase, $L$SEH_end_bn_mul4x_mont wrt ..imagebase, $L$SEH_info_bn_mul4x_mont wrt ..imagebase

	; bn_sqr8x_mont
	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase, $L$SEH_end_bn_sqr8x_mont wrt ..imagebase, $L$SEH_info_bn_sqr8x_mont wrt ..imagebase
; Unwind info records. Each record is a 4-byte header (9,0,0,0), the
; image-relative address of the language-specific handler, and then the
; handler data: label RVAs that the handler reads as HandlerData[0..].
section	.xdata rdata align=8
ALIGN	8
$L$SEH_info_bn_mul_mont:
	DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul_body wrt ..imagebase		; HandlerData[0]
	DD	$L$mul_epilogue wrt ..imagebase		; HandlerData[1]
$L$SEH_info_bn_mul4x_mont:
	DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul4x_body wrt ..imagebase		; HandlerData[0]
	DD	$L$mul4x_epilogue wrt ..imagebase	; HandlerData[1]
$L$SEH_info_bn_sqr8x_mont:
	DB	9,0,0,0
	DD	sqr_handler wrt ..imagebase
	DD	$L$sqr8x_prologue wrt ..imagebase	; HandlerData[0]
	DD	$L$sqr8x_body wrt ..imagebase		; HandlerData[1]
	DD	$L$sqr8x_epilogue wrt ..imagebase	; HandlerData[2]
ALIGN	8