blob: 1a9da5123060869fe1d3a3339b2fe8c7ca684569 [file] [log] [blame]
Adam Langleye9ada862015-05-11 17:20:37 -07001default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
Adam Langleyd9e397b2015-01-22 14:27:53 -08006
Adam Langleyd9e397b2015-01-22 14:27:53 -08007
Adam Langleye9ada862015-05-11 17:20:37 -07008EXTERN OPENSSL_ia32cap_P
9
10global bn_mul_mont
Adam Langleyd9e397b2015-01-22 14:27:53 -080011
12ALIGN 16
;-----------------------------------------------------------------------
; bn_mul_mont: Montgomery multiplication entry point (Win64 ABI).
;
; The prologue stores rdi/rsi in the first two shadow-space slots and
; remaps the Win64 argument registers:
;   rdi <- rcx        destination vector (written in $L$sub/$L$copy)
;   rsi <- rdx        first source vector
;   rdx <- r8         second source vector (copied to r12)
;   rcx <- r9         vector scaled by the reduction multiplier and
;                     subtracted at the end (presumably the modulus)
;   r8  <- [40+rsp]   pointer to a qword used to derive the per-pass
;                     reduction multiplier
;   r9  <- [48+rsp]   word count; only the low 32 bits are used
; NOTE(review): this looks like OpenSSL's
; bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the C prototype.
;
; Returns rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored before
; returning.
;
; Dispatch: word counts that are not a multiple of 4, or that are below
; 8, use the generic loop at $L$mul_enter. Otherwise $L$sqr8x_enter is
; taken when both source pointers are equal and the count is a multiple
; of 8, and $L$mul4x_enter in every other case.
;-----------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -070013bn_mul_mont:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -080016 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -070017$L$SEH_begin_bn_mul_mont:
; Remap the Win64 argument registers as listed in the header; arguments
; five and six are read from the stack.
Adam Langleyd9e397b2015-01-22 14:27:53 -080018 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -070022 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -080024
25
Robert Sloana94fe052017-02-21 08:49:28 -080026
; Zero-extend the 32-bit word count. rax keeps the entry rsp so the
; epilogue can find the saved registers and restore the stack pointer.
27 mov r9d,r9d
28 mov rax,rsp
29
; Choose an implementation based on the word count and on whether the
; two source pointers are the same.
Adam Langleyd9e397b2015-01-22 14:27:53 -080030 test r9d,3
Adam Langleye9ada862015-05-11 17:20:37 -070031 jnz NEAR $L$mul_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080032 cmp r9d,8
Adam Langleye9ada862015-05-11 17:20:37 -070033 jb NEAR $L$mul_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080034 cmp rdx,rsi
Adam Langleye9ada862015-05-11 17:20:37 -070035 jne NEAR $L$mul4x_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080036 test r9d,7
Adam Langleye9ada862015-05-11 17:20:37 -070037 jz NEAR $L$sqr8x_enter
38 jmp NEAR $L$mul4x_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -080039
40ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -070041$L$mul_enter:
; Save the callee-saved registers directly below the entry rsp; the
; epilogue reloads them through offsets from the saved entry rsp.
Adam Langleyd9e397b2015-01-22 14:27:53 -080042 push rbx
Robert Sloana94fe052017-02-21 08:49:28 -080043
Adam Langleyd9e397b2015-01-22 14:27:53 -080044 push rbp
Robert Sloana94fe052017-02-21 08:49:28 -080045
Adam Langleyd9e397b2015-01-22 14:27:53 -080046 push r12
Robert Sloana94fe052017-02-21 08:49:28 -080047
Adam Langleyd9e397b2015-01-22 14:27:53 -080048 push r13
Robert Sloana94fe052017-02-21 08:49:28 -080049
Adam Langleyd9e397b2015-01-22 14:27:53 -080050 push r14
Robert Sloana94fe052017-02-21 08:49:28 -080051
Adam Langleyd9e397b2015-01-22 14:27:53 -080052 push r15
53
Adam Langleyd9e397b2015-01-22 14:27:53 -080054
; r10 = (rsp - 8*(r9+2)) & -1024: target stack pointer, leaving room
; for a scratch vector of r9+2 qwords. r9 is negated only to form the
; address and is restored right after.
Robert Sloana94fe052017-02-21 08:49:28 -080055 neg r9
56 mov r11,rsp
57 lea r10,[((-16))+r9*8+rsp]
58 neg r9
59 and r10,-1024
60
61
62
63
64
65
66
67
68
; Lower rsp to r10 in steps of at most 4096 bytes, reading one qword at
; every step (presumably so stack guard pages are touched in order).
69 sub r11,r10
70 and r11,-4096
71 lea rsp,[r11*1+r10]
72 mov r11,QWORD[rsp]
73 cmp rsp,r10
74 ja NEAR $L$mul_page_walk
75 jmp NEAR $L$mul_page_walk_done
76
77ALIGN 16
78$L$mul_page_walk:
79 lea rsp,[((-4096))+rsp]
80 mov r11,QWORD[rsp]
81 cmp rsp,r10
82 ja NEAR $L$mul_page_walk
83$L$mul_page_walk_done:
84
; Keep the entry rsp in the scratch frame at qword index r9+1.
85 mov QWORD[8+r9*8+rsp],rax
86
Adam Langleye9ada862015-05-11 17:20:37 -070087$L$mul_body:
; Pass 0. r12 = second source, r8 = qword loaded through the fifth
; argument, rbx = second[0], r14 = outer index, r15 = inner index.
; rbp = low64(first[0]*second[0]) * r8 is this pass's reduction
; multiplier. The low word of first[0]*rbx + [rcx]*rbp is never stored;
; only its carry propagates into the next word.
Adam Langleyd9e397b2015-01-22 14:27:53 -080088 mov r12,rdx
Adam Langleye9ada862015-05-11 17:20:37 -070089 mov r8,QWORD[r8]
90 mov rbx,QWORD[r12]
91 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -080092
93 xor r14,r14
94 xor r15,r15
95
96 mov rbp,r8
97 mul rbx
98 mov r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -070099 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800100
101 imul rbp,r10
102 mov r11,rdx
103
104 mul rbp
105 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700106 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800107 adc rdx,0
108 mov r13,rdx
109
Adam Langleye9ada862015-05-11 17:20:37 -0700110 lea r15,[1+r15]
111 jmp NEAR $L$1st_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -0800112
113ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700114$L$1st:
; Inner loop of pass 0: r11/r10 carry the running first[j]*rbx sum and
; r13 the running [rcx+8*j]*rbp sum. Each combined word is stored two
; qword slots below the current index r15.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800115 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700116 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800117 adc rdx,0
118 add r13,r11
119 mov r11,r10
120 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700121 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800122 mov r13,rdx
123
Adam Langleye9ada862015-05-11 17:20:37 -0700124$L$1st_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800125 mul rbx
126 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700127 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800128 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700129 lea r15,[1+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800130 mov r10,rdx
131
132 mul rbp
133 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700134 jne NEAR $L$1st
Adam Langleyd9e397b2015-01-22 14:27:53 -0800135
; Tail of pass 0 (r15 == r9): store the last two words and the final
; carry at scratch indices r9-2, r9-1 and r9. rax is reloaded with
; first[0] for the next pass.
136 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700137 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800138 adc rdx,0
139 add r13,r11
140 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700141 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800142 mov r13,rdx
143 mov r11,r10
144
145 xor rdx,rdx
146 add r13,r11
147 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700148 mov QWORD[((-8))+r9*8+rsp],r13
149 mov QWORD[r9*8+rsp],rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800150
Adam Langleye9ada862015-05-11 17:20:37 -0700151 lea r14,[1+r14]
152 jmp NEAR $L$outer
Adam Langleyd9e397b2015-01-22 14:27:53 -0800153ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700154$L$outer:
; Passes 1..r9-1: same shape as pass 0, but every word additionally
; accumulates the scratch value left by the previous pass (loaded into
; r10). rbx = second[r14]; rbp is recomputed from the new low word.
155 mov rbx,QWORD[r14*8+r12]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800156 xor r15,r15
157 mov rbp,r8
Adam Langleye9ada862015-05-11 17:20:37 -0700158 mov r10,QWORD[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800159 mul rbx
160 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700161 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800162 adc rdx,0
163
164 imul rbp,r10
165 mov r11,rdx
166
167 mul rbp
168 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700169 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800170 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700171 mov r10,QWORD[8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800172 mov r13,rdx
173
Adam Langleye9ada862015-05-11 17:20:37 -0700174 lea r15,[1+r15]
175 jmp NEAR $L$inner_enter
Adam Langleyd9e397b2015-01-22 14:27:53 -0800176
177ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700178$L$inner:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800179 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700180 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800181 adc rdx,0
182 add r13,r10
Adam Langleye9ada862015-05-11 17:20:37 -0700183 mov r10,QWORD[r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800184 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700185 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800186 mov r13,rdx
187
Adam Langleye9ada862015-05-11 17:20:37 -0700188$L$inner_enter:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800189 mul rbx
190 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700191 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800192 adc rdx,0
193 add r10,r11
194 mov r11,rdx
195 adc r11,0
Adam Langleye9ada862015-05-11 17:20:37 -0700196 lea r15,[1+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800197
198 mul rbp
199 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700200 jne NEAR $L$inner
Adam Langleyd9e397b2015-01-22 14:27:53 -0800201
; Tail of the pass: fold in the previous top word (scratch index r9)
; and store the new top word and carry at indices r9-1 and r9.
202 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700203 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800204 adc rdx,0
205 add r13,r10
Adam Langleye9ada862015-05-11 17:20:37 -0700206 mov r10,QWORD[r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800207 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700208 mov QWORD[((-16))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800209 mov r13,rdx
210
211 xor rdx,rdx
212 add r13,r11
213 adc rdx,0
214 add r13,r10
215 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700216 mov QWORD[((-8))+r9*8+rsp],r13
217 mov QWORD[r9*8+rsp],rdx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800218
Adam Langleye9ada862015-05-11 17:20:37 -0700219 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800220 cmp r14,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700221 jb NEAR $L$outer
Adam Langleyd9e397b2015-01-22 14:27:53 -0800222
; Final subtraction: destination = scratch - [rcx], word by word. The
; xor clears CF before the first sbb; the mov/lea/dec instructions in
; the loop leave CF untouched, so the borrow chains across iterations.
223 xor r14,r14
Adam Langleye9ada862015-05-11 17:20:37 -0700224 mov rax,QWORD[rsp]
225 lea rsi,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800226 mov r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700227 jmp NEAR $L$sub
Adam Langleyd9e397b2015-01-22 14:27:53 -0800228ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700229$L$sub: sbb rax,QWORD[r14*8+rcx]
230 mov QWORD[r14*8+rdi],rax
231 mov rax,QWORD[8+r14*8+rsi]
232 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800233 dec r15
Adam Langleye9ada862015-05-11 17:20:37 -0700234 jnz NEAR $L$sub
Adam Langleyd9e397b2015-01-22 14:27:53 -0800235
; rax = top scratch word minus the final borrow. It is used as a mask
; to pick the copy source without branching: the scratch vector where
; the mask bits are set, the destination itself where they are clear.
236 sbb rax,0
237 xor r14,r14
Robert Sloana94fe052017-02-21 08:49:28 -0800238 and rsi,rax
239 not rax
240 mov rcx,rdi
241 and rcx,rax
Adam Langleyd9e397b2015-01-22 14:27:53 -0800242 mov r15,r9
Robert Sloana94fe052017-02-21 08:49:28 -0800243 or rsi,rcx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800244ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700245$L$copy:
; Copy the selected source into the destination and overwrite every
; scratch word (with the loop index) so no intermediate value remains.
Robert Sloana94fe052017-02-21 08:49:28 -0800246 mov rax,QWORD[r14*8+rsi]
Adam Langleye9ada862015-05-11 17:20:37 -0700247 mov QWORD[r14*8+rsp],r14
Robert Sloana94fe052017-02-21 08:49:28 -0800248 mov QWORD[r14*8+rdi],rax
Adam Langleye9ada862015-05-11 17:20:37 -0700249 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800250 sub r15,1
Adam Langleye9ada862015-05-11 17:20:37 -0700251 jnz NEAR $L$copy
Adam Langleyd9e397b2015-01-22 14:27:53 -0800252
; Epilogue: rsi = entry rsp saved in the frame, return value 1, reload
; the six pushed registers relative to rsi and restore rsp.
Adam Langleye9ada862015-05-11 17:20:37 -0700253 mov rsi,QWORD[8+r9*8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800254
Adam Langleyd9e397b2015-01-22 14:27:53 -0800255 mov rax,1
Robert Sloana94fe052017-02-21 08:49:28 -0800256 mov r15,QWORD[((-48))+rsi]
257
258 mov r14,QWORD[((-40))+rsi]
259
260 mov r13,QWORD[((-32))+rsi]
261
262 mov r12,QWORD[((-24))+rsi]
263
264 mov rbp,QWORD[((-16))+rsi]
265
266 mov rbx,QWORD[((-8))+rsi]
267
268 lea rsp,[rsi]
269
Adam Langleye9ada862015-05-11 17:20:37 -0700270$L$mul_epilogue:
271 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
272 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800273 DB 0F3h,0C3h ;repret
Robert Sloana94fe052017-02-21 08:49:28 -0800274
Adam Langleye9ada862015-05-11 17:20:37 -0700275$L$SEH_end_bn_mul_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800276
277ALIGN 16
;-----------------------------------------------------------------------
; bn_mul4x_mont: Montgomery multiplication with the inner loops
; unrolled four words per iteration (Win64 ABI).
;
; Argument remapping done by the prologue:
;   rdi <- rcx        destination vector
;   rsi <- rdx        first source vector
;   rdx <- r8         second source vector (copied to r12)
;   rcx <- r9         vector scaled by the reduction multiplier and
;                     subtracted at the end (presumably the modulus)
;   r8  <- [40+rsp]   pointer to a qword used to derive the per-pass
;                     reduction multiplier
;   r9  <- [48+rsp]   word count; only the low 32 bits are used
; $L$mul4x_enter is also jumped to from bn_mul_mont with the registers
; already remapped and rax holding the entry rsp.
;
; Returns rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored before
; returning.
;
; NOTE(review): the $L$sub4x and $L$copy4x loops run (r9-4)/4 times
; with the counter decremented before it is tested, so they rely on
; r9 >= 8 and r9 being a multiple of 4. The dispatcher in bn_mul_mont
; guarantees this; confirm no other caller reaches this code.
;-----------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -0700278bn_mul4x_mont:
279 mov QWORD[8+rsp],rdi ;WIN64 prologue
280 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800281 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -0700282$L$SEH_begin_bn_mul4x_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800283 mov rdi,rcx
284 mov rsi,rdx
285 mov rdx,r8
286 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700287 mov r8,QWORD[40+rsp]
288 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800289
290
Adam Langleyd9e397b2015-01-22 14:27:53 -0800291
; Zero-extend the 32-bit word count; rax keeps the entry rsp.
292 mov r9d,r9d
Robert Sloana94fe052017-02-21 08:49:28 -0800293 mov rax,rsp
Adam Langleyd9e397b2015-01-22 14:27:53 -0800294
Robert Sloana94fe052017-02-21 08:49:28 -0800295$L$mul4x_enter:
; Save the callee-saved registers directly below the entry rsp.
296 push rbx
297
298 push rbp
299
300 push r12
301
302 push r13
303
304 push r14
305
306 push r15
307
308
; r10 = (rsp - 8*(r9+4)) & -1024: target stack pointer, leaving room
; for a scratch vector of r9+4 qwords.
309 neg r9
310 mov r11,rsp
311 lea r10,[((-32))+r9*8+rsp]
312 neg r9
313 and r10,-1024
314
; Lower rsp to r10 in steps of at most 4096 bytes, reading one qword at
; every step (presumably so stack guard pages are touched in order).
315 sub r11,r10
316 and r11,-4096
317 lea rsp,[r11*1+r10]
318 mov r11,QWORD[rsp]
319 cmp rsp,r10
320 ja NEAR $L$mul4x_page_walk
321 jmp NEAR $L$mul4x_page_walk_done
322
323$L$mul4x_page_walk:
324 lea rsp,[((-4096))+rsp]
325 mov r11,QWORD[rsp]
326 cmp rsp,r10
327 ja NEAR $L$mul4x_page_walk
328$L$mul4x_page_walk_done:
329
; Keep the entry rsp in the scratch frame at qword index r9+1.
330 mov QWORD[8+r9*8+rsp],rax
331
Adam Langleye9ada862015-05-11 17:20:37 -0700332$L$mul4x_body:
; The destination pointer is parked at scratch index r9+2 because rdi
; is reused as an accumulator in the multiplication loops below.
; Pass 0 setup: r12 = second source, r8 = qword loaded through the
; fifth argument, rbx = second[0], rbp = low64(first[0]*rbx) * r8.
; Words 0 and 1 are handled here; scratch[0] receives the first stored
; word.
333 mov QWORD[16+r9*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800334 mov r12,rdx
Adam Langleye9ada862015-05-11 17:20:37 -0700335 mov r8,QWORD[r8]
336 mov rbx,QWORD[r12]
337 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800338
339 xor r14,r14
340 xor r15,r15
341
342 mov rbp,r8
343 mul rbx
344 mov r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700345 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800346
347 imul rbp,r10
348 mov r11,rdx
349
350 mul rbp
351 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700352 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800353 adc rdx,0
354 mov rdi,rdx
355
356 mul rbx
357 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700358 mov rax,QWORD[8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800359 adc rdx,0
360 mov r10,rdx
361
362 mul rbp
363 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700364 mov rax,QWORD[16+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800365 adc rdx,0
366 add rdi,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700367 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800368 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700369 mov QWORD[rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800370 mov r13,rdx
Adam Langleye9ada862015-05-11 17:20:37 -0700371 jmp NEAR $L$1st4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800372ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700373$L$1st4x:
; Pass 0 inner loop, four words per iteration. r10/r11 alternate as the
; running first[]*rbx carry, r13/rdi alternate as the running
; [rcx]*rbp carry; r15 advances by 4 until it reaches r9.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800374 mul rbx
375 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700376 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800377 adc rdx,0
378 mov r11,rdx
379
380 mul rbp
381 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700382 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800383 adc rdx,0
384 add r13,r10
385 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700386 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800387 mov rdi,rdx
388
389 mul rbx
390 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700391 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800392 adc rdx,0
393 mov r10,rdx
394
395 mul rbp
396 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700397 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800398 adc rdx,0
399 add rdi,r11
400 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700401 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800402 mov r13,rdx
403
404 mul rbx
405 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700406 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800407 adc rdx,0
408 mov r11,rdx
409
410 mul rbp
411 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700412 mov rax,QWORD[8+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800413 adc rdx,0
414 add r13,r10
415 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700416 mov QWORD[((-8))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800417 mov rdi,rdx
418
419 mul rbx
420 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700421 mov rax,QWORD[8+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800422 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700423 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800424 mov r10,rdx
425
426 mul rbp
427 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700428 mov rax,QWORD[((-16))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800429 adc rdx,0
430 add rdi,r11
431 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700432 mov QWORD[((-32))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800433 mov r13,rdx
434 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700435 jb NEAR $L$1st4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800436
; Pass 0 tail: the last two words, then the top word and carry at
; scratch indices r15-1 and r15. rax is reloaded with first[0].
437 mul rbx
438 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700439 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800440 adc rdx,0
441 mov r11,rdx
442
443 mul rbp
444 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700445 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800446 adc rdx,0
447 add r13,r10
448 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700449 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800450 mov rdi,rdx
451
452 mul rbx
453 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700454 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800455 adc rdx,0
456 mov r10,rdx
457
458 mul rbp
459 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700460 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800461 adc rdx,0
462 add rdi,r11
463 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700464 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800465 mov r13,rdx
466
467 xor rdi,rdi
468 add r13,r10
469 adc rdi,0
Adam Langleye9ada862015-05-11 17:20:37 -0700470 mov QWORD[((-8))+r15*8+rsp],r13
471 mov QWORD[r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800472
Adam Langleye9ada862015-05-11 17:20:37 -0700473 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800474ALIGN 4
Adam Langleye9ada862015-05-11 17:20:37 -0700475$L$outer4x:
; Passes 1..r9-1: rbx = second[r14]. Same structure as pass 0, except
; that each word also adds the scratch value left by the previous pass.
476 mov rbx,QWORD[r14*8+r12]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800477 xor r15,r15
Adam Langleye9ada862015-05-11 17:20:37 -0700478 mov r10,QWORD[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800479 mov rbp,r8
480 mul rbx
481 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700482 mov rax,QWORD[rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800483 adc rdx,0
484
485 imul rbp,r10
486 mov r11,rdx
487
488 mul rbp
489 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700490 mov rax,QWORD[8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800491 adc rdx,0
492 mov rdi,rdx
493
494 mul rbx
495 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700496 mov rax,QWORD[8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800497 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700498 add r11,QWORD[8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800499 adc rdx,0
500 mov r10,rdx
501
502 mul rbp
503 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700504 mov rax,QWORD[16+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800505 adc rdx,0
506 add rdi,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700507 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800508 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700509 mov QWORD[rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800510 mov r13,rdx
Adam Langleye9ada862015-05-11 17:20:37 -0700511 jmp NEAR $L$inner4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800512ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700513$L$inner4x:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800514 mul rbx
515 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700516 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800517 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700518 add r10,QWORD[((-16))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800519 adc rdx,0
520 mov r11,rdx
521
522 mul rbp
523 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700524 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800525 adc rdx,0
526 add r13,r10
527 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700528 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800529 mov rdi,rdx
530
531 mul rbx
532 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700533 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800534 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700535 add r11,QWORD[((-8))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800536 adc rdx,0
537 mov r10,rdx
538
539 mul rbp
540 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700541 mov rax,QWORD[r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800542 adc rdx,0
543 add rdi,r11
544 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700545 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800546 mov r13,rdx
547
548 mul rbx
549 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700550 mov rax,QWORD[r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800551 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700552 add r10,QWORD[r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800553 adc rdx,0
554 mov r11,rdx
555
556 mul rbp
557 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700558 mov rax,QWORD[8+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800559 adc rdx,0
560 add r13,r10
561 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700562 mov QWORD[((-8))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800563 mov rdi,rdx
564
565 mul rbx
566 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700567 mov rax,QWORD[8+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800568 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700569 add r11,QWORD[8+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800570 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700571 lea r15,[4+r15]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800572 mov r10,rdx
573
574 mul rbp
575 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700576 mov rax,QWORD[((-16))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800577 adc rdx,0
578 add rdi,r11
579 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700580 mov QWORD[((-32))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800581 mov r13,rdx
582 cmp r15,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700583 jb NEAR $L$inner4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800584
; Pass tail: last two words, then fold the previous top word (scratch
; index r9) into the new top word and carry. The outer index r14 is
; advanced in the middle of this sequence.
585 mul rbx
586 add r10,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700587 mov rax,QWORD[((-16))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800588 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700589 add r10,QWORD[((-16))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800590 adc rdx,0
591 mov r11,rdx
592
593 mul rbp
594 add r13,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700595 mov rax,QWORD[((-8))+r15*8+rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800596 adc rdx,0
597 add r13,r10
598 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700599 mov QWORD[((-24))+r15*8+rsp],r13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800600 mov rdi,rdx
601
602 mul rbx
603 add r11,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700604 mov rax,QWORD[((-8))+r15*8+rcx]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800605 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700606 add r11,QWORD[((-8))+r15*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800607 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700608 lea r14,[1+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800609 mov r10,rdx
610
611 mul rbp
612 add rdi,rax
Adam Langleye9ada862015-05-11 17:20:37 -0700613 mov rax,QWORD[rsi]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800614 adc rdx,0
615 add rdi,r11
616 adc rdx,0
Adam Langleye9ada862015-05-11 17:20:37 -0700617 mov QWORD[((-16))+r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800618 mov r13,rdx
619
620 xor rdi,rdi
621 add r13,r10
622 adc rdi,0
Adam Langleye9ada862015-05-11 17:20:37 -0700623 add r13,QWORD[r9*8+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800624 adc rdi,0
Adam Langleye9ada862015-05-11 17:20:37 -0700625 mov QWORD[((-8))+r15*8+rsp],r13
626 mov QWORD[r15*8+rsp],rdi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800627
628 cmp r14,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700629 jb NEAR $L$outer4x
; Final subtraction: reload the destination pointer from the frame and
; compute destination = scratch - [rcx], four words per iteration.
; r15 = (r9-4)/4 loop iterations; the remaining words are handled by
; the code before and after the loop. xmm0 is zeroed here for the
; scratch wipe in the copy loop further down. The borrow started by
; the sub is chained through the sbb instructions; the intervening
; mov/lea/dec instructions do not modify CF.
630 mov rdi,QWORD[16+r9*8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800631 lea r15,[((-4))+r9]
Adam Langleye9ada862015-05-11 17:20:37 -0700632 mov rax,QWORD[rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800633 pxor xmm0,xmm0
Adam Langleye9ada862015-05-11 17:20:37 -0700634 mov rdx,QWORD[8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800635 shr r15,2
Adam Langleye9ada862015-05-11 17:20:37 -0700636 lea rsi,[rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800637 xor r14,r14
638
Adam Langleye9ada862015-05-11 17:20:37 -0700639 sub rax,QWORD[rcx]
640 mov rbx,QWORD[16+rsi]
641 mov rbp,QWORD[24+rsi]
642 sbb rdx,QWORD[8+rcx]
Adam Langleye9ada862015-05-11 17:20:37 -0700643 jmp NEAR $L$sub4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800644ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700645$L$sub4x:
646 mov QWORD[r14*8+rdi],rax
647 mov QWORD[8+r14*8+rdi],rdx
648 sbb rbx,QWORD[16+r14*8+rcx]
649 mov rax,QWORD[32+r14*8+rsi]
650 mov rdx,QWORD[40+r14*8+rsi]
651 sbb rbp,QWORD[24+r14*8+rcx]
652 mov QWORD[16+r14*8+rdi],rbx
653 mov QWORD[24+r14*8+rdi],rbp
654 sbb rax,QWORD[32+r14*8+rcx]
655 mov rbx,QWORD[48+r14*8+rsi]
656 mov rbp,QWORD[56+r14*8+rsi]
657 sbb rdx,QWORD[40+r14*8+rcx]
658 lea r14,[4+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800659 dec r15
Adam Langleye9ada862015-05-11 17:20:37 -0700660 jnz NEAR $L$sub4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800661
; Last four words. rax is then reloaded with the top scratch word
; (index r9) and turned into a selection mask by subtracting the final
; borrow.
Adam Langleye9ada862015-05-11 17:20:37 -0700662 mov QWORD[r14*8+rdi],rax
663 mov rax,QWORD[32+r14*8+rsi]
664 sbb rbx,QWORD[16+r14*8+rcx]
665 mov QWORD[8+r14*8+rdi],rdx
666 sbb rbp,QWORD[24+r14*8+rcx]
667 mov QWORD[16+r14*8+rdi],rbx
Adam Langleyd9e397b2015-01-22 14:27:53 -0800668
669 sbb rax,0
Adam Langleye9ada862015-05-11 17:20:37 -0700670 mov QWORD[24+r14*8+rdi],rbp
Adam Langleyd9e397b2015-01-22 14:27:53 -0800671 xor r14,r14
; Branch-free source selection: rsi = scratch where the mask bits are
; set, the destination itself where they are clear. r14 becomes a byte
; offset from here on.
Robert Sloana94fe052017-02-21 08:49:28 -0800672 and rsi,rax
673 not rax
674 mov rcx,rdi
675 and rcx,rax
676 lea r15,[((-4))+r9]
677 or rsi,rcx
678 shr r15,2
Adam Langleyd9e397b2015-01-22 14:27:53 -0800679
; Copy the selected source to the destination 16 bytes at a time and
; zero the scratch vector with xmm0: 16 bytes here, 32 bytes per loop
; iteration, and a final 16 bytes after the loop.
Robert Sloana94fe052017-02-21 08:49:28 -0800680 movdqu xmm1,XMMWORD[rsi]
681 movdqa XMMWORD[rsp],xmm0
682 movdqu XMMWORD[rdi],xmm1
Adam Langleye9ada862015-05-11 17:20:37 -0700683 jmp NEAR $L$copy4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800684ALIGN 16
Adam Langleye9ada862015-05-11 17:20:37 -0700685$L$copy4x:
Robert Sloana94fe052017-02-21 08:49:28 -0800686 movdqu xmm2,XMMWORD[16+r14*1+rsi]
687 movdqu xmm1,XMMWORD[32+r14*1+rsi]
688 movdqa XMMWORD[16+r14*1+rsp],xmm0
689 movdqu XMMWORD[16+r14*1+rdi],xmm2
690 movdqa XMMWORD[32+r14*1+rsp],xmm0
691 movdqu XMMWORD[32+r14*1+rdi],xmm1
Adam Langleye9ada862015-05-11 17:20:37 -0700692 lea r14,[32+r14]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800693 dec r15
Adam Langleye9ada862015-05-11 17:20:37 -0700694 jnz NEAR $L$copy4x
Adam Langleyd9e397b2015-01-22 14:27:53 -0800695
Robert Sloana94fe052017-02-21 08:49:28 -0800696 movdqu xmm2,XMMWORD[16+r14*1+rsi]
697 movdqa XMMWORD[16+r14*1+rsp],xmm0
698 movdqu XMMWORD[16+r14*1+rdi],xmm2
; Epilogue: rsi = entry rsp saved in the frame, return value 1, reload
; the six pushed registers relative to rsi and restore rsp.
Adam Langleye9ada862015-05-11 17:20:37 -0700699 mov rsi,QWORD[8+r9*8+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800700
Adam Langleyd9e397b2015-01-22 14:27:53 -0800701 mov rax,1
Robert Sloana94fe052017-02-21 08:49:28 -0800702 mov r15,QWORD[((-48))+rsi]
703
704 mov r14,QWORD[((-40))+rsi]
705
706 mov r13,QWORD[((-32))+rsi]
707
708 mov r12,QWORD[((-24))+rsi]
709
710 mov rbp,QWORD[((-16))+rsi]
711
712 mov rbx,QWORD[((-8))+rsi]
713
714 lea rsp,[rsi]
715
Adam Langleye9ada862015-05-11 17:20:37 -0700716$L$mul4x_epilogue:
717 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
718 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800719 DB 0F3h,0C3h ;repret
Robert Sloana94fe052017-02-21 08:49:28 -0800720
Adam Langleye9ada862015-05-11 17:20:37 -0700721$L$SEH_end_bn_mul4x_mont:
722EXTERN bn_sqr8x_internal
Adam Langleyd9e397b2015-01-22 14:27:53 -0800723
724
725ALIGN 32
;-----------------------------------------------------------------------
; bn_sqr8x_mont: squaring path (Win64 ABI). bn_mul_mont jumps to
; $L$sqr8x_enter when both source pointers are equal and the word count
; is a multiple of 8. This routine builds the stack frame, calls the
; external bn_sqr8x_internal, then performs the final subtraction and a
; masked (branch-free) copy into the destination.
;
; Argument remapping done by the prologue:
;   rdi <- rcx        destination vector
;   rsi <- rdx        source vector
;   rdx <- r8         second source pointer (not read in this routine)
;   rcx <- r9         vector passed to bn_sqr8x_internal in xmm2
;                     (presumably the modulus)
;   r8  <- [40+rsp]   pointer; the qword it points to is stored at
;                     [32+rsp] for bn_sqr8x_internal
;   r9  <- [48+rsp]   word count; only the low 32 bits are used
;
; Returns rax = 1. rbx, rbp, r12-r15, rdi and rsi are restored before
; returning.
;
; NOTE(review): the register contract of bn_sqr8x_internal is not
; visible in this file section. The code after the call relies on r9
; being a negative byte count, on rbp pointing at the subtrahend and on
; rax feeding the final borrow -- confirm against bn_sqr8x_internal.
;-----------------------------------------------------------------------
Adam Langleye9ada862015-05-11 17:20:37 -0700726bn_sqr8x_mont:
727 mov QWORD[8+rsp],rdi ;WIN64 prologue
728 mov QWORD[16+rsp],rsi
Adam Langleyd9e397b2015-01-22 14:27:53 -0800729 mov rax,rsp
Adam Langleye9ada862015-05-11 17:20:37 -0700730$L$SEH_begin_bn_sqr8x_mont:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800731 mov rdi,rcx
732 mov rsi,rdx
733 mov rdx,r8
734 mov rcx,r9
Adam Langleye9ada862015-05-11 17:20:37 -0700735 mov r8,QWORD[40+rsp]
736 mov r9,QWORD[48+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800737
738
Robert Sloana94fe052017-02-21 08:49:28 -0800739
; rax keeps the entry rsp for the epilogue.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800740 mov rax,rsp
Robert Sloana94fe052017-02-21 08:49:28 -0800741
742$L$sqr8x_enter:
; Save the callee-saved registers directly below the entry rsp.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800743 push rbx
Robert Sloana94fe052017-02-21 08:49:28 -0800744
Adam Langleyd9e397b2015-01-22 14:27:53 -0800745 push rbp
Robert Sloana94fe052017-02-21 08:49:28 -0800746
Adam Langleyd9e397b2015-01-22 14:27:53 -0800747 push r12
Robert Sloana94fe052017-02-21 08:49:28 -0800748
Adam Langleyd9e397b2015-01-22 14:27:53 -0800749 push r13
Robert Sloana94fe052017-02-21 08:49:28 -0800750
Adam Langleyd9e397b2015-01-22 14:27:53 -0800751 push r14
Robert Sloana94fe052017-02-21 08:49:28 -0800752
Adam Langleyd9e397b2015-01-22 14:27:53 -0800753 push r15
754
Robert Sloana94fe052017-02-21 08:49:28 -0800755$L$sqr8x_prologue:
756
; r10 = 32 * word count, r9 = -(8 * word count). The 32-bit writes
; zero-extend, so the upper half of the incoming r9 is ignored.
Adam Langleyd9e397b2015-01-22 14:27:53 -0800757 mov r10d,r9d
758 shl r9d,3
759 shl r10,3+2
760 neg r9
761
762
763
764
765
766
; Frame placement: the candidate frame base is rsp - 64 - 16*count.
; r11 is its distance from the source pointer rsi modulo 4096; the two
; branches below adjust rbp by an amount derived from r11 (presumably
; to control how the frame and the source alias within a page -- TODO
; confirm intent). r8 is dereferenced here as well.
David Benjamin4969cc92016-04-22 15:02:23 -0400767 lea r11,[((-64))+r9*2+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800768 mov rbp,rsp
Adam Langleye9ada862015-05-11 17:20:37 -0700769 mov r8,QWORD[r8]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800770 sub r11,rsi
771 and r11,4095
772 cmp r10,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700773 jb NEAR $L$sqr8x_sp_alt
Robert Sloana94fe052017-02-21 08:49:28 -0800774 sub rbp,r11
775 lea rbp,[((-64))+r9*2+rbp]
Adam Langleye9ada862015-05-11 17:20:37 -0700776 jmp NEAR $L$sqr8x_sp_done
Adam Langleyd9e397b2015-01-22 14:27:53 -0800777
778ALIGN 32
Adam Langleye9ada862015-05-11 17:20:37 -0700779$L$sqr8x_sp_alt:
; Alternative placement: r11 = max(r11 - (4096 - 64 + 2*r9), 0). The
; mov of 0 leaves the flags from the sub intact for the cmovc.
David Benjamin4969cc92016-04-22 15:02:23 -0400780 lea r10,[((4096-64))+r9*2]
Robert Sloana94fe052017-02-21 08:49:28 -0800781 lea rbp,[((-64))+r9*2+rbp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800782 sub r11,r10
783 mov r10,0
784 cmovc r11,r10
Robert Sloana94fe052017-02-21 08:49:28 -0800785 sub rbp,r11
Adam Langleye9ada862015-05-11 17:20:37 -0700786$L$sqr8x_sp_done:
; Align the frame base to 64 bytes, then lower rsp to it in steps of at
; most 4096 bytes, reading one qword at every step (presumably so stack
; guard pages are touched in order).
Robert Sloana94fe052017-02-21 08:49:28 -0800787 and rbp,-64
788 mov r11,rsp
789 sub r11,rbp
790 and r11,-4096
791 lea rsp,[rbp*1+r11]
792 mov r10,QWORD[rsp]
793 cmp rsp,rbp
794 ja NEAR $L$sqr8x_page_walk
795 jmp NEAR $L$sqr8x_page_walk_done
796
797ALIGN 16
798$L$sqr8x_page_walk:
799 lea rsp,[((-4096))+rsp]
800 mov r10,QWORD[rsp]
801 cmp rsp,rbp
802 ja NEAR $L$sqr8x_page_walk
803$L$sqr8x_page_walk_done:
804
; r10 = negative byte count, r9 = positive byte count (8 * words).
Adam Langleyd9e397b2015-01-22 14:27:53 -0800805 mov r10,r9
806 neg r9
807
; Frame slots: [32+rsp] = qword loaded through the fifth argument,
; [40+rsp] = entry rsp.
Adam Langleye9ada862015-05-11 17:20:37 -0700808 mov QWORD[32+rsp],r8
809 mov QWORD[40+rsp],rax
Robert Sloana94fe052017-02-21 08:49:28 -0800810
Adam Langleye9ada862015-05-11 17:20:37 -0700811$L$sqr8x_body:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800812
; Hand-encoded SSE2 moves that park values in xmm registers across the
; call: the first DB is movq xmm2,rcx; the two after the pxor are
; movq xmm1,rdi (destination pointer) and movq xmm3,r10.
David Benjamin4969cc92016-04-22 15:02:23 -0400813DB 102,72,15,110,209
Adam Langleyd9e397b2015-01-22 14:27:53 -0800814 pxor xmm0,xmm0
815DB 102,72,15,110,207
816DB 102,73,15,110,218
817 call bn_sqr8x_internal
818
David Benjamin4969cc92016-04-22 15:02:23 -0400819
820
821
; Set up the final subtraction. rbx = rdi + r9 as left by the call,
; rcx = r9 >> 5 is the (negative) count of 4-word iterations, rdx = r9,
; and the DB below is movq rdi,xmm1, which restores the destination
; pointer. sar leaves CF = bit 4 of r9, i.e. CF = 0 when the byte count
; is a multiple of 32, so the first sbb starts without a borrow.
822 lea rbx,[r9*1+rdi]
823 mov rcx,r9
824 mov rdx,r9
825DB 102,72,15,126,207
826 sar rcx,3+2
827 jmp NEAR $L$sqr8x_sub
Adam Langleyd9e397b2015-01-22 14:27:53 -0800828
829ALIGN 32
David Benjamin4969cc92016-04-22 15:02:23 -0400830$L$sqr8x_sub:
; destination[] = [rbx][] - [rbp][], four words per iteration. The
; mov/lea/inc instructions preserve CF, so the borrow chains across
; iterations; the loop ends when rcx counts up to zero.
831 mov r12,QWORD[rbx]
832 mov r13,QWORD[8+rbx]
833 mov r14,QWORD[16+rbx]
834 mov r15,QWORD[24+rbx]
835 lea rbx,[32+rbx]
836 sbb r12,QWORD[rbp]
837 sbb r13,QWORD[8+rbp]
838 sbb r14,QWORD[16+rbp]
839 sbb r15,QWORD[24+rbp]
840 lea rbp,[32+rbp]
841 mov QWORD[rdi],r12
842 mov QWORD[8+rdi],r13
843 mov QWORD[16+rdi],r14
844 mov QWORD[24+rdi],r15
845 lea rdi,[32+rdi]
846 inc rcx
847 jnz NEAR $L$sqr8x_sub
848
; Fold the final borrow into rax, step rbx and rdi by r9 (back to the
; start of the data when r9 is negative), and broadcast the low dword
; of rax into xmm1 as the selection mask (the DB is movq xmm1,rax).
; rsi = entry rsp saved in the frame.
849 sbb rax,0
850 lea rbx,[r9*1+rbx]
851 lea rdi,[r9*1+rdi]
852
853DB 102,72,15,110,200
854 pxor xmm0,xmm0
855 pshufd xmm1,xmm1,0
856 mov rsi,QWORD[40+rsp]
Robert Sloana94fe052017-02-21 08:49:28 -0800857
David Benjamin4969cc92016-04-22 15:02:23 -0400858 jmp NEAR $L$sqr8x_cond_copy
859
860ALIGN 32
861$L$sqr8x_cond_copy:
; Masked select, 32 bytes per iteration:
;   destination = ([rbx] & xmm1) | (destination & ~mask)
; where pcmpeqd against zero produces the complementary mask in xmm0.
; The words just read from [rbx], and the words at the same offset plus
; rdx, are overwritten with zeros. r9 counts up by 32 until it reaches
; zero.
862 movdqa xmm2,XMMWORD[rbx]
863 movdqa xmm3,XMMWORD[16+rbx]
864 lea rbx,[32+rbx]
865 movdqu xmm4,XMMWORD[rdi]
866 movdqu xmm5,XMMWORD[16+rdi]
867 lea rdi,[32+rdi]
868 movdqa XMMWORD[(-32)+rbx],xmm0
869 movdqa XMMWORD[(-16)+rbx],xmm0
870 movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0
871 movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
872 pcmpeqd xmm0,xmm1
873 pand xmm2,xmm1
874 pand xmm3,xmm1
875 pand xmm4,xmm0
876 pand xmm5,xmm0
877 pxor xmm0,xmm0
878 por xmm4,xmm2
879 por xmm5,xmm3
880 movdqu XMMWORD[(-32)+rdi],xmm4
881 movdqu XMMWORD[(-16)+rdi],xmm5
882 add r9,32
883 jnz NEAR $L$sqr8x_cond_copy
Adam Langleyd9e397b2015-01-22 14:27:53 -0800884
; Epilogue: return value 1, reload the six pushed registers relative
; to the saved entry rsp in rsi and restore rsp.
885 mov rax,1
Adam Langleye9ada862015-05-11 17:20:37 -0700886 mov r15,QWORD[((-48))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800887
Adam Langleye9ada862015-05-11 17:20:37 -0700888 mov r14,QWORD[((-40))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800889
Adam Langleye9ada862015-05-11 17:20:37 -0700890 mov r13,QWORD[((-32))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800891
Adam Langleye9ada862015-05-11 17:20:37 -0700892 mov r12,QWORD[((-24))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800893
Adam Langleye9ada862015-05-11 17:20:37 -0700894 mov rbp,QWORD[((-16))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800895
Adam Langleye9ada862015-05-11 17:20:37 -0700896 mov rbx,QWORD[((-8))+rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800897
Adam Langleye9ada862015-05-11 17:20:37 -0700898 lea rsp,[rsi]
Robert Sloana94fe052017-02-21 08:49:28 -0800899
Adam Langleye9ada862015-05-11 17:20:37 -0700900$L$sqr8x_epilogue:
901 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
902 mov rsi,QWORD[16+rsp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800903 DB 0F3h,0C3h ;repret
Robert Sloana94fe052017-02-21 08:49:28 -0800904
Adam Langleye9ada862015-05-11 17:20:37 -0700905$L$SEH_end_bn_sqr8x_mont:
; NUL-terminated ASCII identification string placed after the code. The bytes
; spell: "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
DB 115,108,46,111,114,103,62,0
ALIGN 16
; Pointer-sized import slot for RtlVirtualUnwind; the exception handlers call
; through it with `call QWORD[__imp_RtlVirtualUnwind]`.
EXTERN __imp_RtlVirtualUnwind
Adam Langleyd9e397b2015-01-22 14:27:53 -0800913
;-----------------------------------------------------------------------
; mul_handler -- Win64 language-specific exception handler registered in
; .xdata for bn_mul_mont and bn_mul4x_mont.
;
; ABI:  Microsoft x64 handler convention
; In:   r8 = CONTEXT record of the faulting frame
;       r9 = DISPATCHER_CONTEXT
;            [8+r9]  = image base
;            [56+r9] = HandlerData: two image-relative label offsets,
;                      [0] = start of function body, [4] = epilogue
; Out:  does not return here; control continues at $L$common_pop_regs or
;       $L$common_seh_tail (defined with sqr_handler) with
;       rax = the interrupted function's stack pointer at entry
;
; CONTEXT offsets used (Win64 layout): 120 = Rax, 152 = Rsp, 192 = R9,
; 248 = Rip.
;-----------------------------------------------------------------------
ALIGN 16
mul_handler:
	; Save every non-volatile GPR plus flags. Entry rsp is 8 mod 16;
	; nine 8-byte pushes and the 64-byte scratch area leave rsp 16-byte
	; aligned for the later call, with room for shadow space + 4 stack args.
	push rsi
	push rdi
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	pushfq
	sub rsp,64

	mov rax,QWORD[120+r8]		; rax = context->Rax
	mov rbx,QWORD[248+r8]		; rbx = context->Rip (faulting address)

	mov rsi,QWORD[8+r9]		; rsi = disp->ImageBase
	mov r11,QWORD[56+r9]		; r11 = disp->HandlerData

	; Rip below the body label: still in the prologue, so context->Rax is
	; taken as the entry stack pointer and no saved registers are reloaded.
	mov r10d,DWORD[r11]		; HandlerData[0] (32-bit load zero-extends)
	lea r10,[r10*1+rsi]		; absolute address of body label
	cmp rbx,r10
	jb NEAR $L$common_seh_tail

	mov rax,QWORD[152+r8]		; rax = context->Rsp

	; Rip at or past the epilogue label: registers are already restored,
	; so context->Rsp is used as-is.
	mov r10d,DWORD[4+r11]		; HandlerData[1]
	lea r10,[r10*1+rsi]		; absolute address of epilogue label
	cmp rbx,r10
	jae NEAR $L$common_seh_tail

	; Inside the body: fetch the saved entry stack pointer from the frame
	; slot at Rsp + 8 + R9*8.
	; NOTE(review): assumes the function body keeps its word count in r9 and
	; stores the entry rsp in that slot -- confirm against bn_mul_mont and
	; bn_mul4x_mont.
	mov r10,QWORD[192+r8]		; r10 = context->R9
	mov rax,QWORD[8+r10*8+rax]

	jmp NEAR $L$common_pop_regs
Adam Langleye9ada862015-05-11 17:20:37 -0700949
Adam Langleyd9e397b2015-01-22 14:27:53 -0800950
951
;-----------------------------------------------------------------------
; sqr_handler -- Win64 language-specific exception handler registered in
; .xdata for bn_sqr8x_mont. Its tail ($L$common_pop_regs and
; $L$common_seh_tail) is also the landing point for mul_handler.
;
; ABI:  Microsoft x64 handler convention
; In:   r8 = CONTEXT record of the faulting frame
;       r9 = DISPATCHER_CONTEXT
;            [8+r9]  = image base
;            [40+r9] = ContextRecord (destination of the patched CONTEXT)
;            [56+r9] = HandlerData: three image-relative label offsets,
;                      [0] = prologue, [4] = body, [8] = epilogue
; Out:  eax = 1 (ExceptionContinueSearch)
;
; CONTEXT offsets used (Win64 layout): 120 = Rax, 144 = Rbx, 152 = Rsp,
; 160 = Rbp, 168 = Rsi, 176 = Rdi, 216..240 = R12..R15, 248 = Rip.
;-----------------------------------------------------------------------
ALIGN 16
sqr_handler:
	; Save every non-volatile GPR plus flags. Entry rsp is 8 mod 16;
	; nine 8-byte pushes and the 64-byte scratch area leave rsp 16-byte
	; aligned for the call below, with room for shadow space + 4 stack args.
	push rsi
	push rdi
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	pushfq
	sub rsp,64

	mov rax,QWORD[120+r8]		; rax = context->Rax
	mov rbx,QWORD[248+r8]		; rbx = context->Rip (faulting address)

	mov rsi,QWORD[8+r9]		; rsi = disp->ImageBase
	mov r11,QWORD[56+r9]		; r11 = disp->HandlerData

	; Rip below the prologue label: nothing has been pushed yet.
	mov r10d,DWORD[r11]		; HandlerData[0] (32-bit load zero-extends)
	lea r10,[r10*1+rsi]
	cmp rbx,r10
	jb NEAR $L$common_seh_tail

	; Rip between prologue and body labels: registers are pushed and
	; context->Rax is used as the entry stack pointer.
	mov r10d,DWORD[4+r11]		; HandlerData[1]
	lea r10,[r10*1+rsi]
	cmp rbx,r10
	jb NEAR $L$common_pop_regs

	mov rax,QWORD[152+r8]		; rax = context->Rsp

	; Rip at or past the epilogue label: registers are already restored.
	mov r10d,DWORD[8+r11]		; HandlerData[2]
	lea r10,[r10*1+rsi]
	cmp rbx,r10
	jae NEAR $L$common_seh_tail

	; Inside the body: reload the entry stack pointer that the function
	; stored at [40+rsp] just before its body label.
	mov rax,QWORD[40+rax]

$L$common_pop_regs:
	; rax = entry stack pointer. Reload the six saved registers from the
	; slots just below it and publish them into the CONTEXT record.
	mov rbx,QWORD[((-8))+rax]
	mov rbp,QWORD[((-16))+rax]
	mov r12,QWORD[((-24))+rax]
	mov r13,QWORD[((-32))+rax]
	mov r14,QWORD[((-40))+rax]
	mov r15,QWORD[((-48))+rax]
	mov QWORD[144+r8],rbx		; context->Rbx
	mov QWORD[160+r8],rbp		; context->Rbp
	mov QWORD[216+r8],r12		; context->R12
	mov QWORD[224+r8],r13		; context->R13
	mov QWORD[232+r8],r14		; context->R14
	mov QWORD[240+r8],r15		; context->R15

$L$common_seh_tail:
	; The WIN64 prologue parks rdi/rsi at [8+rsp]/[16+rsp] on entry;
	; recover them relative to the entry stack pointer in rax.
	mov rdi,QWORD[8+rax]
	mov rsi,QWORD[16+rax]
	mov QWORD[152+r8],rax		; context->Rsp
	mov QWORD[168+r8],rsi		; context->Rsi
	mov QWORD[176+r8],rdi		; context->Rdi

	; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
	mov rdi,QWORD[40+r9]
	mov rsi,r8
	mov ecx,154
	DD 0xa548f3fc			; bytes FC F3 48 A5 = cld; rep movsq

	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
	mov rsi,r9
	xor rcx,rcx			; arg1: HandlerType = 0
	mov rdx,QWORD[8+rsi]		; arg2: disp->ImageBase
	mov r8,QWORD[rsi]		; arg3: disp->ControlPc
	mov r9,QWORD[16+rsi]		; arg4: disp->FunctionEntry
	mov r10,QWORD[40+rsi]		; disp->ContextRecord
	lea r11,[56+rsi]		; &disp->HandlerData
	lea r12,[24+rsi]		; &disp->EstablisherFrame
	mov QWORD[32+rsp],r10		; arg5
	mov QWORD[40+rsp],r11		; arg6
	mov QWORD[48+rsp],r12		; arg7
	mov QWORD[56+rsp],rcx		; arg8: NULL
	call QWORD[__imp_RtlVirtualUnwind]

	mov eax,1			; return ExceptionContinueSearch
	add rsp,64
	popfq
	pop r15
	pop r14
	pop r13
	pop r12
	pop rbp
	pop rbx
	pop rdi
	pop rsi
	DB 0F3h,0C3h ;repret
Adam Langleyd9e397b2015-01-22 14:27:53 -08001042
Adam Langleye9ada862015-05-11 17:20:37 -07001043
; Win64 function table. Each record is three image-relative 32-bit offsets:
; function start, function end, and that function's unwind info in .xdata.
section .pdata rdata align=4
ALIGN 4
	; bn_mul_mont
	DD $L$SEH_begin_bn_mul_mont wrt ..imagebase
	DD $L$SEH_end_bn_mul_mont wrt ..imagebase
	DD $L$SEH_info_bn_mul_mont wrt ..imagebase

	; bn_mul4x_mont
	DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
	DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase
	DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase

	; bn_sqr8x_mont
	DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
	DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
	DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase
; Unwind info records referenced from .pdata. Each record is:
;   DB 9,0,0,0  -- header: version 1 with the exception-handler flag set
;                  (1 | 1<<3 = 9), prologue size 0, no unwind codes,
;                  no frame register
;   DD handler  -- image-relative offset of the language-specific handler
;   DD labels   -- handler data: image-relative label offsets that the
;                  handler compares against the faulting Rip
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_bn_mul_mont:
DB 9,0,0,0
	DD mul_handler wrt ..imagebase
	; HandlerData for mul_handler: body start, epilogue
	DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
DB 9,0,0,0
	DD mul_handler wrt ..imagebase
	; HandlerData for mul_handler: body start, epilogue
	DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
DB 9,0,0,0
	DD sqr_handler wrt ..imagebase
	; HandlerData for sqr_handler: prologue, body start, epilogue
	DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
ALIGN 8