blob: 5e0600ac69d650690e76b16f6e84cd3871bfbdb4 [file] [log] [blame]
sewardja3e98302005-02-01 15:55:05 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_amd64_defs.c ---*/
sewardja3e98302005-02-01 15:55:05 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
sewardja3e98302005-02-01 15:55:05 +00009
Elliott Hughesed398002017-06-21 14:41:24 -070010 Copyright (C) 2004-2017 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardja3e98302005-02-01 15:55:05 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
sewardja3e98302005-02-01 15:55:05 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
sewardja3e98302005-02-01 15:55:05 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
sewardja3e98302005-02-01 15:55:05 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38#include "libvex_trc_values.h"
39
sewardjcef7d3e2009-07-02 12:21:59 +000040#include "main_util.h"
41#include "host_generic_regs.h"
42#include "host_amd64_defs.h"
sewardjc33671d2005-02-01 20:30:00 +000043
44
45/* --------- Registers. --------- */
46
sewardja5b50222015-03-26 07:18:32 +000047const RRegUniverse* getRRegUniverse_AMD64 ( void )
48{
49 /* The real-register universe is a big constant, so we just want to
50 initialise it once. */
51 static RRegUniverse rRegUniverse_AMD64;
52 static Bool rRegUniverse_AMD64_initted = False;
53
54 /* Handy shorthand, nothing more */
55 RRegUniverse* ru = &rRegUniverse_AMD64;
56
57 /* This isn't thread-safe. Sigh. */
58 if (LIKELY(rRegUniverse_AMD64_initted))
59 return ru;
60
61 RRegUniverse__init(ru);
62
63 /* Add the registers. The initial segment of this array must be
64 those available for allocation by reg-alloc, and those that
65 follow are not available for allocation. */
66 ru->regs[ru->size++] = hregAMD64_RSI();
67 ru->regs[ru->size++] = hregAMD64_RDI();
68 ru->regs[ru->size++] = hregAMD64_R8();
69 ru->regs[ru->size++] = hregAMD64_R9();
70 ru->regs[ru->size++] = hregAMD64_R12();
71 ru->regs[ru->size++] = hregAMD64_R13();
72 ru->regs[ru->size++] = hregAMD64_R14();
73 ru->regs[ru->size++] = hregAMD64_R15();
74 ru->regs[ru->size++] = hregAMD64_RBX();
75 ru->regs[ru->size++] = hregAMD64_XMM3();
76 ru->regs[ru->size++] = hregAMD64_XMM4();
77 ru->regs[ru->size++] = hregAMD64_XMM5();
78 ru->regs[ru->size++] = hregAMD64_XMM6();
79 ru->regs[ru->size++] = hregAMD64_XMM7();
80 ru->regs[ru->size++] = hregAMD64_XMM8();
81 ru->regs[ru->size++] = hregAMD64_XMM9();
82 ru->regs[ru->size++] = hregAMD64_XMM10();
83 ru->regs[ru->size++] = hregAMD64_XMM11();
84 ru->regs[ru->size++] = hregAMD64_XMM12();
85 ru->regs[ru->size++] = hregAMD64_R10();
86 ru->allocable = ru->size;
87 /* And other regs, not available to the allocator. */
88 ru->regs[ru->size++] = hregAMD64_RAX();
89 ru->regs[ru->size++] = hregAMD64_RCX();
90 ru->regs[ru->size++] = hregAMD64_RDX();
91 ru->regs[ru->size++] = hregAMD64_RSP();
92 ru->regs[ru->size++] = hregAMD64_RBP();
93 ru->regs[ru->size++] = hregAMD64_R11();
94 ru->regs[ru->size++] = hregAMD64_XMM0();
95 ru->regs[ru->size++] = hregAMD64_XMM1();
96
97 rRegUniverse_AMD64_initted = True;
98
99 RRegUniverse__check_is_sane(ru);
100 return ru;
101}
102
103
sewardjc33671d2005-02-01 20:30:00 +0000104void ppHRegAMD64 ( HReg reg )
sewardj614b3fb2005-02-02 02:16:03 +0000105{
106 Int r;
florian55085f82012-11-21 00:36:55 +0000107 static const HChar* ireg64_names[16]
sewardj614b3fb2005-02-02 02:16:03 +0000108 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
109 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
110 /* Be generic for all virtual regs. */
111 if (hregIsVirtual(reg)) {
112 ppHReg(reg);
113 return;
114 }
115 /* But specific for real regs. */
116 switch (hregClass(reg)) {
117 case HRcInt64:
sewardja5b50222015-03-26 07:18:32 +0000118 r = hregEncoding(reg);
sewardj614b3fb2005-02-02 02:16:03 +0000119 vassert(r >= 0 && r < 16);
120 vex_printf("%s", ireg64_names[r]);
121 return;
sewardj614b3fb2005-02-02 02:16:03 +0000122 case HRcVec128:
sewardja5b50222015-03-26 07:18:32 +0000123 r = hregEncoding(reg);
sewardj614b3fb2005-02-02 02:16:03 +0000124 vassert(r >= 0 && r < 16);
125 vex_printf("%%xmm%d", r);
126 return;
127 default:
128 vpanic("ppHRegAMD64");
129 }
sewardjc33671d2005-02-01 20:30:00 +0000130}
131
sewardj549e0642005-02-05 12:00:14 +0000132static void ppHRegAMD64_lo32 ( HReg reg )
133{
134 Int r;
florian55085f82012-11-21 00:36:55 +0000135 static const HChar* ireg32_names[16]
sewardja5b50222015-03-26 07:18:32 +0000136 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
137 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
sewardj549e0642005-02-05 12:00:14 +0000138 /* Be generic for all virtual regs. */
139 if (hregIsVirtual(reg)) {
140 ppHReg(reg);
141 vex_printf("d");
142 return;
143 }
144 /* But specific for real regs. */
145 switch (hregClass(reg)) {
146 case HRcInt64:
sewardja5b50222015-03-26 07:18:32 +0000147 r = hregEncoding(reg);
sewardj549e0642005-02-05 12:00:14 +0000148 vassert(r >= 0 && r < 16);
149 vex_printf("%s", ireg32_names[r]);
150 return;
151 default:
152 vpanic("ppHRegAMD64_lo32: invalid regclass");
153 }
154}
155
sewardjc33671d2005-02-01 20:30:00 +0000156
sewardjf67eadf2005-02-03 03:53:52 +0000157/* --------- Condition codes, Intel encoding. --------- */
158
florian55085f82012-11-21 00:36:55 +0000159const HChar* showAMD64CondCode ( AMD64CondCode cond )
sewardjf67eadf2005-02-03 03:53:52 +0000160{
161 switch (cond) {
162 case Acc_O: return "o";
163 case Acc_NO: return "no";
164 case Acc_B: return "b";
165 case Acc_NB: return "nb";
166 case Acc_Z: return "z";
167 case Acc_NZ: return "nz";
168 case Acc_BE: return "be";
169 case Acc_NBE: return "nbe";
170 case Acc_S: return "s";
171 case Acc_NS: return "ns";
172 case Acc_P: return "p";
173 case Acc_NP: return "np";
174 case Acc_L: return "l";
175 case Acc_NL: return "nl";
176 case Acc_LE: return "le";
177 case Acc_NLE: return "nle";
178 case Acc_ALWAYS: return "ALWAYS";
179 default: vpanic("ppAMD64CondCode");
180 }
181}
sewardj614b3fb2005-02-02 02:16:03 +0000182
183
184/* --------- AMD64AMode: memory address expressions. --------- */
185
186AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
floriand8e3eca2015-03-13 12:46:49 +0000187 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
sewardj614b3fb2005-02-02 02:16:03 +0000188 am->tag = Aam_IR;
189 am->Aam.IR.imm = imm32;
190 am->Aam.IR.reg = reg;
191 return am;
192}
193AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
floriand8e3eca2015-03-13 12:46:49 +0000194 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
sewardj614b3fb2005-02-02 02:16:03 +0000195 am->tag = Aam_IRRS;
196 am->Aam.IRRS.imm = imm32;
197 am->Aam.IRRS.base = base;
198 am->Aam.IRRS.index = indEx;
199 am->Aam.IRRS.shift = shift;
200 vassert(shift >= 0 && shift <= 3);
201 return am;
202}
203
sewardj614b3fb2005-02-02 02:16:03 +0000204void ppAMD64AMode ( AMD64AMode* am ) {
205 switch (am->tag) {
206 case Aam_IR:
207 if (am->Aam.IR.imm == 0)
208 vex_printf("(");
209 else
210 vex_printf("0x%x(", am->Aam.IR.imm);
211 ppHRegAMD64(am->Aam.IR.reg);
212 vex_printf(")");
213 return;
214 case Aam_IRRS:
215 vex_printf("0x%x(", am->Aam.IRRS.imm);
216 ppHRegAMD64(am->Aam.IRRS.base);
217 vex_printf(",");
218 ppHRegAMD64(am->Aam.IRRS.index);
219 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
220 return;
221 default:
222 vpanic("ppAMD64AMode");
223 }
224}
225
sewardjf67eadf2005-02-03 03:53:52 +0000226static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
227 switch (am->tag) {
228 case Aam_IR:
229 addHRegUse(u, HRmRead, am->Aam.IR.reg);
230 return;
231 case Aam_IRRS:
232 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
233 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
234 return;
235 default:
236 vpanic("addRegUsage_AMD64AMode");
237 }
238}
239
240static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
241 switch (am->tag) {
242 case Aam_IR:
243 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
244 return;
245 case Aam_IRRS:
246 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
247 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
248 return;
249 default:
250 vpanic("mapRegs_AMD64AMode");
251 }
252}
sewardj614b3fb2005-02-02 02:16:03 +0000253
254/* --------- Operand, which can be reg, immediate or memory. --------- */
255
256AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
floriand8e3eca2015-03-13 12:46:49 +0000257 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
sewardj614b3fb2005-02-02 02:16:03 +0000258 op->tag = Armi_Imm;
259 op->Armi.Imm.imm32 = imm32;
260 return op;
261}
sewardj8258a8c2005-02-02 03:11:24 +0000262AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
floriand8e3eca2015-03-13 12:46:49 +0000263 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
sewardj8258a8c2005-02-02 03:11:24 +0000264 op->tag = Armi_Reg;
265 op->Armi.Reg.reg = reg;
266 return op;
267}
sewardj614b3fb2005-02-02 02:16:03 +0000268AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
floriand8e3eca2015-03-13 12:46:49 +0000269 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
sewardj614b3fb2005-02-02 02:16:03 +0000270 op->tag = Armi_Mem;
271 op->Armi.Mem.am = am;
272 return op;
273}
274
sewardj9cc2bbf2011-06-05 17:56:03 +0000275static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
sewardj614b3fb2005-02-02 02:16:03 +0000276 switch (op->tag) {
277 case Armi_Imm:
278 vex_printf("$0x%x", op->Armi.Imm.imm32);
279 return;
sewardj9cc2bbf2011-06-05 17:56:03 +0000280 case Armi_Reg:
281 if (lo32)
282 ppHRegAMD64_lo32(op->Armi.Reg.reg);
283 else
284 ppHRegAMD64(op->Armi.Reg.reg);
sewardj614b3fb2005-02-02 02:16:03 +0000285 return;
286 case Armi_Mem:
287 ppAMD64AMode(op->Armi.Mem.am);
288 return;
289 default:
290 vpanic("ppAMD64RMI");
291 }
292}
sewardj9cc2bbf2011-06-05 17:56:03 +0000293void ppAMD64RMI ( AMD64RMI* op ) {
294 ppAMD64RMI_wrk(op, False/*!lo32*/);
295}
296void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
297 ppAMD64RMI_wrk(op, True/*lo32*/);
298}
sewardj614b3fb2005-02-02 02:16:03 +0000299
sewardjf67eadf2005-02-03 03:53:52 +0000300/* An AMD64RMI can only be used in a "read" context (what would it mean
301 to write or modify a literal?) and so we enumerate its registers
302 accordingly. */
303static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
304 switch (op->tag) {
305 case Armi_Imm:
306 return;
307 case Armi_Reg:
308 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
309 return;
310 case Armi_Mem:
311 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
312 return;
313 default:
314 vpanic("addRegUsage_AMD64RMI");
315 }
316}
317
318static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
319 switch (op->tag) {
320 case Armi_Imm:
321 return;
322 case Armi_Reg:
323 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
324 return;
325 case Armi_Mem:
326 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
327 return;
328 default:
329 vpanic("mapRegs_AMD64RMI");
330 }
331}
332
333
334/* --------- Operand, which can be reg or immediate only. --------- */
335
336AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
floriand8e3eca2015-03-13 12:46:49 +0000337 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
sewardjf67eadf2005-02-03 03:53:52 +0000338 op->tag = Ari_Imm;
339 op->Ari.Imm.imm32 = imm32;
340 return op;
341}
342AMD64RI* AMD64RI_Reg ( HReg reg ) {
floriand8e3eca2015-03-13 12:46:49 +0000343 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
sewardjf67eadf2005-02-03 03:53:52 +0000344 op->tag = Ari_Reg;
345 op->Ari.Reg.reg = reg;
346 return op;
347}
348
349void ppAMD64RI ( AMD64RI* op ) {
350 switch (op->tag) {
351 case Ari_Imm:
352 vex_printf("$0x%x", op->Ari.Imm.imm32);
353 return;
354 case Ari_Reg:
355 ppHRegAMD64(op->Ari.Reg.reg);
356 return;
357 default:
358 vpanic("ppAMD64RI");
359 }
360}
361
362/* An AMD64RI can only be used in a "read" context (what would it mean
363 to write or modify a literal?) and so we enumerate its registers
364 accordingly. */
365static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
366 switch (op->tag) {
367 case Ari_Imm:
368 return;
369 case Ari_Reg:
370 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
371 return;
372 default:
373 vpanic("addRegUsage_AMD64RI");
374 }
375}
376
377static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
378 switch (op->tag) {
379 case Ari_Imm:
380 return;
381 case Ari_Reg:
382 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
383 return;
384 default:
385 vpanic("mapRegs_AMD64RI");
386 }
387}
sewardj8258a8c2005-02-02 03:11:24 +0000388
389
390/* --------- Operand, which can be reg or memory only. --------- */
391
392AMD64RM* AMD64RM_Reg ( HReg reg ) {
floriand8e3eca2015-03-13 12:46:49 +0000393 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
sewardj8258a8c2005-02-02 03:11:24 +0000394 op->tag = Arm_Reg;
395 op->Arm.Reg.reg = reg;
396 return op;
397}
sewardj05b3b6a2005-02-04 01:44:33 +0000398AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
floriand8e3eca2015-03-13 12:46:49 +0000399 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
sewardj05b3b6a2005-02-04 01:44:33 +0000400 op->tag = Arm_Mem;
401 op->Arm.Mem.am = am;
402 return op;
403}
sewardj8258a8c2005-02-02 03:11:24 +0000404
405void ppAMD64RM ( AMD64RM* op ) {
406 switch (op->tag) {
407 case Arm_Mem:
408 ppAMD64AMode(op->Arm.Mem.am);
409 return;
410 case Arm_Reg:
411 ppHRegAMD64(op->Arm.Reg.reg);
412 return;
413 default:
414 vpanic("ppAMD64RM");
415 }
416}
417
sewardjf67eadf2005-02-03 03:53:52 +0000418/* Because an AMD64RM can be both a source or destination operand, we
419 have to supply a mode -- pertaining to the operand as a whole --
420 indicating how it's being used. */
421static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
422 switch (op->tag) {
423 case Arm_Mem:
424 /* Memory is read, written or modified. So we just want to
425 know the regs read by the amode. */
426 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
427 return;
428 case Arm_Reg:
429 /* reg is read, written or modified. Add it in the
430 appropriate way. */
431 addHRegUse(u, mode, op->Arm.Reg.reg);
432 return;
433 default:
434 vpanic("addRegUsage_AMD64RM");
435 }
436}
437
438static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
439{
440 switch (op->tag) {
441 case Arm_Mem:
442 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
443 return;
444 case Arm_Reg:
445 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
446 return;
447 default:
448 vpanic("mapRegs_AMD64RM");
449 }
450}
451
452
sewardj9b967672005-02-08 11:13:09 +0000453/* --------- Instructions. --------- */
454
florian55085f82012-11-21 00:36:55 +0000455static const HChar* showAMD64ScalarSz ( Int sz ) {
sewardj9b967672005-02-08 11:13:09 +0000456 switch (sz) {
457 case 2: return "w";
458 case 4: return "l";
459 case 8: return "q";
460 default: vpanic("showAMD64ScalarSz");
461 }
462}
463
florian55085f82012-11-21 00:36:55 +0000464const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
sewardjd0a12df2005-02-10 02:07:43 +0000465 switch (op) {
466 case Aun_NOT: return "not";
467 case Aun_NEG: return "neg";
468 default: vpanic("showAMD64UnaryOp");
469 }
470}
sewardj614b3fb2005-02-02 02:16:03 +0000471
florian55085f82012-11-21 00:36:55 +0000472const HChar* showAMD64AluOp ( AMD64AluOp op ) {
sewardj614b3fb2005-02-02 02:16:03 +0000473 switch (op) {
474 case Aalu_MOV: return "mov";
475 case Aalu_CMP: return "cmp";
476 case Aalu_ADD: return "add";
477 case Aalu_SUB: return "sub";
478 case Aalu_ADC: return "adc";
479 case Aalu_SBB: return "sbb";
480 case Aalu_AND: return "and";
481 case Aalu_OR: return "or";
482 case Aalu_XOR: return "xor";
sewardj7de0d3c2005-02-13 02:26:41 +0000483 case Aalu_MUL: return "imul";
sewardj614b3fb2005-02-02 02:16:03 +0000484 default: vpanic("showAMD64AluOp");
485 }
486}
487
florian55085f82012-11-21 00:36:55 +0000488const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
sewardj8258a8c2005-02-02 03:11:24 +0000489 switch (op) {
490 case Ash_SHL: return "shl";
491 case Ash_SHR: return "shr";
492 case Ash_SAR: return "sar";
493 default: vpanic("showAMD64ShiftOp");
494 }
495}
496
florian55085f82012-11-21 00:36:55 +0000497const HChar* showA87FpOp ( A87FpOp op ) {
sewardj25a85812005-05-08 23:03:48 +0000498 switch (op) {
sewardj25a85812005-05-08 23:03:48 +0000499 case Afp_SCALE: return "scale";
500 case Afp_ATAN: return "atan";
501 case Afp_YL2X: return "yl2x";
sewardj5e205372005-05-09 02:57:08 +0000502 case Afp_YL2XP1: return "yl2xp1";
sewardjf4c803b2006-09-11 11:07:34 +0000503 case Afp_PREM: return "prem";
sewardj4970e4e2008-10-11 10:07:55 +0000504 case Afp_PREM1: return "prem1";
sewardj25a85812005-05-08 23:03:48 +0000505 case Afp_SQRT: return "sqrt";
sewardj25a85812005-05-08 23:03:48 +0000506 case Afp_SIN: return "sin";
507 case Afp_COS: return "cos";
sewardj5e205372005-05-09 02:57:08 +0000508 case Afp_TAN: return "tan";
sewardj25a85812005-05-08 23:03:48 +0000509 case Afp_ROUND: return "round";
510 case Afp_2XM1: return "2xm1";
511 default: vpanic("showA87FpOp");
512 }
513}
sewardj1001dc42005-02-21 08:25:55 +0000514
florian55085f82012-11-21 00:36:55 +0000515const HChar* showAMD64SseOp ( AMD64SseOp op ) {
sewardj1001dc42005-02-21 08:25:55 +0000516 switch (op) {
sewardj18303862005-02-21 12:36:54 +0000517 case Asse_MOV: return "movups";
sewardj1001dc42005-02-21 08:25:55 +0000518 case Asse_ADDF: return "add";
519 case Asse_SUBF: return "sub";
520 case Asse_MULF: return "mul";
521 case Asse_DIVF: return "div";
sewardj1a01e652005-02-23 11:39:21 +0000522 case Asse_MAXF: return "max";
523 case Asse_MINF: return "min";
sewardj8d965312005-02-25 02:48:47 +0000524 case Asse_CMPEQF: return "cmpFeq";
525 case Asse_CMPLTF: return "cmpFlt";
526 case Asse_CMPLEF: return "cmpFle";
527 case Asse_CMPUNF: return "cmpFun";
sewardja7ba8c42005-05-10 20:08:34 +0000528 case Asse_RCPF: return "rcp";
529 case Asse_RSQRTF: return "rsqrt";
sewardj18303862005-02-21 12:36:54 +0000530 case Asse_SQRTF: return "sqrt";
sewardj1001dc42005-02-21 08:25:55 +0000531 case Asse_AND: return "and";
532 case Asse_OR: return "or";
533 case Asse_XOR: return "xor";
534 case Asse_ANDN: return "andn";
sewardj97628592005-05-10 22:42:54 +0000535 case Asse_ADD8: return "paddb";
536 case Asse_ADD16: return "paddw";
537 case Asse_ADD32: return "paddd";
sewardj09717342005-05-05 21:34:02 +0000538 case Asse_ADD64: return "paddq";
sewardj5992bd02005-05-11 02:13:42 +0000539 case Asse_QADD8U: return "paddusb";
540 case Asse_QADD16U: return "paddusw";
541 case Asse_QADD8S: return "paddsb";
542 case Asse_QADD16S: return "paddsw";
sewardj97628592005-05-10 22:42:54 +0000543 case Asse_SUB8: return "psubb";
544 case Asse_SUB16: return "psubw";
545 case Asse_SUB32: return "psubd";
sewardj09717342005-05-05 21:34:02 +0000546 case Asse_SUB64: return "psubq";
sewardj97628592005-05-10 22:42:54 +0000547 case Asse_QSUB8U: return "psubusb";
548 case Asse_QSUB16U: return "psubusw";
549 case Asse_QSUB8S: return "psubsb";
550 case Asse_QSUB16S: return "psubsw";
sewardjadffcef2005-05-11 00:03:06 +0000551 case Asse_MUL16: return "pmullw";
552 case Asse_MULHI16U: return "pmulhuw";
553 case Asse_MULHI16S: return "pmulhw";
sewardj5992bd02005-05-11 02:13:42 +0000554 case Asse_AVG8U: return "pavgb";
555 case Asse_AVG16U: return "pavgw";
sewardjadffcef2005-05-11 00:03:06 +0000556 case Asse_MAX16S: return "pmaxw";
557 case Asse_MAX8U: return "pmaxub";
558 case Asse_MIN16S: return "pminw";
559 case Asse_MIN8U: return "pminub";
sewardj5992bd02005-05-11 02:13:42 +0000560 case Asse_CMPEQ8: return "pcmpeqb";
561 case Asse_CMPEQ16: return "pcmpeqw";
sewardj09717342005-05-05 21:34:02 +0000562 case Asse_CMPEQ32: return "pcmpeqd";
sewardj5992bd02005-05-11 02:13:42 +0000563 case Asse_CMPGT8S: return "pcmpgtb";
564 case Asse_CMPGT16S: return "pcmpgtw";
565 case Asse_CMPGT32S: return "pcmpgtd";
sewardjadffcef2005-05-11 00:03:06 +0000566 case Asse_SHL16: return "psllw";
567 case Asse_SHL32: return "pslld";
568 case Asse_SHL64: return "psllq";
569 case Asse_SHR16: return "psrlw";
570 case Asse_SHR32: return "psrld";
sewardj09717342005-05-05 21:34:02 +0000571 case Asse_SHR64: return "psrlq";
sewardjadffcef2005-05-11 00:03:06 +0000572 case Asse_SAR16: return "psraw";
573 case Asse_SAR32: return "psrad";
sewardj97628592005-05-10 22:42:54 +0000574 case Asse_PACKSSD: return "packssdw";
575 case Asse_PACKSSW: return "packsswb";
576 case Asse_PACKUSW: return "packuswb";
577 case Asse_UNPCKHB: return "punpckhb";
578 case Asse_UNPCKHW: return "punpckhw";
579 case Asse_UNPCKHD: return "punpckhd";
580 case Asse_UNPCKHQ: return "punpckhq";
581 case Asse_UNPCKLB: return "punpcklb";
582 case Asse_UNPCKLW: return "punpcklw";
583 case Asse_UNPCKLD: return "punpckld";
584 case Asse_UNPCKLQ: return "punpcklq";
sewardj1001dc42005-02-21 08:25:55 +0000585 default: vpanic("showAMD64SseOp");
586 }
587}
sewardj614b3fb2005-02-02 02:16:03 +0000588
sewardj813ce9e2005-02-04 21:16:48 +0000589AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000590 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj813ce9e2005-02-04 21:16:48 +0000591 i->tag = Ain_Imm64;
592 i->Ain.Imm64.imm64 = imm64;
593 i->Ain.Imm64.dst = dst;
594 return i;
595}
sewardj614b3fb2005-02-02 02:16:03 +0000596AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000597 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj614b3fb2005-02-02 02:16:03 +0000598 i->tag = Ain_Alu64R;
599 i->Ain.Alu64R.op = op;
600 i->Ain.Alu64R.src = src;
601 i->Ain.Alu64R.dst = dst;
602 return i;
603}
sewardjf67eadf2005-02-03 03:53:52 +0000604AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000605 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjf67eadf2005-02-03 03:53:52 +0000606 i->tag = Ain_Alu64M;
607 i->Ain.Alu64M.op = op;
608 i->Ain.Alu64M.src = src;
609 i->Ain.Alu64M.dst = dst;
610 vassert(op != Aalu_MUL);
611 return i;
612}
sewardj501a3392005-05-11 15:37:50 +0000613AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000614 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj8258a8c2005-02-02 03:11:24 +0000615 i->tag = Ain_Sh64;
616 i->Ain.Sh64.op = op;
617 i->Ain.Sh64.src = src;
618 i->Ain.Sh64.dst = dst;
619 return i;
620}
sewardj501a3392005-05-11 15:37:50 +0000621AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000622 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj501a3392005-05-11 15:37:50 +0000623 i->tag = Ain_Test64;
624 i->Ain.Test64.imm32 = imm32;
625 i->Ain.Test64.dst = dst;
sewardj05b3b6a2005-02-04 01:44:33 +0000626 return i;
627}
sewardj501a3392005-05-11 15:37:50 +0000628AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000629 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjd0a12df2005-02-10 02:07:43 +0000630 i->tag = Ain_Unary64;
631 i->Ain.Unary64.op = op;
632 i->Ain.Unary64.dst = dst;
633 return i;
634}
sewardj6ce1a232007-03-31 19:12:38 +0000635AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000636 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj6ce1a232007-03-31 19:12:38 +0000637 i->tag = Ain_Lea64;
638 i->Ain.Lea64.am = am;
639 i->Ain.Lea64.dst = dst;
640 return i;
641}
sewardj9cc2bbf2011-06-05 17:56:03 +0000642AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000643 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj9cc2bbf2011-06-05 17:56:03 +0000644 i->tag = Ain_Alu32R;
645 i->Ain.Alu32R.op = op;
646 i->Ain.Alu32R.src = src;
647 i->Ain.Alu32R.dst = dst;
648 switch (op) {
649 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
650 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
651 default: vassert(0);
652 }
653 return i;
654}
sewardj501a3392005-05-11 15:37:50 +0000655AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
floriand8e3eca2015-03-13 12:46:49 +0000656 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj9b967672005-02-08 11:13:09 +0000657 i->tag = Ain_MulL;
658 i->Ain.MulL.syned = syned;
sewardj9b967672005-02-08 11:13:09 +0000659 i->Ain.MulL.src = src;
sewardj9b967672005-02-08 11:13:09 +0000660 return i;
661}
sewardj7de0d3c2005-02-13 02:26:41 +0000662AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
floriand8e3eca2015-03-13 12:46:49 +0000663 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj7de0d3c2005-02-13 02:26:41 +0000664 i->tag = Ain_Div;
665 i->Ain.Div.syned = syned;
666 i->Ain.Div.sz = sz;
667 i->Ain.Div.src = src;
668 vassert(sz == 4 || sz == 8);
669 return i;
670}
sewardj1001dc42005-02-21 08:25:55 +0000671AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
floriand8e3eca2015-03-13 12:46:49 +0000672 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1001dc42005-02-21 08:25:55 +0000673 i->tag = Ain_Push;
674 i->Ain.Push.src = src;
675 return i;
676}
sewardjcfe046e2013-01-17 14:23:53 +0000677AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
678 RetLoc rloc ) {
floriand8e3eca2015-03-13 12:46:49 +0000679 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj05b3b6a2005-02-04 01:44:33 +0000680 i->tag = Ain_Call;
681 i->Ain.Call.cond = cond;
682 i->Ain.Call.target = target;
683 i->Ain.Call.regparms = regparms;
sewardjcfe046e2013-01-17 14:23:53 +0000684 i->Ain.Call.rloc = rloc;
sewardj05b3b6a2005-02-04 01:44:33 +0000685 vassert(regparms >= 0 && regparms <= 6);
sewardj74142b82013-08-08 10:28:59 +0000686 vassert(is_sane_RetLoc(rloc));
sewardj05b3b6a2005-02-04 01:44:33 +0000687 return i;
688}
sewardjc6f970f2012-04-02 21:54:49 +0000689
690AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
691 AMD64CondCode cond, Bool toFastEP ) {
floriand8e3eca2015-03-13 12:46:49 +0000692 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjc6f970f2012-04-02 21:54:49 +0000693 i->tag = Ain_XDirect;
694 i->Ain.XDirect.dstGA = dstGA;
695 i->Ain.XDirect.amRIP = amRIP;
696 i->Ain.XDirect.cond = cond;
697 i->Ain.XDirect.toFastEP = toFastEP;
sewardjf67eadf2005-02-03 03:53:52 +0000698 return i;
699}
sewardjc6f970f2012-04-02 21:54:49 +0000700AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
701 AMD64CondCode cond ) {
floriand8e3eca2015-03-13 12:46:49 +0000702 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjc6f970f2012-04-02 21:54:49 +0000703 i->tag = Ain_XIndir;
704 i->Ain.XIndir.dstGA = dstGA;
705 i->Ain.XIndir.amRIP = amRIP;
706 i->Ain.XIndir.cond = cond;
707 return i;
708}
709AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
710 AMD64CondCode cond, IRJumpKind jk ) {
floriand8e3eca2015-03-13 12:46:49 +0000711 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjc6f970f2012-04-02 21:54:49 +0000712 i->tag = Ain_XAssisted;
713 i->Ain.XAssisted.dstGA = dstGA;
714 i->Ain.XAssisted.amRIP = amRIP;
715 i->Ain.XAssisted.cond = cond;
716 i->Ain.XAssisted.jk = jk;
717 return i;
718}
719
sewardje357c672015-01-27 23:35:58 +0000720AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000721 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj05b3b6a2005-02-04 01:44:33 +0000722 i->tag = Ain_CMov64;
723 i->Ain.CMov64.cond = cond;
724 i->Ain.CMov64.src = src;
725 i->Ain.CMov64.dst = dst;
726 vassert(cond != Acc_ALWAYS);
727 return i;
728}
sewardjbdea5502015-01-27 23:17:02 +0000729AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
730 AMD64AMode* addr, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000731 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjbdea5502015-01-27 23:17:02 +0000732 i->tag = Ain_CLoad;
733 i->Ain.CLoad.cond = cond;
734 i->Ain.CLoad.szB = szB;
735 i->Ain.CLoad.addr = addr;
736 i->Ain.CLoad.dst = dst;
sewardj6f1ec582015-01-28 10:52:36 +0000737 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
738 return i;
739}
740AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
741 HReg src, AMD64AMode* addr ) {
floriand8e3eca2015-03-13 12:46:49 +0000742 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj6f1ec582015-01-28 10:52:36 +0000743 i->tag = Ain_CStore;
744 i->Ain.CStore.cond = cond;
745 i->Ain.CStore.szB = szB;
746 i->Ain.CStore.src = src;
747 i->Ain.CStore.addr = addr;
748 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
sewardjbdea5502015-01-27 23:17:02 +0000749 return i;
750}
sewardjca257bc2010-09-08 08:34:52 +0000751AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000752 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjca257bc2010-09-08 08:34:52 +0000753 i->tag = Ain_MovxLQ;
754 i->Ain.MovxLQ.syned = syned;
755 i->Ain.MovxLQ.src = src;
756 i->Ain.MovxLQ.dst = dst;
sewardjf67eadf2005-02-03 03:53:52 +0000757 return i;
758}
sewardj8258a8c2005-02-02 03:11:24 +0000759AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
760 AMD64AMode* src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000761 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj8258a8c2005-02-02 03:11:24 +0000762 i->tag = Ain_LoadEX;
763 i->Ain.LoadEX.szSmall = szSmall;
764 i->Ain.LoadEX.syned = syned;
765 i->Ain.LoadEX.src = src;
766 i->Ain.LoadEX.dst = dst;
767 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
768 return i;
769}
sewardjf67eadf2005-02-03 03:53:52 +0000770AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000771 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjf67eadf2005-02-03 03:53:52 +0000772 i->tag = Ain_Store;
773 i->Ain.Store.sz = sz;
774 i->Ain.Store.src = src;
775 i->Ain.Store.dst = dst;
776 vassert(sz == 1 || sz == 2 || sz == 4);
777 return i;
778}
sewardja5bd0af2005-03-24 20:40:12 +0000779AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000780 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardja5bd0af2005-03-24 20:40:12 +0000781 i->tag = Ain_Set64;
782 i->Ain.Set64.cond = cond;
783 i->Ain.Set64.dst = dst;
784 return i;
785}
sewardjf53b7352005-04-06 20:01:56 +0000786AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000787 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjf53b7352005-04-06 20:01:56 +0000788 i->tag = Ain_Bsfr64;
789 i->Ain.Bsfr64.isFwds = isFwds;
790 i->Ain.Bsfr64.src = src;
791 i->Ain.Bsfr64.dst = dst;
792 return i;
793}
sewardje9d8a262009-07-01 08:06:34 +0000794AMD64Instr* AMD64Instr_MFence ( void ) {
floriand8e3eca2015-03-13 12:46:49 +0000795 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj25a85812005-05-08 23:03:48 +0000796 i->tag = Ain_MFence;
797 return i;
798}
sewardje9d8a262009-07-01 08:06:34 +0000799AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
floriand8e3eca2015-03-13 12:46:49 +0000800 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardje9d8a262009-07-01 08:06:34 +0000801 i->tag = Ain_ACAS;
802 i->Ain.ACAS.addr = addr;
803 i->Ain.ACAS.sz = sz;
804 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
805 return i;
806}
807AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
floriand8e3eca2015-03-13 12:46:49 +0000808 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardje9d8a262009-07-01 08:06:34 +0000809 i->tag = Ain_DACAS;
810 i->Ain.DACAS.addr = addr;
811 i->Ain.DACAS.sz = sz;
812 vassert(sz == 8 || sz == 4);
813 return i;
814}
815
sewardj25a85812005-05-08 23:03:48 +0000816AMD64Instr* AMD64Instr_A87Free ( Int nregs )
817{
floriand8e3eca2015-03-13 12:46:49 +0000818 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj25a85812005-05-08 23:03:48 +0000819 i->tag = Ain_A87Free;
820 i->Ain.A87Free.nregs = nregs;
821 vassert(nregs >= 1 && nregs <= 7);
822 return i;
823}
sewardjd15b5972010-06-27 09:06:34 +0000824AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
sewardj25a85812005-05-08 23:03:48 +0000825{
floriand8e3eca2015-03-13 12:46:49 +0000826 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj25a85812005-05-08 23:03:48 +0000827 i->tag = Ain_A87PushPop;
828 i->Ain.A87PushPop.addr = addr;
829 i->Ain.A87PushPop.isPush = isPush;
sewardjd15b5972010-06-27 09:06:34 +0000830 i->Ain.A87PushPop.szB = szB;
831 vassert(szB == 8 || szB == 4);
sewardj25a85812005-05-08 23:03:48 +0000832 return i;
833}
834AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
835{
floriand8e3eca2015-03-13 12:46:49 +0000836 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj25a85812005-05-08 23:03:48 +0000837 i->tag = Ain_A87FpOp;
838 i->Ain.A87FpOp.op = op;
839 return i;
840}
841AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
842{
floriand8e3eca2015-03-13 12:46:49 +0000843 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj25a85812005-05-08 23:03:48 +0000844 i->tag = Ain_A87LdCW;
845 i->Ain.A87LdCW.addr = addr;
sewardjd0a12df2005-02-10 02:07:43 +0000846 return i;
847}
sewardjf4c803b2006-09-11 11:07:34 +0000848AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
849{
floriand8e3eca2015-03-13 12:46:49 +0000850 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjf4c803b2006-09-11 11:07:34 +0000851 i->tag = Ain_A87StSW;
852 i->Ain.A87StSW.addr = addr;
853 return i;
854}
sewardj1a01e652005-02-23 11:39:21 +0000855AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
floriand8e3eca2015-03-13 12:46:49 +0000856 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1a01e652005-02-23 11:39:21 +0000857 i->tag = Ain_LdMXCSR;
858 i->Ain.LdMXCSR.addr = addr;
859 return i;
860}
sewardj18303862005-02-21 12:36:54 +0000861AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000862 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj18303862005-02-21 12:36:54 +0000863 i->tag = Ain_SseUComIS;
sewardj03ccf852005-03-21 02:47:42 +0000864 i->Ain.SseUComIS.sz = toUChar(sz);
sewardj18303862005-02-21 12:36:54 +0000865 i->Ain.SseUComIS.srcL = srcL;
866 i->Ain.SseUComIS.srcR = srcR;
867 i->Ain.SseUComIS.dst = dst;
868 vassert(sz == 4 || sz == 8);
869 return i;
870}
sewardj1a01e652005-02-23 11:39:21 +0000871AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000872 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1a01e652005-02-23 11:39:21 +0000873 i->tag = Ain_SseSI2SF;
sewardj03ccf852005-03-21 02:47:42 +0000874 i->Ain.SseSI2SF.szS = toUChar(szS);
875 i->Ain.SseSI2SF.szD = toUChar(szD);
sewardj1a01e652005-02-23 11:39:21 +0000876 i->Ain.SseSI2SF.src = src;
877 i->Ain.SseSI2SF.dst = dst;
878 vassert(szS == 4 || szS == 8);
879 vassert(szD == 4 || szD == 8);
880 return i;
881}
882AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000883 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1a01e652005-02-23 11:39:21 +0000884 i->tag = Ain_SseSF2SI;
sewardj03ccf852005-03-21 02:47:42 +0000885 i->Ain.SseSF2SI.szS = toUChar(szS);
886 i->Ain.SseSF2SI.szD = toUChar(szD);
sewardj1a01e652005-02-23 11:39:21 +0000887 i->Ain.SseSF2SI.src = src;
888 i->Ain.SseSF2SI.dst = dst;
889 vassert(szS == 4 || szS == 8);
890 vassert(szD == 4 || szD == 8);
891 return i;
892}
sewardj8d965312005-02-25 02:48:47 +0000893AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
894{
floriand8e3eca2015-03-13 12:46:49 +0000895 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj8d965312005-02-25 02:48:47 +0000896 i->tag = Ain_SseSDSS;
897 i->Ain.SseSDSS.from64 = from64;
898 i->Ain.SseSDSS.src = src;
899 i->Ain.SseSDSS.dst = dst;
900 return i;
901}
sewardj18303862005-02-21 12:36:54 +0000902AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
903 HReg reg, AMD64AMode* addr ) {
floriand8e3eca2015-03-13 12:46:49 +0000904 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1001dc42005-02-21 08:25:55 +0000905 i->tag = Ain_SseLdSt;
906 i->Ain.SseLdSt.isLoad = isLoad;
sewardj03ccf852005-03-21 02:47:42 +0000907 i->Ain.SseLdSt.sz = toUChar(sz);
sewardj1001dc42005-02-21 08:25:55 +0000908 i->Ain.SseLdSt.reg = reg;
909 i->Ain.SseLdSt.addr = addr;
sewardj18303862005-02-21 12:36:54 +0000910 vassert(sz == 4 || sz == 8 || sz == 16);
sewardj1001dc42005-02-21 08:25:55 +0000911 return i;
912}
sewardj70dbeb02015-08-12 11:15:53 +0000913AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
914 HReg src, AMD64AMode* addr )
915{
916 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
917 i->tag = Ain_SseCStore;
918 i->Ain.SseCStore.cond = cond;
919 i->Ain.SseCStore.src = src;
920 i->Ain.SseCStore.addr = addr;
921 vassert(cond != Acc_ALWAYS);
922 return i;
923}
924AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
925 AMD64AMode* addr, HReg dst )
926{
927 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
928 i->tag = Ain_SseCLoad;
929 i->Ain.SseCLoad.cond = cond;
930 i->Ain.SseCLoad.addr = addr;
931 i->Ain.SseCLoad.dst = dst;
932 vassert(cond != Acc_ALWAYS);
933 return i;
934}
sewardj1001dc42005-02-21 08:25:55 +0000935AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
936{
floriand8e3eca2015-03-13 12:46:49 +0000937 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1001dc42005-02-21 08:25:55 +0000938 i->tag = Ain_SseLdzLO;
939 i->Ain.SseLdzLO.sz = sz;
940 i->Ain.SseLdzLO.reg = reg;
941 i->Ain.SseLdzLO.addr = addr;
942 vassert(sz == 4 || sz == 8);
943 return i;
944}
sewardj8d965312005-02-25 02:48:47 +0000945AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000946 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj8d965312005-02-25 02:48:47 +0000947 i->tag = Ain_Sse32Fx4;
948 i->Ain.Sse32Fx4.op = op;
949 i->Ain.Sse32Fx4.src = src;
950 i->Ain.Sse32Fx4.dst = dst;
951 vassert(op != Asse_MOV);
952 return i;
953}
954AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000955 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj8d965312005-02-25 02:48:47 +0000956 i->tag = Ain_Sse32FLo;
957 i->Ain.Sse32FLo.op = op;
958 i->Ain.Sse32FLo.src = src;
959 i->Ain.Sse32FLo.dst = dst;
960 vassert(op != Asse_MOV);
961 return i;
962}
sewardj4c328cf2005-05-05 12:05:54 +0000963AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000964 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj4c328cf2005-05-05 12:05:54 +0000965 i->tag = Ain_Sse64Fx2;
966 i->Ain.Sse64Fx2.op = op;
967 i->Ain.Sse64Fx2.src = src;
968 i->Ain.Sse64Fx2.dst = dst;
969 vassert(op != Asse_MOV);
970 return i;
971}
sewardj1001dc42005-02-21 08:25:55 +0000972AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000973 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1001dc42005-02-21 08:25:55 +0000974 i->tag = Ain_Sse64FLo;
975 i->Ain.Sse64FLo.op = op;
976 i->Ain.Sse64FLo.src = src;
977 i->Ain.Sse64FLo.dst = dst;
978 vassert(op != Asse_MOV);
979 return i;
980}
981AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
floriand8e3eca2015-03-13 12:46:49 +0000982 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj1001dc42005-02-21 08:25:55 +0000983 i->tag = Ain_SseReRg;
984 i->Ain.SseReRg.op = op;
985 i->Ain.SseReRg.src = re;
986 i->Ain.SseReRg.dst = rg;
987 return i;
988}
sewardj8d965312005-02-25 02:48:47 +0000989AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000990 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj8d965312005-02-25 02:48:47 +0000991 i->tag = Ain_SseCMov;
992 i->Ain.SseCMov.cond = cond;
993 i->Ain.SseCMov.src = src;
994 i->Ain.SseCMov.dst = dst;
995 vassert(cond != Acc_ALWAYS);
996 return i;
997}
sewardj09717342005-05-05 21:34:02 +0000998AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
floriand8e3eca2015-03-13 12:46:49 +0000999 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj09717342005-05-05 21:34:02 +00001000 i->tag = Ain_SseShuf;
1001 i->Ain.SseShuf.order = order;
1002 i->Ain.SseShuf.src = src;
1003 i->Ain.SseShuf.dst = dst;
1004 vassert(order >= 0 && order <= 0xFF);
1005 return i;
1006}
sewardj3616a2e2012-05-27 16:18:13 +00001007//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1008//uu HReg reg, AMD64AMode* addr ) {
floriand8e3eca2015-03-13 12:46:49 +00001009//uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj3616a2e2012-05-27 16:18:13 +00001010//uu i->tag = Ain_AvxLdSt;
1011//uu i->Ain.AvxLdSt.isLoad = isLoad;
1012//uu i->Ain.AvxLdSt.reg = reg;
1013//uu i->Ain.AvxLdSt.addr = addr;
1014//uu return i;
1015//uu }
1016//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
floriand8e3eca2015-03-13 12:46:49 +00001017//uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardj3616a2e2012-05-27 16:18:13 +00001018//uu i->tag = Ain_AvxReRg;
1019//uu i->Ain.AvxReRg.op = op;
1020//uu i->Ain.AvxReRg.src = re;
1021//uu i->Ain.AvxReRg.dst = rg;
1022//uu return i;
1023//uu }
sewardjc6f970f2012-04-02 21:54:49 +00001024AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1025 AMD64AMode* amFailAddr ) {
floriand8e3eca2015-03-13 12:46:49 +00001026 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjc6f970f2012-04-02 21:54:49 +00001027 i->tag = Ain_EvCheck;
1028 i->Ain.EvCheck.amCounter = amCounter;
1029 i->Ain.EvCheck.amFailAddr = amFailAddr;
1030 return i;
1031}
1032AMD64Instr* AMD64Instr_ProfInc ( void ) {
floriand8e3eca2015-03-13 12:46:49 +00001033 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
sewardjc6f970f2012-04-02 21:54:49 +00001034 i->tag = Ain_ProfInc;
1035 return i;
1036}
sewardjc33671d2005-02-01 20:30:00 +00001037
/* Pretty-print an AMD64 instruction to the VEX log, using AT&T-style
   (src-before-dst) syntax.  Only 64-bit mode is supported.  Note that
   some cases end with 'return' and some with 'break'; since the switch
   is the last statement of the function, both terminate it. */
void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
         ppHRegAMD64(i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
         ppAMD64RMI(i->Ain.Alu64R.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
         ppAMD64RI(i->Ain.Alu64M.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
         /* src == 0 encodes a shift by %cl rather than by immediate. */
         if (i->Ain.Sh64.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Ain.Sh64.src);
         ppHRegAMD64(i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
         ppHRegAMD64(i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
         ppHRegAMD64(i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         vex_printf("leaq ");
         ppAMD64AMode(i->Ain.Lea64.am);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
         ppAMD64RMI_lo32(i->Ain.Alu32R.src);
         vex_printf(",");
         ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
         ppAMD64RM(i->Ain.MulL.src);
         return;
      case Ain_Div:
         vex_printf("%cdiv%s ",
                    i->Ain.Div.syned ? 's' : 'u',
                    showAMD64ScalarSz(i->Ain.Div.sz));
         ppAMD64RM(i->Ain.Div.src);
         return;
      case Ain_Push:
         vex_printf("pushq ");
         ppAMD64RMI(i->Ain.Push.src);
         return;
      case Ain_Call:
         vex_printf("call%s[%d,",
                    i->Ain.Call.cond==Acc_ALWAYS
                       ? "" : showAMD64CondCode(i->Ain.Call.cond),
                    i->Ain.Call.regparms );
         ppRetLoc(i->Ain.Call.rloc);
         vex_printf("] 0x%llx", i->Ain.Call.target);
         break;

      /* The three translation-exit forms print the conditional
         control-flow sequence the emitter will generate. */
      case Ain_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XDirect.cond));
         vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
         vex_printf("movq %%r11,");
         ppAMD64AMode(i->Ain.XDirect.amRIP);
         vex_printf("; ");
         vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
                    i->Ain.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Ain_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XIndir.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XIndir.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XIndir.amRIP);
         vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
         return;
      case Ain_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XAssisted.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XAssisted.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XAssisted.amRIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
                    (Int)i->Ain.XAssisted.jk);
         vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
         return;

      case Ain_CMov64:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
         ppHRegAMD64(i->Ain.CMov64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.CMov64.dst);
         return;
      case Ain_CLoad:
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.CLoad.cond));
         vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
         ppAMD64AMode(i->Ain.CLoad.addr);
         vex_printf(", ");
         /* 4-byte loads only touch the low 32 bits of dst. */
         (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.CLoad.dst);
         vex_printf(" }");
         return;
      case Ain_CStore:
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.CStore.cond));
         vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
         (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.CStore.src);
         vex_printf(", ");
         ppAMD64AMode(i->Ain.CStore.addr);
         vex_printf(" }");
         return;

      case Ain_MovxLQ:
         vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
         ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         /* movl is the natural way to print a 4-byte zero-extending
            load; all other widths/signedness use movzx/movsx forms. */
         if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
            vex_printf("movl ");
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
         } else {
            vex_printf("mov%c%cq ",
                       i->Ain.LoadEX.syned ? 's' : 'z',
                       i->Ain.LoadEX.szSmall==1
                          ? 'b'
                          : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64(i->Ain.LoadEX.dst);
         }
         return;
      case Ain_Store:
         vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
                              : (i->Ain.Store.sz==2 ? 'w' : 'l'));
         ppHRegAMD64(i->Ain.Store.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Store.dst);
         return;
      case Ain_Set64:
         vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
         ppHRegAMD64(i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
         ppHRegAMD64(i->Ain.Bsfr64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         vex_printf("mfence" );
         return;
      case Ain_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
                    : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
         vex_printf("{%%rax->%%rbx},");
         ppAMD64AMode(i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
                    (Int)(2 * i->Ain.DACAS.sz));
         ppAMD64AMode(i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
         break;
      case Ain_A87PushPop:
         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
         ppAMD64AMode(i->Ain.A87PushPop.addr);
         break;
      case Ain_A87FpOp:
         vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
         break;
      case Ain_A87LdCW:
         vex_printf("fldcw ");
         ppAMD64AMode(i->Ain.A87LdCW.addr);
         break;
      case Ain_A87StSW:
         vex_printf("fstsw ");
         ppAMD64AMode(i->Ain.A87StSW.addr);
         break;
      case Ain_LdMXCSR:
         vex_printf("ldmxcsr ");
         ppAMD64AMode(i->Ain.LdMXCSR.addr);
         break;
      case Ain_SseUComIS:
         vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseUComIS.srcL);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseUComIS.srcR);
         vex_printf(" ; pushfq ; popq ");
         ppHRegAMD64(i->Ain.SseUComIS.dst);
         break;
      case Ain_SseSI2SF:
         vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
         (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSI2SF.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSI2SF.dst);
         break;
      case Ain_SseSF2SI:
         vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseSF2SI.src);
         vex_printf(",");
         (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSF2SI.dst);
         break;
      case Ain_SseSDSS:
         vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
         ppHRegAMD64(i->Ain.SseSDSS.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSDSS.dst);
         break;
      case Ain_SseLdSt:
         switch (i->Ain.SseLdSt.sz) {
            case 4:  vex_printf("movss "); break;
            case 8:  vex_printf("movsd "); break;
            case 16: vex_printf("movups "); break;
            default: vassert(0);
         }
         if (i->Ain.SseLdSt.isLoad) {
            ppAMD64AMode(i->Ain.SseLdSt.addr);
            vex_printf(",");
            ppHRegAMD64(i->Ain.SseLdSt.reg);
         } else {
            ppHRegAMD64(i->Ain.SseLdSt.reg);
            vex_printf(",");
            ppAMD64AMode(i->Ain.SseLdSt.addr);
         }
         return;
      case Ain_SseCStore:
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.SseCStore.cond));
         vex_printf("movups ");
         ppHRegAMD64(i->Ain.SseCStore.src);
         vex_printf(", ");
         ppAMD64AMode(i->Ain.SseCStore.addr);
         vex_printf(" }");
         return;
      case Ain_SseCLoad:
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.SseCLoad.cond));
         vex_printf("movups ");
         ppAMD64AMode(i->Ain.SseCLoad.addr);
         vex_printf(", ");
         ppHRegAMD64(i->Ain.SseCLoad.dst);
         vex_printf(" }");
         return;
      case Ain_SseLdzLO:
         vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
         ppAMD64AMode(i->Ain.SseLdzLO.addr);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseLdzLO.reg);
         return;
      case Ain_Sse32Fx4:
         vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
         ppHRegAMD64(i->Ain.Sse32Fx4.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
         ppHRegAMD64(i->Ain.Sse32FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
         ppHRegAMD64(i->Ain.Sse64Fx2.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
         ppHRegAMD64(i->Ain.Sse64FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
         ppHRegAMD64(i->Ain.SseReRg.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
         ppHRegAMD64(i->Ain.SseCMov.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         vex_printf("pshufd $0x%x,", (UInt)i->Ain.SseShuf.order);
         ppHRegAMD64(i->Ain.SseShuf.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    vex_printf("vmovups ");
      //uu    if (i->Ain.AvxLdSt.isLoad) {
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu       vex_printf(",");
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu    } else {
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu       vex_printf(",");
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu    }
      //uu    return;
      //uu case Ain_AvxReRg:
      //uu    vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
      //uu    ppHRegAMD64(i->Ain.AvxReRg.src);
      //uu    vex_printf(",");
      //uu    ppHRegAMD64(i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         vex_printf("(evCheck) decl ");
         ppAMD64AMode(i->Ain.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Ain_ProfInc:
         /* Counter address is patched in after code emission. */
         vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
         return;
      default:
         vpanic("ppAMD64Instr");
   }
}
1391
1392/* --------- Helpers for register allocation. --------- */
1393
floriand8c64e02014-10-08 08:54:44 +00001394void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
sewardjc33671d2005-02-01 20:30:00 +00001395{
sewardj1001dc42005-02-21 08:25:55 +00001396 Bool unary;
cerion92b64362005-12-13 12:02:26 +00001397 vassert(mode64 == True);
sewardjc33671d2005-02-01 20:30:00 +00001398 initHRegUsage(u);
1399 switch (i->tag) {
sewardj813ce9e2005-02-04 21:16:48 +00001400 case Ain_Imm64:
1401 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1402 return;
sewardjf67eadf2005-02-03 03:53:52 +00001403 case Ain_Alu64R:
1404 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1405 if (i->Ain.Alu64R.op == Aalu_MOV) {
1406 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1407 return;
1408 }
1409 if (i->Ain.Alu64R.op == Aalu_CMP) {
1410 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1411 return;
1412 }
1413 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1414 return;
1415 case Ain_Alu64M:
1416 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1417 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1418 return;
1419 case Ain_Sh64:
sewardj501a3392005-05-11 15:37:50 +00001420 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
sewardjf67eadf2005-02-03 03:53:52 +00001421 if (i->Ain.Sh64.src == 0)
1422 addHRegUse(u, HRmRead, hregAMD64_RCX());
1423 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001424 case Ain_Test64:
sewardj501a3392005-05-11 15:37:50 +00001425 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
sewardj05b3b6a2005-02-04 01:44:33 +00001426 return;
sewardjd0a12df2005-02-10 02:07:43 +00001427 case Ain_Unary64:
sewardj501a3392005-05-11 15:37:50 +00001428 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
sewardjd0a12df2005-02-10 02:07:43 +00001429 return;
sewardj6ce1a232007-03-31 19:12:38 +00001430 case Ain_Lea64:
1431 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1432 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1433 return;
sewardj9cc2bbf2011-06-05 17:56:03 +00001434 case Ain_Alu32R:
1435 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1436 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1437 if (i->Ain.Alu32R.op == Aalu_CMP) {
1438 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1439 return;
1440 }
1441 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1442 return;
sewardj9b967672005-02-08 11:13:09 +00001443 case Ain_MulL:
1444 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1445 addHRegUse(u, HRmModify, hregAMD64_RAX());
1446 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1447 return;
sewardj7de0d3c2005-02-13 02:26:41 +00001448 case Ain_Div:
1449 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1450 addHRegUse(u, HRmModify, hregAMD64_RAX());
1451 addHRegUse(u, HRmModify, hregAMD64_RDX());
1452 return;
sewardj1001dc42005-02-21 08:25:55 +00001453 case Ain_Push:
1454 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1455 addHRegUse(u, HRmModify, hregAMD64_RSP());
1456 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001457 case Ain_Call:
1458 /* This is a bit subtle. */
1459 /* First off, claim it trashes all the caller-saved regs
1460 which fall within the register allocator's jurisdiction.
1461 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
sewardj3616a2e2012-05-27 16:18:13 +00001462 and all the xmm registers.
sewardj05b3b6a2005-02-04 01:44:33 +00001463 */
1464 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1465 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1466 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1467 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1468 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1469 addHRegUse(u, HRmWrite, hregAMD64_R8());
1470 addHRegUse(u, HRmWrite, hregAMD64_R9());
1471 addHRegUse(u, HRmWrite, hregAMD64_R10());
1472 addHRegUse(u, HRmWrite, hregAMD64_R11());
sewardj1001dc42005-02-21 08:25:55 +00001473 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1474 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
sewardj1001dc42005-02-21 08:25:55 +00001475 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1476 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1477 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1478 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1479 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1480 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1481 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1482 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1483 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1484 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
sewardj05b3b6a2005-02-04 01:44:33 +00001485
1486 /* Now we have to state any parameter-carrying registers
1487 which might be read. This depends on the regparmness. */
1488 switch (i->Ain.Call.regparms) {
1489 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1490 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1491 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1492 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1493 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1494 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1495 case 0: break;
1496 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1497 }
1498 /* Finally, there is the issue that the insn trashes a
1499 register because the literal target address has to be
1500 loaded into a register. Fortunately, r11 is stated in the
1501 ABI as a scratch register, and so seems a suitable victim. */
1502 addHRegUse(u, HRmWrite, hregAMD64_R11());
1503 /* Upshot of this is that the assembler really must use r11,
1504 and no other, as a destination temporary. */
1505 return;
sewardjc6f970f2012-04-02 21:54:49 +00001506 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1507 conditionally exit the block. Hence we only need to list (1)
1508 the registers that they read, and (2) the registers that they
1509 write in the case where the block is not exited. (2) is
1510 empty, hence only (1) is relevant here. */
1511 case Ain_XDirect:
1512 /* Don't bother to mention the write to %r11, since it is not
1513 available to the allocator. */
1514 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1515 return;
1516 case Ain_XIndir:
1517 /* Ditto re %r11 */
1518 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1519 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1520 return;
1521 case Ain_XAssisted:
1522 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1523 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1524 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
sewardjf67eadf2005-02-03 03:53:52 +00001525 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001526 case Ain_CMov64:
sewardje357c672015-01-27 23:35:58 +00001527 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
sewardj05b3b6a2005-02-04 01:44:33 +00001528 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1529 return;
sewardjbdea5502015-01-27 23:17:02 +00001530 case Ain_CLoad:
1531 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1532 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1533 return;
sewardj6f1ec582015-01-28 10:52:36 +00001534 case Ain_CStore:
1535 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1536 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1537 return;
sewardjca257bc2010-09-08 08:34:52 +00001538 case Ain_MovxLQ:
1539 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1540 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
sewardjf67eadf2005-02-03 03:53:52 +00001541 return;
1542 case Ain_LoadEX:
1543 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1544 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1545 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001546 case Ain_Store:
1547 addHRegUse(u, HRmRead, i->Ain.Store.src);
1548 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1549 return;
sewardja5bd0af2005-03-24 20:40:12 +00001550 case Ain_Set64:
1551 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1552 return;
sewardjf53b7352005-04-06 20:01:56 +00001553 case Ain_Bsfr64:
1554 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1555 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1556 return;
sewardjd0a12df2005-02-10 02:07:43 +00001557 case Ain_MFence:
1558 return;
sewardje9d8a262009-07-01 08:06:34 +00001559 case Ain_ACAS:
1560 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1561 addHRegUse(u, HRmRead, hregAMD64_RBX());
1562 addHRegUse(u, HRmModify, hregAMD64_RAX());
1563 return;
1564 case Ain_DACAS:
1565 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1566 addHRegUse(u, HRmRead, hregAMD64_RCX());
1567 addHRegUse(u, HRmRead, hregAMD64_RBX());
1568 addHRegUse(u, HRmModify, hregAMD64_RDX());
1569 addHRegUse(u, HRmModify, hregAMD64_RAX());
1570 return;
sewardj25a85812005-05-08 23:03:48 +00001571 case Ain_A87Free:
1572 return;
1573 case Ain_A87PushPop:
1574 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1575 return;
1576 case Ain_A87FpOp:
1577 return;
1578 case Ain_A87LdCW:
1579 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1580 return;
sewardjf4c803b2006-09-11 11:07:34 +00001581 case Ain_A87StSW:
1582 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1583 return;
sewardj1a01e652005-02-23 11:39:21 +00001584 case Ain_LdMXCSR:
1585 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1586 return;
sewardj18303862005-02-21 12:36:54 +00001587 case Ain_SseUComIS:
1588 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1589 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1590 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1591 return;
sewardj1a01e652005-02-23 11:39:21 +00001592 case Ain_SseSI2SF:
1593 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1594 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1595 return;
1596 case Ain_SseSF2SI:
1597 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1598 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1599 return;
sewardj8d965312005-02-25 02:48:47 +00001600 case Ain_SseSDSS:
1601 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1602 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1603 return;
sewardj1001dc42005-02-21 08:25:55 +00001604 case Ain_SseLdSt:
1605 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1606 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1607 i->Ain.SseLdSt.reg);
1608 return;
sewardj70dbeb02015-08-12 11:15:53 +00001609 case Ain_SseCStore:
1610 addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
1611 addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
1612 return;
1613 case Ain_SseCLoad:
1614 addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
1615 addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
1616 return;
sewardj1001dc42005-02-21 08:25:55 +00001617 case Ain_SseLdzLO:
1618 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1619 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1620 return;
sewardj8d965312005-02-25 02:48:47 +00001621 case Ain_Sse32Fx4:
1622 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
sewardj03ccf852005-03-21 02:47:42 +00001623 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1624 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1625 || i->Ain.Sse32Fx4.op == Asse_SQRTF );
sewardj8d965312005-02-25 02:48:47 +00001626 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1627 addHRegUse(u, unary ? HRmWrite : HRmModify,
1628 i->Ain.Sse32Fx4.dst);
1629 return;
1630 case Ain_Sse32FLo:
1631 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
sewardj03ccf852005-03-21 02:47:42 +00001632 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1633 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1634 || i->Ain.Sse32FLo.op == Asse_SQRTF );
sewardj8d965312005-02-25 02:48:47 +00001635 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1636 addHRegUse(u, unary ? HRmWrite : HRmModify,
1637 i->Ain.Sse32FLo.dst);
1638 return;
sewardj4c328cf2005-05-05 12:05:54 +00001639 case Ain_Sse64Fx2:
1640 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
sewardjca673ab2005-05-11 10:03:08 +00001641 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1642 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1643 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
sewardj4c328cf2005-05-05 12:05:54 +00001644 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1645 addHRegUse(u, unary ? HRmWrite : HRmModify,
1646 i->Ain.Sse64Fx2.dst);
1647 return;
sewardj1001dc42005-02-21 08:25:55 +00001648 case Ain_Sse64FLo:
1649 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
sewardj03ccf852005-03-21 02:47:42 +00001650 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1651 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1652 || i->Ain.Sse64FLo.op == Asse_SQRTF );
sewardj1001dc42005-02-21 08:25:55 +00001653 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1654 addHRegUse(u, unary ? HRmWrite : HRmModify,
1655 i->Ain.Sse64FLo.dst);
1656 return;
1657 case Ain_SseReRg:
sewardjac530442005-05-11 16:13:37 +00001658 if ( (i->Ain.SseReRg.op == Asse_XOR
1659 || i->Ain.SseReRg.op == Asse_CMPEQ32)
florian79efdc62013-02-11 00:47:35 +00001660 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
sewardjac530442005-05-11 16:13:37 +00001661 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1662 r,r' as a write of a value to r, and independent of any
1663 previous value in r */
sewardj1001dc42005-02-21 08:25:55 +00001664 /* (as opposed to a rite of passage :-) */
1665 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1666 } else {
1667 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1668 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1669 ? HRmWrite : HRmModify,
1670 i->Ain.SseReRg.dst);
1671 }
1672 return;
sewardj8d965312005-02-25 02:48:47 +00001673 case Ain_SseCMov:
1674 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1675 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1676 return;
sewardj09717342005-05-05 21:34:02 +00001677 case Ain_SseShuf:
1678 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1679 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1680 return;
sewardj3616a2e2012-05-27 16:18:13 +00001681 //uu case Ain_AvxLdSt:
1682 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1683 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1684 //uu i->Ain.AvxLdSt.reg);
1685 //uu return;
1686 //uu case Ain_AvxReRg:
1687 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1688 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1689 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1690 //uu /* See comments on the case for Ain_SseReRg. */
1691 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1692 //uu } else {
1693 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1694 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1695 //uu ? HRmWrite : HRmModify,
1696 //uu i->Ain.AvxReRg.dst);
1697 //uu }
1698 //uu return;
sewardjc6f970f2012-04-02 21:54:49 +00001699 case Ain_EvCheck:
1700 /* We expect both amodes only to mention %rbp, so this is in
1701 fact pointless, since %rbp isn't allocatable, but anyway.. */
1702 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1703 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1704 return;
1705 case Ain_ProfInc:
1706 addHRegUse(u, HRmWrite, hregAMD64_R11());
1707 return;
sewardjc33671d2005-02-01 20:30:00 +00001708 default:
cerion92b64362005-12-13 12:02:26 +00001709 ppAMD64Instr(i, mode64);
sewardjc33671d2005-02-01 20:30:00 +00001710 vpanic("getRegUsage_AMD64Instr");
1711 }
1712}
sewardjf67eadf2005-02-03 03:53:52 +00001713
1714/* local helper */
sewardj25a85812005-05-08 23:03:48 +00001715static inline void mapReg(HRegRemap* m, HReg* r)
sewardjf67eadf2005-02-03 03:53:52 +00001716{
1717 *r = lookupHRegRemap(m, *r);
1718}
sewardjc33671d2005-02-01 20:30:00 +00001719
/* Apply the register remapping |m| (virtual -> real) in place to every
   register field mentioned by instruction |i|.  Called by the register
   allocator once an assignment has been chosen.  Only 64-bit mode
   (mode64 == True) is supported by this backend. */
void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         mapReg(m, &i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
         mapReg(m, &i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
         mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         /* Shift amount is either an immediate or hardwired %cl;
            only the dst needs mapping. */
         mapReg(m, &i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         mapReg(m, &i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         mapReg(m, &i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
         mapReg(m, &i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
         mapReg(m, &i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         mapRegs_AMD64RM(m, i->Ain.MulL.src);
         return;
      case Ain_Div:
         mapRegs_AMD64RM(m, i->Ain.Div.src);
         return;
      case Ain_Push:
         mapRegs_AMD64RMI(m, i->Ain.Push.src);
         return;
      case Ain_Call:
         /* Target address is a literal and argument registers are
            fixed by the ABI -- nothing to remap. */
         return;
      case Ain_XDirect:
         mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
         return;
      case Ain_XIndir:
         mapReg(m, &i->Ain.XIndir.dstGA);
         mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
         return;
      case Ain_XAssisted:
         mapReg(m, &i->Ain.XAssisted.dstGA);
         mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
         return;
      case Ain_CMov64:
         mapReg(m, &i->Ain.CMov64.src);
         mapReg(m, &i->Ain.CMov64.dst);
         return;
      case Ain_CLoad:
         mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
         mapReg(m, &i->Ain.CLoad.dst);
         return;
      case Ain_CStore:
         mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
         mapReg(m, &i->Ain.CStore.src);
         return;
      case Ain_MovxLQ:
         mapReg(m, &i->Ain.MovxLQ.src);
         mapReg(m, &i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
         mapReg(m, &i->Ain.LoadEX.dst);
         return;
      case Ain_Store:
         mapReg(m, &i->Ain.Store.src);
         mapRegs_AMD64AMode(m, i->Ain.Store.dst);
         return;
      case Ain_Set64:
         mapReg(m, &i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         mapReg(m, &i->Ain.Bsfr64.src);
         mapReg(m, &i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         return;
      case Ain_ACAS:
         /* rax/rbx operands are hardwired; only the amode is mapped. */
         mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         return;
      case Ain_A87PushPop:
         mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         return;
      case Ain_A87LdCW:
         mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
         return;
      case Ain_LdMXCSR:
         mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
         return;
      case Ain_SseUComIS:
         mapReg(m, &i->Ain.SseUComIS.srcL);
         mapReg(m, &i->Ain.SseUComIS.srcR);
         mapReg(m, &i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         mapReg(m, &i->Ain.SseSI2SF.src);
         mapReg(m, &i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         mapReg(m, &i->Ain.SseSF2SI.src);
         mapReg(m, &i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         mapReg(m, &i->Ain.SseSDSS.src);
         mapReg(m, &i->Ain.SseSDSS.dst);
         return;
      case Ain_SseLdSt:
         mapReg(m, &i->Ain.SseLdSt.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
         break;   /* falls out of the switch; equivalent to return here */
      case Ain_SseCStore:
         mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
         mapReg(m, &i->Ain.SseCStore.src);
         return;
      case Ain_SseCLoad:
         mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
         mapReg(m, &i->Ain.SseCLoad.dst);
         return;
      case Ain_SseLdzLO:
         mapReg(m, &i->Ain.SseLdzLO.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
         break;   /* ditto */
      case Ain_Sse32Fx4:
         mapReg(m, &i->Ain.Sse32Fx4.src);
         mapReg(m, &i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         mapReg(m, &i->Ain.Sse32FLo.src);
         mapReg(m, &i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         mapReg(m, &i->Ain.Sse64Fx2.src);
         mapReg(m, &i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         mapReg(m, &i->Ain.Sse64FLo.src);
         mapReg(m, &i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         mapReg(m, &i->Ain.SseReRg.src);
         mapReg(m, &i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         mapReg(m, &i->Ain.SseCMov.src);
         mapReg(m, &i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         mapReg(m, &i->Ain.SseShuf.src);
         mapReg(m, &i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    mapReg(m, &i->Ain.AvxLdSt.reg);
      //uu    mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
      //uu    break;
      //uu case Ain_AvxReRg:
      //uu    mapReg(m, &i->Ain.AvxReRg.src);
      //uu    mapReg(m, &i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         /* We expect both amodes only to mention %rbp, so this is in
            fact pointless, since %rbp isn't allocatable, but anyway.. */
         mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
         mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
         return;
      case Ain_ProfInc:
         /* hardwires r11 -- nothing to modify. */
         return;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("mapRegs_AMD64Instr");
   }
}
1912
1913/* Figure out if i represents a reg-reg move, and if so assign the
1914 source and destination to *src and *dst. If in doubt say No. Used
1915 by the register allocator to do move coalescing.
1916*/
floriand8c64e02014-10-08 08:54:44 +00001917Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
sewardjc33671d2005-02-01 20:30:00 +00001918{
sewardjc4530ae2012-05-21 10:18:49 +00001919 switch (i->tag) {
1920 case Ain_Alu64R:
1921 /* Moves between integer regs */
1922 if (i->Ain.Alu64R.op != Aalu_MOV)
1923 return False;
1924 if (i->Ain.Alu64R.src->tag != Armi_Reg)
1925 return False;
1926 *src = i->Ain.Alu64R.src->Armi.Reg.reg;
1927 *dst = i->Ain.Alu64R.dst;
1928 return True;
1929 case Ain_SseReRg:
1930 /* Moves between SSE regs */
1931 if (i->Ain.SseReRg.op != Asse_MOV)
1932 return False;
1933 *src = i->Ain.SseReRg.src;
1934 *dst = i->Ain.SseReRg.dst;
1935 return True;
sewardj3616a2e2012-05-27 16:18:13 +00001936 //uu case Ain_AvxReRg:
1937 //uu /* Moves between AVX regs */
1938 //uu if (i->Ain.AvxReRg.op != Asse_MOV)
1939 //uu return False;
1940 //uu *src = i->Ain.AvxReRg.src;
1941 //uu *dst = i->Ain.AvxReRg.dst;
1942 //uu return True;
sewardjc4530ae2012-05-21 10:18:49 +00001943 default:
sewardjf67eadf2005-02-03 03:53:52 +00001944 return False;
sewardjf67eadf2005-02-03 03:53:52 +00001945 }
sewardjc4530ae2012-05-21 10:18:49 +00001946 /*NOTREACHED*/
sewardjc33671d2005-02-01 20:30:00 +00001947}
1948
1949
1950/* Generate amd64 spill/reload instructions under the direction of the
1951 register allocator. Note it's critical these don't write the
1952 condition codes. */
1953
sewardj2a1ed8e2009-12-31 19:26:03 +00001954void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1955 HReg rreg, Int offsetB, Bool mode64 )
sewardjd0a12df2005-02-10 02:07:43 +00001956{
1957 AMD64AMode* am;
1958 vassert(offsetB >= 0);
1959 vassert(!hregIsVirtual(rreg));
cerion92b64362005-12-13 12:02:26 +00001960 vassert(mode64 == True);
sewardj2a1ed8e2009-12-31 19:26:03 +00001961 *i1 = *i2 = NULL;
sewardjd0a12df2005-02-10 02:07:43 +00001962 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
sewardjd0a12df2005-02-10 02:07:43 +00001963 switch (hregClass(rreg)) {
1964 case HRcInt64:
sewardj2a1ed8e2009-12-31 19:26:03 +00001965 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
1966 return;
sewardj1001dc42005-02-21 08:25:55 +00001967 case HRcVec128:
sewardj2a1ed8e2009-12-31 19:26:03 +00001968 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
1969 return;
sewardjd0a12df2005-02-10 02:07:43 +00001970 default:
1971 ppHRegClass(hregClass(rreg));
1972 vpanic("genSpill_AMD64: unimplemented regclass");
1973 }
sewardjc33671d2005-02-01 20:30:00 +00001974}
1975
sewardj2a1ed8e2009-12-31 19:26:03 +00001976void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1977 HReg rreg, Int offsetB, Bool mode64 )
sewardjd0a12df2005-02-10 02:07:43 +00001978{
1979 AMD64AMode* am;
1980 vassert(offsetB >= 0);
1981 vassert(!hregIsVirtual(rreg));
cerion92b64362005-12-13 12:02:26 +00001982 vassert(mode64 == True);
sewardj2a1ed8e2009-12-31 19:26:03 +00001983 *i1 = *i2 = NULL;
sewardjd0a12df2005-02-10 02:07:43 +00001984 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1985 switch (hregClass(rreg)) {
1986 case HRcInt64:
sewardj2a1ed8e2009-12-31 19:26:03 +00001987 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
1988 return;
sewardj1001dc42005-02-21 08:25:55 +00001989 case HRcVec128:
sewardj2a1ed8e2009-12-31 19:26:03 +00001990 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
1991 return;
sewardjd0a12df2005-02-10 02:07:43 +00001992 default:
1993 ppHRegClass(hregClass(rreg));
1994 vpanic("genReload_AMD64: unimplemented regclass");
1995 }
sewardjc33671d2005-02-01 20:30:00 +00001996}
1997
Elliott Hughesed398002017-06-21 14:41:24 -07001998AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
1999{
2000 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
2001
2002 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
2003 Convert to: src=RMI_Mem, dst=Reg
2004 */
2005 if (i->tag == Ain_Alu64R
2006 && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
2007 || i->Ain.Alu64R.op == Aalu_XOR)
2008 && i->Ain.Alu64R.src->tag == Armi_Reg
2009 && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
2010 vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
2011 return AMD64Instr_Alu64R(
2012 i->Ain.Alu64R.op,
2013 AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
2014 i->Ain.Alu64R.dst
2015 );
2016 }
2017
2018 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
2019 Convert to: src=RI_Imm, dst=Mem
2020 */
2021 if (i->tag == Ain_Alu64R
2022 && (i->Ain.Alu64R.op == Aalu_CMP)
2023 && i->Ain.Alu64R.src->tag == Armi_Imm
2024 && sameHReg(i->Ain.Alu64R.dst, vreg)) {
2025 return AMD64Instr_Alu64M(
2026 i->Ain.Alu64R.op,
2027 AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
2028 AMD64AMode_IR( spill_off, hregAMD64_RBP())
2029 );
2030 }
2031
2032 return NULL;
2033}
2034
sewardjc33671d2005-02-01 20:30:00 +00002035
sewardj813ce9e2005-02-04 21:16:48 +00002036/* --------- The amd64 assembler (bleh.) --------- */
2037
2038/* Produce the low three bits of an integer register number. */
sewardja5b50222015-03-26 07:18:32 +00002039inline static UInt iregEnc210 ( HReg r )
sewardj813ce9e2005-02-04 21:16:48 +00002040{
2041 UInt n;
2042 vassert(hregClass(r) == HRcInt64);
2043 vassert(!hregIsVirtual(r));
sewardja5b50222015-03-26 07:18:32 +00002044 n = hregEncoding(r);
sewardj813ce9e2005-02-04 21:16:48 +00002045 vassert(n <= 15);
sewardja5b50222015-03-26 07:18:32 +00002046 return n & 7;
sewardj813ce9e2005-02-04 21:16:48 +00002047}
2048
2049/* Produce bit 3 of an integer register number. */
sewardja5b50222015-03-26 07:18:32 +00002050inline static UInt iregEnc3 ( HReg r )
sewardj813ce9e2005-02-04 21:16:48 +00002051{
2052 UInt n;
2053 vassert(hregClass(r) == HRcInt64);
2054 vassert(!hregIsVirtual(r));
sewardja5b50222015-03-26 07:18:32 +00002055 n = hregEncoding(r);
sewardj813ce9e2005-02-04 21:16:48 +00002056 vassert(n <= 15);
sewardja5b50222015-03-26 07:18:32 +00002057 return (n >> 3) & 1;
sewardj813ce9e2005-02-04 21:16:48 +00002058}
2059
sewardjdc2ca892005-04-07 02:01:23 +00002060/* Produce a complete 4-bit integer register number. */
sewardja5b50222015-03-26 07:18:32 +00002061inline static UInt iregEnc3210 ( HReg r )
sewardjdc2ca892005-04-07 02:01:23 +00002062{
2063 UInt n;
2064 vassert(hregClass(r) == HRcInt64);
2065 vassert(!hregIsVirtual(r));
sewardja5b50222015-03-26 07:18:32 +00002066 n = hregEncoding(r);
sewardjdc2ca892005-04-07 02:01:23 +00002067 vassert(n <= 15);
sewardja5b50222015-03-26 07:18:32 +00002068 return n;
sewardjdc2ca892005-04-07 02:01:23 +00002069}
2070
/* Produce a complete 4-bit SSE vector register number. */
2072inline static UInt vregEnc3210 ( HReg r )
sewardj1001dc42005-02-21 08:25:55 +00002073{
2074 UInt n;
2075 vassert(hregClass(r) == HRcVec128);
2076 vassert(!hregIsVirtual(r));
sewardja5b50222015-03-26 07:18:32 +00002077 n = hregEncoding(r);
sewardj1001dc42005-02-21 08:25:55 +00002078 vassert(n <= 15);
sewardja5b50222015-03-26 07:18:32 +00002079 return n;
sewardj1001dc42005-02-21 08:25:55 +00002080}
sewardj813ce9e2005-02-04 21:16:48 +00002081
sewardja5b50222015-03-26 07:18:32 +00002082inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
sewardj813ce9e2005-02-04 21:16:48 +00002083{
sewardjb5e7ced2013-01-24 08:55:25 +00002084 vassert(mod < 4);
2085 vassert((reg|regmem) < 8);
sewardja5b50222015-03-26 07:18:32 +00002086 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
sewardj813ce9e2005-02-04 21:16:48 +00002087}
2088
sewardja5b50222015-03-26 07:18:32 +00002089inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
sewardj813ce9e2005-02-04 21:16:48 +00002090{
sewardjb5e7ced2013-01-24 08:55:25 +00002091 vassert(shift < 4);
2092 vassert((regindex|regbase) < 8);
sewardja5b50222015-03-26 07:18:32 +00002093 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
sewardj813ce9e2005-02-04 21:16:48 +00002094}
2095
2096static UChar* emit32 ( UChar* p, UInt w32 )
2097{
sewardj03ccf852005-03-21 02:47:42 +00002098 *p++ = toUChar((w32) & 0x000000FF);
2099 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2100 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2101 *p++ = toUChar((w32 >> 24) & 0x000000FF);
sewardj813ce9e2005-02-04 21:16:48 +00002102 return p;
2103}
2104
sewardj1b8d58e2005-02-05 14:34:18 +00002105static UChar* emit64 ( UChar* p, ULong w64 )
2106{
sewardj03ccf852005-03-21 02:47:42 +00002107 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2108 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
sewardj1b8d58e2005-02-05 14:34:18 +00002109 return p;
2110}
2111
sewardj813ce9e2005-02-04 21:16:48 +00002112/* Does a sign-extend of the lowest 8 bits give
2113 the original number? */
2114static Bool fits8bits ( UInt w32 )
2115{
2116 Int i32 = (Int)w32;
florian108e03f2015-03-10 16:11:58 +00002117 return toBool(i32 == ((Int)(w32 << 24) >> 24));
sewardj813ce9e2005-02-04 21:16:48 +00002118}
sewardj4d77a9c2007-08-25 23:21:08 +00002119/* Can the lower 32 bits be signedly widened to produce the whole
2120 64-bit value? In other words, are the top 33 bits either all 0 or
2121 all 1 ? */
2122static Bool fitsIn32Bits ( ULong x )
2123{
florian108e03f2015-03-10 16:11:58 +00002124 Long y1;
2125 y1 = x << 32;
sewardj4d77a9c2007-08-25 23:21:08 +00002126 y1 >>=/*s*/ 32;
2127 return toBool(x == y1);
2128}
sewardj813ce9e2005-02-04 21:16:48 +00002129
2130
2131/* Forming mod-reg-rm bytes and scale-index-base bytes.
2132
sewardje95b04a2005-02-07 17:47:21 +00002133 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
sewardj813ce9e2005-02-04 21:16:48 +00002134 = 00 greg ereg
2135
sewardje95b04a2005-02-07 17:47:21 +00002136 greg, d8(ereg) | ereg is neither of: RSP R12
sewardj813ce9e2005-02-04 21:16:48 +00002137 = 01 greg ereg, d8
2138
sewardje95b04a2005-02-07 17:47:21 +00002139 greg, d32(ereg) | ereg is neither of: RSP R12
sewardj813ce9e2005-02-04 21:16:48 +00002140 = 10 greg ereg, d32
2141
sewardje95b04a2005-02-07 17:47:21 +00002142 greg, d8(ereg) | ereg is either: RSP R12
2143 = 01 greg 100, 0x24, d8
2144 (lowest bit of rex distinguishes R12/RSP)
2145
sewardj7de0d3c2005-02-13 02:26:41 +00002146 greg, d32(ereg) | ereg is either: RSP R12
2147 = 10 greg 100, 0x24, d32
2148 (lowest bit of rex distinguishes R12/RSP)
sewardj813ce9e2005-02-04 21:16:48 +00002149
2150 -----------------------------------------------
2151
2152 greg, d8(base,index,scale)
2153 | index != RSP
2154 = 01 greg 100, scale index base, d8
2155
2156 greg, d32(base,index,scale)
2157 | index != RSP
2158 = 10 greg 100, scale index base, d32
2159*/
/* Emit the ModRM byte plus any SIB byte and displacement for the pair
   (greg, amode), where |gregEnc3210| is the full 4-bit encoding of the
   register operand.  Only the low 3 bits are used here; bit 3 must be
   carried in a REX prefix (see rexAMode_M).  Returns the advanced
   output pointer.  RSP/RBP/R12/R13 need special handling because
   their low-3-bit encodings (100b and 101b) are reused by the
   ModRM/SIB scheme to mean "SIB byte follows" and "disp32, no base".
   See the encoding table in the comment above. */
static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
{
   UInt gregEnc210 = gregEnc3210 & 7;
   if (am->tag == Aam_IR) {
      /* mod=00, no displacement: base must not be RSP/RBP/R12/R13. */
      if (am->Aam.IR.imm == 0
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
         ) {
         *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
         return p;
      }
      /* mod=01, disp8: base must not be RSP/R12 (those need a SIB). */
      if (fits8bits(am->Aam.IR.imm)
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
         ) {
         *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      /* mod=10, disp32: base must not be RSP/R12. */
      if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
         ) {
         *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      /* RSP/R12 base with disp8: rm=100 forces a SIB byte; 0x24 is
         the SIB meaning "no index, base=rm" (scale=0, index=100b,
         base=100b).  The REX.B bit distinguishes R12 from RSP. */
      if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
           || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
          && fits8bits(am->Aam.IR.imm)) {
 	 *p++ = mkModRegRM(1, gregEnc210, 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      /* R12 base with disp32, same SIB trick.  The equivalent RSP case
         is disabled below pending a test case that exercises it. */
      if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
	     || wait for test case for RSP case */
	  sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
         *p++ = mkModRegRM(2, gregEnc210, 4);
         *p++ = 0x24;
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Aam_IRRS) {
      /* mod=01, SIB, disp8.  RSP cannot be an index register (index
         encoding 100b means "no index"). */
      if (fits8bits(am->Aam.IRRS.imm)
          && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
         *p++ = mkModRegRM(1, gregEnc210, 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
                                          iregEnc210(am->Aam.IRRS.base));
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
         return p;
      }
      /* mod=10, SIB, disp32. */
      if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
         *p++ = mkModRegRM(2, gregEnc210, 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
                                          iregEnc210(am->Aam.IRRS.base));
         p = emit32(p, am->Aam.IRRS.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
2231
sewardja5b50222015-03-26 07:18:32 +00002232static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2233{
2234 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2235}
2236
2237static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2238{
2239 vassert(gregEnc3210 < 16);
2240 return doAMode_M__wrk(p, gregEnc3210, am);
2241}
2242
sewardj813ce9e2005-02-04 21:16:48 +00002243
2244/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
sewardja5b50222015-03-26 07:18:32 +00002245inline
2246static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
sewardj813ce9e2005-02-04 21:16:48 +00002247{
sewardja5b50222015-03-26 07:18:32 +00002248 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
sewardj813ce9e2005-02-04 21:16:48 +00002249 return p;
2250}
2251
sewardja5b50222015-03-26 07:18:32 +00002252static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2253{
2254 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2255}
2256
2257static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2258{
2259 vassert(gregEnc3210 < 16);
2260 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2261}
2262
2263static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2264{
2265 vassert(eregEnc3210 < 16);
2266 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2267}
2268
2269static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2270{
2271 vassert( (gregEnc3210|eregEnc3210) < 16);
2272 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2273}
2274
sewardj813ce9e2005-02-04 21:16:48 +00002275
sewardj549e0642005-02-05 12:00:14 +00002276/* Clear the W bit on a REX byte, thereby changing the operand size
2277 back to whatever that instruction's default operand size is. */
2278static inline UChar clearWBit ( UChar rex )
2279{
sewardja5b50222015-03-26 07:18:32 +00002280 return rex & ~(1<<3);
sewardj549e0642005-02-05 12:00:14 +00002281}
2282
2283
2284/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
{
   /* REX layout is 0100WRXB: R carries bit 3 of the greg encoding,
      X bit 3 of the index register, B bit 3 of the base register. */
   if (am->tag == Aam_IR) {
      /* reg+imm amode: there is no index register, so X is 0. */
      UChar W = 1;  /* we want 64-bit mode */
      UChar R = (gregEnc3210 >> 3) & 1;
      UChar X = 0; /* not relevant */
      UChar B = iregEnc3(am->Aam.IR.reg);
      return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
   }
   if (am->tag == Aam_IRRS) {
      /* base+index*scale+imm amode: both X and B are meaningful. */
      UChar W = 1;  /* we want 64-bit mode */
      UChar R = (gregEnc3210 >> 3) & 1;
      UChar X = iregEnc3(am->Aam.IRRS.index);
      UChar B = iregEnc3(am->Aam.IRRS.base);
      return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
   }
   /* Unknown amode tag: cannot happen for well-formed amodes. */
   vassert(0);
   return 0; /*NOTREACHED*/
}
2304
sewardja5b50222015-03-26 07:18:32 +00002305static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2306{
2307 return rexAMode_M__wrk(iregEnc3210(greg), am);
2308}
2309
2310static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2311{
2312 vassert(gregEnc3210 < 16);
2313 return rexAMode_M__wrk(gregEnc3210, am);
2314}
2315
2316
sewardj549e0642005-02-05 12:00:14 +00002317/* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
sewardja5b50222015-03-26 07:18:32 +00002318inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
sewardj549e0642005-02-05 12:00:14 +00002319{
2320 UChar W = 1; /* we want 64-bit mode */
sewardja5b50222015-03-26 07:18:32 +00002321 UChar R = (gregEnc3210 >> 3) & 1;
sewardj549e0642005-02-05 12:00:14 +00002322 UChar X = 0; /* not relevant */
sewardja5b50222015-03-26 07:18:32 +00002323 UChar B = (eregEnc3210 >> 3) & 1;
2324 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2325}
2326
2327static UChar rexAMode_R ( HReg greg, HReg ereg )
2328{
2329 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2330}
2331
2332static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2333{
2334 vassert(gregEnc3210 < 16);
2335 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2336}
2337
2338static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2339{
2340 vassert(eregEnc3210 < 16);
2341 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2342}
2343
2344static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2345{
2346 vassert((gregEnc3210|eregEnc3210) < 16);
2347 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
sewardj549e0642005-02-05 12:00:14 +00002348}
2349
sewardj813ce9e2005-02-04 21:16:48 +00002350
//uu /* May 2012: this VEX prefix stuff is currently unused, but it has
//uu    been verified correct (I reckon).  Certainly it has been known to
//uu    produce correct VEX prefixes during testing. */
2354//uu
2355//uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2356//uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2357//uu in verbatim. There's no range checking on the bits. */
2358//uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2359//uu UInt mmmmm, UInt rexW, UInt notVvvv,
2360//uu UInt L, UInt pp )
2361//uu {
2362//uu UChar byte0 = 0;
2363//uu UChar byte1 = 0;
2364//uu UChar byte2 = 0;
2365//uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2366//uu /* 2 byte encoding is possible. */
2367//uu byte0 = 0xC5;
2368//uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2369//uu | (L << 2) | pp;
2370//uu } else {
2371//uu /* 3 byte encoding is needed. */
2372//uu byte0 = 0xC4;
2373//uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2374//uu | ((rexB ^ 1) << 5) | mmmmm;
2375//uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2376//uu }
2377//uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2378//uu }
2379//uu
2380//uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2381//uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2382//uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2383//uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2384//uu vvvv=1111 (unused 3rd reg). */
2385//uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2386//uu {
2387//uu UChar L = 1; /* size = 256 */
2388//uu UChar pp = 0; /* no SIMD prefix */
2389//uu UChar mmmmm = 1; /* 0F */
2390//uu UChar notVvvv = 0; /* unused */
2391//uu UChar rexW = 0;
2392//uu UChar rexR = 0;
2393//uu UChar rexX = 0;
2394//uu UChar rexB = 0;
2395//uu /* Same logic as in rexAMode_M. */
2396//uu if (am->tag == Aam_IR) {
sewardja5b50222015-03-26 07:18:32 +00002397//uu rexR = iregEnc3(greg);
sewardj3616a2e2012-05-27 16:18:13 +00002398//uu rexX = 0; /* not relevant */
sewardja5b50222015-03-26 07:18:32 +00002399//uu rexB = iregEnc3(am->Aam.IR.reg);
sewardj3616a2e2012-05-27 16:18:13 +00002400//uu }
2401//uu else if (am->tag == Aam_IRRS) {
sewardja5b50222015-03-26 07:18:32 +00002402//uu rexR = iregEnc3(greg);
2403//uu rexX = iregEnc3(am->Aam.IRRS.index);
2404//uu rexB = iregEnc3(am->Aam.IRRS.base);
sewardj3616a2e2012-05-27 16:18:13 +00002405//uu } else {
2406//uu vassert(0);
2407//uu }
2408//uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2409//uu }
2410//uu
2411//uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2412//uu {
2413//uu switch (vex & 0xFF) {
2414//uu case 0xC5:
2415//uu *p++ = 0xC5;
2416//uu *p++ = (vex >> 8) & 0xFF;
2417//uu vassert(0 == (vex >> 16));
2418//uu break;
2419//uu case 0xC4:
2420//uu *p++ = 0xC4;
2421//uu *p++ = (vex >> 8) & 0xFF;
2422//uu *p++ = (vex >> 16) & 0xFF;
2423//uu vassert(0 == (vex >> 24));
2424//uu break;
2425//uu default:
2426//uu vassert(0);
2427//uu }
2428//uu return p;
2429//uu }
sewardjc4530ae2012-05-21 10:18:49 +00002430
2431
sewardj25a85812005-05-08 23:03:48 +00002432/* Emit ffree %st(N) */
2433static UChar* do_ffree_st ( UChar* p, Int n )
2434{
2435 vassert(n >= 0 && n <= 7);
2436 *p++ = 0xDD;
2437 *p++ = toUChar(0xC0 + n);
2438 return p;
2439}
2440
sewardjc33671d2005-02-01 20:30:00 +00002441/* Emit an instruction into buf and return the number of bytes used.
2442 Note that buf is not the insn's final place, and therefore it is
sewardjc6f970f2012-04-02 21:54:49 +00002443 imperative to emit position-independent code. If the emitted
2444 instruction was a profiler inc, set *is_profInc to True, else
2445 leave it unchanged. */
sewardjc33671d2005-02-01 20:30:00 +00002446
sewardjc6f970f2012-04-02 21:54:49 +00002447Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
floriand8c64e02014-10-08 08:54:44 +00002448 UChar* buf, Int nbuf, const AMD64Instr* i,
sewardj9b769162014-07-24 12:42:03 +00002449 Bool mode64, VexEndness endness_host,
florian8462d112014-09-24 15:18:09 +00002450 const void* disp_cp_chain_me_to_slowEP,
2451 const void* disp_cp_chain_me_to_fastEP,
2452 const void* disp_cp_xindir,
2453 const void* disp_cp_xassisted )
sewardjc33671d2005-02-01 20:30:00 +00002454{
sewardjc2bcb6f2005-02-07 00:17:12 +00002455 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
sewardj1001dc42005-02-21 08:25:55 +00002456 UInt xtra;
sewardja5bd0af2005-03-24 20:40:12 +00002457 UInt reg;
sewardj1001dc42005-02-21 08:25:55 +00002458 UChar rex;
sewardjc33671d2005-02-01 20:30:00 +00002459 UChar* p = &buf[0];
sewardj549e0642005-02-05 12:00:14 +00002460 UChar* ptmp;
sewardj25a85812005-05-08 23:03:48 +00002461 Int j;
sewardj70dbeb02015-08-12 11:15:53 +00002462 vassert(nbuf >= 64);
cerion92b64362005-12-13 12:02:26 +00002463 vassert(mode64 == True);
sewardj549e0642005-02-05 12:00:14 +00002464
cerion92b64362005-12-13 12:02:26 +00002465 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
sewardjc33671d2005-02-01 20:30:00 +00002466
2467 switch (i->tag) {
2468
sewardj1b8d58e2005-02-05 14:34:18 +00002469 case Ain_Imm64:
sewardj7cf5bd02011-03-22 16:51:38 +00002470 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2471 /* Use the short form (load into 32 bit reg, + default
2472 widening rule) for constants under 1 million. We could
2473 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2474 limit it to a smaller range for verifiability purposes. */
sewardja5b50222015-03-26 07:18:32 +00002475 if (1 & iregEnc3(i->Ain.Imm64.dst))
sewardj7cf5bd02011-03-22 16:51:38 +00002476 *p++ = 0x41;
sewardja5b50222015-03-26 07:18:32 +00002477 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
sewardj7cf5bd02011-03-22 16:51:38 +00002478 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2479 } else {
sewardja5b50222015-03-26 07:18:32 +00002480 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2481 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
sewardj7cf5bd02011-03-22 16:51:38 +00002482 p = emit64(p, i->Ain.Imm64.imm64);
2483 }
sewardj1b8d58e2005-02-05 14:34:18 +00002484 goto done;
2485
sewardj813ce9e2005-02-04 21:16:48 +00002486 case Ain_Alu64R:
2487 /* Deal specially with MOV */
2488 if (i->Ain.Alu64R.op == Aalu_MOV) {
2489 switch (i->Ain.Alu64R.src->tag) {
2490 case Armi_Imm:
sewardj7cf5bd02011-03-22 16:51:38 +00002491 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
sewardj95e154c2009-11-22 23:43:17 +00002492 /* Actually we could use this form for constants in
2493 the range 0 through 0x7FFFFFFF inclusive, but
2494 limit it to a small range for verifiability
2495 purposes. */
2496 /* Generate "movl $imm32, 32-bit-register" and let
2497 the default zero-extend rule cause the upper half
2498 of the dst to be zeroed out too. This saves 1
2499 and sometimes 2 bytes compared to the more
2500 obvious encoding in the 'else' branch. */
sewardja5b50222015-03-26 07:18:32 +00002501 if (1 & iregEnc3(i->Ain.Alu64R.dst))
sewardj95e154c2009-11-22 23:43:17 +00002502 *p++ = 0x41;
sewardja5b50222015-03-26 07:18:32 +00002503 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
sewardj95e154c2009-11-22 23:43:17 +00002504 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2505 } else {
sewardja5b50222015-03-26 07:18:32 +00002506 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
sewardj95e154c2009-11-22 23:43:17 +00002507 *p++ = 0xC7;
sewardja5b50222015-03-26 07:18:32 +00002508 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
sewardj95e154c2009-11-22 23:43:17 +00002509 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2510 }
sewardj813ce9e2005-02-04 21:16:48 +00002511 goto done;
2512 case Armi_Reg:
sewardj1b8d58e2005-02-05 14:34:18 +00002513 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2514 i->Ain.Alu64R.dst );
sewardj813ce9e2005-02-04 21:16:48 +00002515 *p++ = 0x89;
2516 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2517 i->Ain.Alu64R.dst);
2518 goto done;
2519 case Armi_Mem:
sewardj549e0642005-02-05 12:00:14 +00002520 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
sewardj813ce9e2005-02-04 21:16:48 +00002521 i->Ain.Alu64R.src->Armi.Mem.am);
2522 *p++ = 0x8B;
2523 p = doAMode_M(p, i->Ain.Alu64R.dst,
2524 i->Ain.Alu64R.src->Armi.Mem.am);
2525 goto done;
2526 default:
2527 goto bad;
2528 }
2529 }
sewardjd0a12df2005-02-10 02:07:43 +00002530 /* MUL */
2531 if (i->Ain.Alu64R.op == Aalu_MUL) {
2532 switch (i->Ain.Alu64R.src->tag) {
sewardj7de0d3c2005-02-13 02:26:41 +00002533 case Armi_Reg:
2534 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2535 i->Ain.Alu64R.src->Armi.Reg.reg);
2536 *p++ = 0x0F;
2537 *p++ = 0xAF;
2538 p = doAMode_R(p, i->Ain.Alu64R.dst,
2539 i->Ain.Alu64R.src->Armi.Reg.reg);
2540 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00002541 case Armi_Mem:
2542 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2543 i->Ain.Alu64R.src->Armi.Mem.am);
2544 *p++ = 0x0F;
2545 *p++ = 0xAF;
2546 p = doAMode_M(p, i->Ain.Alu64R.dst,
2547 i->Ain.Alu64R.src->Armi.Mem.am);
2548 goto done;
sewardj7de0d3c2005-02-13 02:26:41 +00002549 case Armi_Imm:
2550 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2551 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2552 *p++ = 0x6B;
2553 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
sewardj03ccf852005-03-21 02:47:42 +00002554 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
sewardj7de0d3c2005-02-13 02:26:41 +00002555 } else {
2556 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2557 *p++ = 0x69;
2558 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2559 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2560 }
2561 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00002562 default:
2563 goto bad;
2564 }
2565 }
sewardj549e0642005-02-05 12:00:14 +00002566 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2567 opc = opc_rr = subopc_imm = opc_imma = 0;
2568 switch (i->Ain.Alu64R.op) {
2569 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2570 subopc_imm = 2; opc_imma = 0x15; break;
2571 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2572 subopc_imm = 0; opc_imma = 0x05; break;
2573 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2574 subopc_imm = 5; opc_imma = 0x2D; break;
2575 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2576 subopc_imm = 3; opc_imma = 0x1D; break;
2577 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2578 subopc_imm = 4; opc_imma = 0x25; break;
2579 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2580 subopc_imm = 6; opc_imma = 0x35; break;
2581 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2582 subopc_imm = 1; opc_imma = 0x0D; break;
2583 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2584 subopc_imm = 7; opc_imma = 0x3D; break;
2585 default: goto bad;
2586 }
2587 switch (i->Ain.Alu64R.src->tag) {
2588 case Armi_Imm:
florian79efdc62013-02-11 00:47:35 +00002589 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
sewardj549e0642005-02-05 12:00:14 +00002590 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
sewardj18303862005-02-21 12:36:54 +00002591 goto bad; /* FIXME: awaiting test case */
sewardj03ccf852005-03-21 02:47:42 +00002592 *p++ = toUChar(opc_imma);
sewardj549e0642005-02-05 12:00:14 +00002593 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2594 } else
2595 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
sewardja5b50222015-03-26 07:18:32 +00002596 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
sewardj549e0642005-02-05 12:00:14 +00002597 *p++ = 0x83;
sewardja5b50222015-03-26 07:18:32 +00002598 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
sewardj03ccf852005-03-21 02:47:42 +00002599 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
sewardj549e0642005-02-05 12:00:14 +00002600 } else {
sewardja5b50222015-03-26 07:18:32 +00002601 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
sewardj549e0642005-02-05 12:00:14 +00002602 *p++ = 0x81;
sewardja5b50222015-03-26 07:18:32 +00002603 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
sewardj549e0642005-02-05 12:00:14 +00002604 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2605 }
2606 goto done;
2607 case Armi_Reg:
2608 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2609 i->Ain.Alu64R.dst);
sewardj03ccf852005-03-21 02:47:42 +00002610 *p++ = toUChar(opc_rr);
sewardj549e0642005-02-05 12:00:14 +00002611 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2612 i->Ain.Alu64R.dst);
2613 goto done;
2614 case Armi_Mem:
sewardj31191072005-02-05 18:24:47 +00002615 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2616 i->Ain.Alu64R.src->Armi.Mem.am);
sewardj03ccf852005-03-21 02:47:42 +00002617 *p++ = toUChar(opc);
sewardj549e0642005-02-05 12:00:14 +00002618 p = doAMode_M(p, i->Ain.Alu64R.dst,
2619 i->Ain.Alu64R.src->Armi.Mem.am);
2620 goto done;
2621 default:
2622 goto bad;
2623 }
sewardj813ce9e2005-02-04 21:16:48 +00002624 break;
2625
sewardj549e0642005-02-05 12:00:14 +00002626 case Ain_Alu64M:
2627 /* Deal specially with MOV */
2628 if (i->Ain.Alu64M.op == Aalu_MOV) {
2629 switch (i->Ain.Alu64M.src->tag) {
2630 case Ari_Reg:
2631 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2632 i->Ain.Alu64M.dst);
2633 *p++ = 0x89;
2634 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2635 i->Ain.Alu64M.dst);
2636 goto done;
2637 case Ari_Imm:
sewardja5b50222015-03-26 07:18:32 +00002638 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
sewardj549e0642005-02-05 12:00:14 +00002639 *p++ = 0xC7;
sewardja5b50222015-03-26 07:18:32 +00002640 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
sewardj549e0642005-02-05 12:00:14 +00002641 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2642 goto done;
2643 default:
2644 goto bad;
2645 }
2646 }
Elliott Hughesed398002017-06-21 14:41:24 -07002647 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2648 allowed here. (This is derived from the x86 version of same). */
2649 opc = subopc_imm = opc_imma = 0;
2650 switch (i->Ain.Alu64M.op) {
2651 case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
2652 default: goto bad;
2653 }
2654 switch (i->Ain.Alu64M.src->tag) {
2655 /*
2656 case Xri_Reg:
2657 *p++ = toUChar(opc);
2658 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2659 i->Xin.Alu32M.dst);
2660 goto done;
2661 */
2662 case Ari_Imm:
2663 if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
2664 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2665 *p++ = 0x83;
2666 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2667 *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
2668 goto done;
2669 } else {
2670 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2671 *p++ = 0x81;
2672 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2673 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2674 goto done;
2675 }
2676 default:
2677 goto bad;
2678 }
2679
sewardj549e0642005-02-05 12:00:14 +00002680 break;
2681
sewardj1b8d58e2005-02-05 14:34:18 +00002682 case Ain_Sh64:
2683 opc_cl = opc_imm = subopc = 0;
2684 switch (i->Ain.Sh64.op) {
2685 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2686 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2687 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2688 default: goto bad;
2689 }
2690 if (i->Ain.Sh64.src == 0) {
sewardja5b50222015-03-26 07:18:32 +00002691 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
sewardj03ccf852005-03-21 02:47:42 +00002692 *p++ = toUChar(opc_cl);
sewardja5b50222015-03-26 07:18:32 +00002693 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
sewardj501a3392005-05-11 15:37:50 +00002694 goto done;
sewardj1b8d58e2005-02-05 14:34:18 +00002695 } else {
sewardja5b50222015-03-26 07:18:32 +00002696 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
sewardj03ccf852005-03-21 02:47:42 +00002697 *p++ = toUChar(opc_imm);
sewardja5b50222015-03-26 07:18:32 +00002698 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
sewardj501a3392005-05-11 15:37:50 +00002699 *p++ = (UChar)(i->Ain.Sh64.src);
sewardj1b8d58e2005-02-05 14:34:18 +00002700 goto done;
2701 }
2702 break;
2703
sewardj501a3392005-05-11 15:37:50 +00002704 case Ain_Test64:
2705 /* testq sign-extend($imm32), %reg */
sewardja5b50222015-03-26 07:18:32 +00002706 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
sewardj501a3392005-05-11 15:37:50 +00002707 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002708 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
sewardj501a3392005-05-11 15:37:50 +00002709 p = emit32(p, i->Ain.Test64.imm32);
2710 goto done;
2711
sewardjd0a12df2005-02-10 02:07:43 +00002712 case Ain_Unary64:
2713 if (i->Ain.Unary64.op == Aun_NOT) {
sewardja5b50222015-03-26 07:18:32 +00002714 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
sewardj501a3392005-05-11 15:37:50 +00002715 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002716 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
sewardj501a3392005-05-11 15:37:50 +00002717 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00002718 }
sewardjb5220772005-04-27 11:53:23 +00002719 if (i->Ain.Unary64.op == Aun_NEG) {
sewardja5b50222015-03-26 07:18:32 +00002720 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
sewardj501a3392005-05-11 15:37:50 +00002721 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002722 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
sewardj501a3392005-05-11 15:37:50 +00002723 goto done;
sewardjb5220772005-04-27 11:53:23 +00002724 }
sewardjd0a12df2005-02-10 02:07:43 +00002725 break;
sewardj9b967672005-02-08 11:13:09 +00002726
sewardj6ce1a232007-03-31 19:12:38 +00002727 case Ain_Lea64:
2728 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2729 *p++ = 0x8D;
2730 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2731 goto done;
2732
sewardj9cc2bbf2011-06-05 17:56:03 +00002733 case Ain_Alu32R:
2734 /* ADD/SUB/AND/OR/XOR/CMP */
2735 opc = opc_rr = subopc_imm = opc_imma = 0;
2736 switch (i->Ain.Alu32R.op) {
2737 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2738 subopc_imm = 0; opc_imma = 0x05; break;
2739 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2740 subopc_imm = 5; opc_imma = 0x2D; break;
2741 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2742 subopc_imm = 4; opc_imma = 0x25; break;
2743 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2744 subopc_imm = 6; opc_imma = 0x35; break;
2745 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2746 subopc_imm = 1; opc_imma = 0x0D; break;
2747 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2748 subopc_imm = 7; opc_imma = 0x3D; break;
2749 default: goto bad;
2750 }
2751 switch (i->Ain.Alu32R.src->tag) {
2752 case Armi_Imm:
florian79efdc62013-02-11 00:47:35 +00002753 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
sewardj9cc2bbf2011-06-05 17:56:03 +00002754 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2755 goto bad; /* FIXME: awaiting test case */
2756 *p++ = toUChar(opc_imma);
2757 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2758 } else
2759 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
sewardja5b50222015-03-26 07:18:32 +00002760 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
sewardj9cc2bbf2011-06-05 17:56:03 +00002761 if (rex != 0x40) *p++ = rex;
2762 *p++ = 0x83;
sewardja5b50222015-03-26 07:18:32 +00002763 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
sewardj9cc2bbf2011-06-05 17:56:03 +00002764 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2765 } else {
sewardja5b50222015-03-26 07:18:32 +00002766 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
sewardj9cc2bbf2011-06-05 17:56:03 +00002767 if (rex != 0x40) *p++ = rex;
2768 *p++ = 0x81;
sewardja5b50222015-03-26 07:18:32 +00002769 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
sewardj9cc2bbf2011-06-05 17:56:03 +00002770 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2771 }
2772 goto done;
2773 case Armi_Reg:
2774 rex = clearWBit(
2775 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2776 i->Ain.Alu32R.dst) );
2777 if (rex != 0x40) *p++ = rex;
2778 *p++ = toUChar(opc_rr);
2779 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2780 i->Ain.Alu32R.dst);
2781 goto done;
2782 case Armi_Mem:
2783 rex = clearWBit(
2784 rexAMode_M( i->Ain.Alu32R.dst,
2785 i->Ain.Alu32R.src->Armi.Mem.am) );
2786 if (rex != 0x40) *p++ = rex;
2787 *p++ = toUChar(opc);
2788 p = doAMode_M(p, i->Ain.Alu32R.dst,
2789 i->Ain.Alu32R.src->Armi.Mem.am);
2790 goto done;
2791 default:
2792 goto bad;
2793 }
2794 break;
2795
sewardj9b967672005-02-08 11:13:09 +00002796 case Ain_MulL:
2797 subopc = i->Ain.MulL.syned ? 5 : 4;
sewardj501a3392005-05-11 15:37:50 +00002798 switch (i->Ain.MulL.src->tag) {
2799 case Arm_Mem:
sewardja5b50222015-03-26 07:18:32 +00002800 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
sewardj501a3392005-05-11 15:37:50 +00002801 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002802 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
sewardj501a3392005-05-11 15:37:50 +00002803 goto done;
2804 case Arm_Reg:
sewardja5b50222015-03-26 07:18:32 +00002805 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
sewardj501a3392005-05-11 15:37:50 +00002806 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002807 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
sewardj501a3392005-05-11 15:37:50 +00002808 goto done;
2809 default:
2810 goto bad;
sewardj9b967672005-02-08 11:13:09 +00002811 }
2812 break;
2813
sewardj7de0d3c2005-02-13 02:26:41 +00002814 case Ain_Div:
2815 subopc = i->Ain.Div.syned ? 7 : 6;
2816 if (i->Ain.Div.sz == 4) {
2817 switch (i->Ain.Div.src->tag) {
2818 case Arm_Mem:
sewardja6b93d12005-02-17 09:28:28 +00002819 goto bad;
2820 /*FIXME*/
sewardj7de0d3c2005-02-13 02:26:41 +00002821 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002822 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
sewardj7de0d3c2005-02-13 02:26:41 +00002823 goto done;
2824 case Arm_Reg:
2825 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00002826 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
sewardj7de0d3c2005-02-13 02:26:41 +00002827 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002828 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
sewardj7de0d3c2005-02-13 02:26:41 +00002829 goto done;
2830 default:
2831 goto bad;
2832 }
2833 }
sewardja6b93d12005-02-17 09:28:28 +00002834 if (i->Ain.Div.sz == 8) {
2835 switch (i->Ain.Div.src->tag) {
2836 case Arm_Mem:
sewardja5b50222015-03-26 07:18:32 +00002837 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
sewardja6b93d12005-02-17 09:28:28 +00002838 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002839 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
sewardja6b93d12005-02-17 09:28:28 +00002840 goto done;
2841 case Arm_Reg:
sewardja5b50222015-03-26 07:18:32 +00002842 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
sewardja6b93d12005-02-17 09:28:28 +00002843 *p++ = 0xF7;
sewardja5b50222015-03-26 07:18:32 +00002844 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
sewardja6b93d12005-02-17 09:28:28 +00002845 goto done;
2846 default:
2847 goto bad;
2848 }
2849 }
sewardj7de0d3c2005-02-13 02:26:41 +00002850 break;
2851
sewardj1001dc42005-02-21 08:25:55 +00002852 case Ain_Push:
2853 switch (i->Ain.Push.src->tag) {
2854 case Armi_Mem:
2855 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00002856 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
sewardj1001dc42005-02-21 08:25:55 +00002857 *p++ = 0xFF;
sewardja5b50222015-03-26 07:18:32 +00002858 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
sewardj1001dc42005-02-21 08:25:55 +00002859 goto done;
sewardj1a01e652005-02-23 11:39:21 +00002860 case Armi_Imm:
2861 *p++ = 0x68;
2862 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2863 goto done;
2864 case Armi_Reg:
sewardja5b50222015-03-26 07:18:32 +00002865 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
2866 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
sewardj1a01e652005-02-23 11:39:21 +00002867 goto done;
sewardj1001dc42005-02-21 08:25:55 +00002868 default:
2869 goto bad;
2870 }
sewardj1b8d58e2005-02-05 14:34:18 +00002871
sewardj4d77a9c2007-08-25 23:21:08 +00002872 case Ain_Call: {
sewardjbdea5502015-01-27 23:17:02 +00002873 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2874 above, %r11 is used as an address temporary. */
2875 /* If we don't need to do any fixup actions in the case that the
2876 call doesn't happen, just do the simple thing and emit
2877 straight-line code. This is usually the case. */
2878 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
2879 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
2880 /* jump over the following two insns if the condition does
2881 not hold */
2882 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2883 if (i->Ain.Call.cond != Acc_ALWAYS) {
2884 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2885 *p++ = shortImm ? 10 : 13;
2886 /* 10 or 13 bytes in the next two insns */
2887 }
2888 if (shortImm) {
2889 /* 7 bytes: movl sign-extend(imm32), %r11 */
2890 *p++ = 0x49;
2891 *p++ = 0xC7;
2892 *p++ = 0xC3;
2893 p = emit32(p, (UInt)i->Ain.Call.target);
2894 } else {
2895 /* 10 bytes: movabsq $target, %r11 */
2896 *p++ = 0x49;
2897 *p++ = 0xBB;
2898 p = emit64(p, i->Ain.Call.target);
2899 }
2900 /* 3 bytes: call *%r11 */
2901 *p++ = 0x41;
2902 *p++ = 0xFF;
2903 *p++ = 0xD3;
sewardj4d77a9c2007-08-25 23:21:08 +00002904 } else {
sewardjbdea5502015-01-27 23:17:02 +00002905 Int delta;
2906 /* Complex case. We have to generate an if-then-else diamond. */
2907 // before:
2908 // j{!cond} else:
2909 // movabsq $target, %r11
2910 // call* %r11
2911 // preElse:
2912 // jmp after:
2913 // else:
2914 // movabsq $0x5555555555555555, %rax // possibly
2915 // movq %rax, %rdx // possibly
2916 // after:
2917
2918 // before:
2919 UChar* pBefore = p;
2920
2921 // j{!cond} else:
2922 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2923 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2924
2925 // movabsq $target, %r11
sewardj4d77a9c2007-08-25 23:21:08 +00002926 *p++ = 0x49;
2927 *p++ = 0xBB;
2928 p = emit64(p, i->Ain.Call.target);
sewardjbdea5502015-01-27 23:17:02 +00002929
2930 // call* %r11
2931 *p++ = 0x41;
2932 *p++ = 0xFF;
2933 *p++ = 0xD3;
2934
2935 // preElse:
2936 UChar* pPreElse = p;
2937
2938 // jmp after:
2939 *p++ = 0xEB;
2940 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2941
2942 // else:
2943 UChar* pElse = p;
2944
2945 /* Do the 'else' actions */
2946 switch (i->Ain.Call.rloc.pri) {
2947 case RLPri_Int:
2948 // movabsq $0x5555555555555555, %rax
2949 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
2950 break;
2951 case RLPri_2Int:
sewardj70dbeb02015-08-12 11:15:53 +00002952 goto bad; //ATC
sewardjbdea5502015-01-27 23:17:02 +00002953 // movabsq $0x5555555555555555, %rax
2954 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
2955 // movq %rax, %rdx
2956 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
sewardj70dbeb02015-08-12 11:15:53 +00002957 break;
2958 case RLPri_V128SpRel:
2959 if (i->Ain.Call.rloc.spOff == 0) {
2960 // We could accept any |spOff| here, but that's more
2961 // hassle and the only value we're ever going to get
2962 // is zero (I believe.) Hence take the easy path :)
2963 // We need a scag register -- r11 can be it.
2964 // movabsq $0x5555555555555555, %r11
2965 *p++ = 0x49; *p++ = 0xBB;
2966 p = emit64(p, 0x5555555555555555ULL);
2967 // movq %r11, 0(%rsp)
2968 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
2969 // movq %r11, 8(%rsp)
2970 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
2971 *p++ = 0x08;
2972 break;
2973 }
2974 goto bad; //ATC for all other spOff values
2975 case RLPri_V256SpRel:
2976 goto bad; //ATC
sewardjbdea5502015-01-27 23:17:02 +00002977 case RLPri_None: case RLPri_INVALID: default:
sewardj70dbeb02015-08-12 11:15:53 +00002978 vassert(0); // should never get here
sewardjbdea5502015-01-27 23:17:02 +00002979 }
2980
2981 // after:
2982 UChar* pAfter = p;
2983
2984 // Fix up the branch offsets. The +2s in the offset
2985 // calculations are there because x86 requires conditional
2986 // branches to have their offset stated relative to the
2987 // instruction immediately following the branch insn. And in
2988 // both cases the branch insns are 2 bytes long.
2989
2990 // First, the "j{!cond} else:" at pBefore.
2991 delta = (Int)(Long)(pElse - (pBefore + 2));
2992 vassert(delta >= 0 && delta < 100/*arbitrary*/);
2993 *(pBefore+1) = (UChar)delta;
2994
2995 // And secondly, the "jmp after:" at pPreElse.
2996 delta = (Int)(Long)(pAfter - (pPreElse + 2));
2997 vassert(delta >= 0 && delta < 100/*arbitrary*/);
2998 *(pPreElse+1) = (UChar)delta;
sewardj4d77a9c2007-08-25 23:21:08 +00002999 }
sewardj1b8d58e2005-02-05 14:34:18 +00003000 goto done;
sewardj4d77a9c2007-08-25 23:21:08 +00003001 }
sewardj549e0642005-02-05 12:00:14 +00003002
sewardjc6f970f2012-04-02 21:54:49 +00003003 case Ain_XDirect: {
3004 /* NB: what goes on here has to be very closely coordinated with the
3005 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
3006 /* We're generating chain-me requests here, so we need to be
3007 sure this is actually allowed -- no-redir translations can't
3008 use chain-me's. Hence: */
3009 vassert(disp_cp_chain_me_to_slowEP != NULL);
3010 vassert(disp_cp_chain_me_to_fastEP != NULL);
3011
3012 HReg r11 = hregAMD64_R11();
sewardj010ac542011-05-29 09:29:18 +00003013
sewardj549e0642005-02-05 12:00:14 +00003014 /* Use ptmp for backpatching conditional jumps. */
3015 ptmp = NULL;
3016
3017 /* First off, if this is conditional, create a conditional
3018 jump over the rest of it. */
sewardjc6f970f2012-04-02 21:54:49 +00003019 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
sewardj549e0642005-02-05 12:00:14 +00003020 /* jmp fwds if !condition */
sewardjc6f970f2012-04-02 21:54:49 +00003021 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
sewardj549e0642005-02-05 12:00:14 +00003022 ptmp = p; /* fill in this bit later */
3023 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3024 }
3025
sewardjc6f970f2012-04-02 21:54:49 +00003026 /* Update the guest RIP. */
sewardj3e8ba602012-04-21 08:18:02 +00003027 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
3028 /* use a shorter encoding */
3029 /* movl sign-extend(dstGA), %r11 */
3030 *p++ = 0x49;
3031 *p++ = 0xC7;
3032 *p++ = 0xC3;
3033 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
3034 } else {
3035 /* movabsq $dstGA, %r11 */
3036 *p++ = 0x49;
3037 *p++ = 0xBB;
3038 p = emit64(p, i->Ain.XDirect.dstGA);
3039 }
3040
sewardjc6f970f2012-04-02 21:54:49 +00003041 /* movq %r11, amRIP */
3042 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
3043 *p++ = 0x89;
3044 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
sewardj010ac542011-05-29 09:29:18 +00003045
sewardjc6f970f2012-04-02 21:54:49 +00003046 /* --- FIRST PATCHABLE BYTE follows --- */
3047 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
3048 to) backs up the return address, so as to find the address of
3049 the first patchable byte. So: don't change the length of the
3050 two instructions below. */
3051 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
3052 *p++ = 0x49;
3053 *p++ = 0xBB;
florian8462d112014-09-24 15:18:09 +00003054 const void* disp_cp_chain_me
sewardjc6f970f2012-04-02 21:54:49 +00003055 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3056 : disp_cp_chain_me_to_slowEP;
florian93a09742015-01-07 20:14:48 +00003057 p = emit64(p, (Addr)disp_cp_chain_me);
sewardjc6f970f2012-04-02 21:54:49 +00003058 /* call *%r11 */
3059 *p++ = 0x41;
sewardj0528bb52005-12-15 15:45:20 +00003060 *p++ = 0xFF;
sewardjc6f970f2012-04-02 21:54:49 +00003061 *p++ = 0xD3;
3062 /* --- END of PATCHABLE BYTES --- */
sewardj549e0642005-02-05 12:00:14 +00003063
3064 /* Fix up the conditional jump, if there was one. */
sewardjc6f970f2012-04-02 21:54:49 +00003065 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
sewardj549e0642005-02-05 12:00:14 +00003066 Int delta = p - ptmp;
sewardjc6f970f2012-04-02 21:54:49 +00003067 vassert(delta > 0 && delta < 40);
3068 *ptmp = toUChar(delta-1);
3069 }
3070 goto done;
3071 }
3072
3073 case Ain_XIndir: {
3074 /* We're generating transfers that could lead indirectly to a
3075 chain-me, so we need to be sure this is actually allowed --
3076 no-redir translations are not allowed to reach normal
3077 translations without going through the scheduler. That means
3078 no XDirects or XIndirs out from no-redir translations.
3079 Hence: */
3080 vassert(disp_cp_xindir != NULL);
3081
3082 /* Use ptmp for backpatching conditional jumps. */
3083 ptmp = NULL;
3084
3085 /* First off, if this is conditional, create a conditional
3086 jump over the rest of it. */
3087 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3088 /* jmp fwds if !condition */
3089 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
3090 ptmp = p; /* fill in this bit later */
3091 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3092 }
3093
3094 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3095 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3096 *p++ = 0x89;
3097 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
sewardj3e8ba602012-04-21 08:18:02 +00003098
3099 /* get $disp_cp_xindir into %r11 */
florian93a09742015-01-07 20:14:48 +00003100 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
sewardj3e8ba602012-04-21 08:18:02 +00003101 /* use a shorter encoding */
3102 /* movl sign-extend(disp_cp_xindir), %r11 */
3103 *p++ = 0x49;
3104 *p++ = 0xC7;
3105 *p++ = 0xC3;
florian93a09742015-01-07 20:14:48 +00003106 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
sewardj3e8ba602012-04-21 08:18:02 +00003107 } else {
3108 /* movabsq $disp_cp_xindir, %r11 */
3109 *p++ = 0x49;
3110 *p++ = 0xBB;
florian93a09742015-01-07 20:14:48 +00003111 p = emit64(p, (Addr)disp_cp_xindir);
sewardj3e8ba602012-04-21 08:18:02 +00003112 }
3113
sewardjc6f970f2012-04-02 21:54:49 +00003114 /* jmp *%r11 */
3115 *p++ = 0x41;
3116 *p++ = 0xFF;
3117 *p++ = 0xE3;
3118
3119 /* Fix up the conditional jump, if there was one. */
3120 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3121 Int delta = p - ptmp;
3122 vassert(delta > 0 && delta < 40);
3123 *ptmp = toUChar(delta-1);
3124 }
3125 goto done;
3126 }
3127
3128 case Ain_XAssisted: {
3129 /* Use ptmp for backpatching conditional jumps. */
3130 ptmp = NULL;
3131
3132 /* First off, if this is conditional, create a conditional
3133 jump over the rest of it. */
3134 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3135 /* jmp fwds if !condition */
3136 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
3137 ptmp = p; /* fill in this bit later */
3138 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3139 }
3140
3141 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3142 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3143 *p++ = 0x89;
3144 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3145 /* movl $magic_number, %ebp. Since these numbers are all small positive
3146 integers, we can get away with "movl $N, %ebp" rather than
3147 the longer "movq $N, %rbp". */
3148 UInt trcval = 0;
3149 switch (i->Ain.XAssisted.jk) {
3150 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3151 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3152 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
sewardj3e5d82d2015-07-21 14:43:23 +00003153 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
sewardjc6f970f2012-04-02 21:54:49 +00003154 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3155 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3156 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3157 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
sewardj05f5e012014-05-04 10:52:11 +00003158 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
sewardjc6f970f2012-04-02 21:54:49 +00003159 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3160 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3161 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3162 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3163 /* We don't expect to see the following being assisted. */
3164 case Ijk_Ret:
3165 case Ijk_Call:
3166 /* fallthrough */
3167 default:
3168 ppIRJumpKind(i->Ain.XAssisted.jk);
3169 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3170 }
3171 vassert(trcval != 0);
3172 *p++ = 0xBD;
3173 p = emit32(p, trcval);
3174 /* movabsq $disp_assisted, %r11 */
3175 *p++ = 0x49;
3176 *p++ = 0xBB;
florian93a09742015-01-07 20:14:48 +00003177 p = emit64(p, (Addr)disp_cp_xassisted);
sewardjc6f970f2012-04-02 21:54:49 +00003178 /* jmp *%r11 */
3179 *p++ = 0x41;
3180 *p++ = 0xFF;
3181 *p++ = 0xE3;
3182
3183 /* Fix up the conditional jump, if there was one. */
3184 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3185 Int delta = p - ptmp;
3186 vassert(delta > 0 && delta < 40);
sewardj03ccf852005-03-21 02:47:42 +00003187 *ptmp = toUChar(delta-1);
sewardj549e0642005-02-05 12:00:14 +00003188 }
3189 goto done;
sewardj010ac542011-05-29 09:29:18 +00003190 }
sewardj549e0642005-02-05 12:00:14 +00003191
sewardj1b8d58e2005-02-05 14:34:18 +00003192 case Ain_CMov64:
3193 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
sewardje357c672015-01-27 23:35:58 +00003194 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3195 *p++ = 0x0F;
3196 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3197 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3198 goto done;
sewardj549e0642005-02-05 12:00:14 +00003199
sewardjbdea5502015-01-27 23:17:02 +00003200 case Ain_CLoad: {
3201 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3202
3203 /* Only 32- or 64-bit variants are allowed. */
3204 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3205
3206 /* Use ptmp for backpatching conditional jumps. */
3207 ptmp = NULL;
3208
3209 /* jmp fwds if !condition */
3210 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3211 ptmp = p; /* fill in this bit later */
3212 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3213
3214 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3215 load, which, by the default zero-extension rule, zeroes out
3216 the upper half of the destination, as required. */
3217 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3218 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3219 *p++ = 0x8B;
3220 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3221
3222 /* Fix up the conditional branch */
3223 Int delta = p - ptmp;
3224 vassert(delta > 0 && delta < 40);
3225 *ptmp = toUChar(delta-1);
3226 goto done;
3227 }
3228
sewardj6f1ec582015-01-28 10:52:36 +00003229 case Ain_CStore: {
sewardj70dbeb02015-08-12 11:15:53 +00003230 /* AFAICS this is identical to Ain_CLoad except that the opcode
sewardj6f1ec582015-01-28 10:52:36 +00003231 is 0x89 instead of 0x8B. */
3232 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3233
3234 /* Only 32- or 64-bit variants are allowed. */
3235 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3236
3237 /* Use ptmp for backpatching conditional jumps. */
3238 ptmp = NULL;
3239
3240 /* jmp fwds if !condition */
3241 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3242 ptmp = p; /* fill in this bit later */
3243 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3244
3245 /* Now the store. */
3246 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3247 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3248 *p++ = 0x89;
3249 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3250
3251 /* Fix up the conditional branch */
3252 Int delta = p - ptmp;
3253 vassert(delta > 0 && delta < 40);
3254 *ptmp = toUChar(delta-1);
3255 goto done;
3256 }
3257
sewardjca257bc2010-09-08 08:34:52 +00003258 case Ain_MovxLQ:
3259 /* No, _don't_ ask me why the sense of the args has to be
3260 different in the S vs Z case. I don't know. */
3261 if (i->Ain.MovxLQ.syned) {
3262 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3263 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3264 *p++ = 0x63;
3265 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3266 } else {
3267 /* Produce a 32-bit reg-reg move, since the implicit
3268 zero-extend does what we want. */
3269 *p++ = clearWBit (
3270 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3271 *p++ = 0x89;
3272 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3273 }
sewardj549e0642005-02-05 12:00:14 +00003274 goto done;
3275
3276 case Ain_LoadEX:
sewardj1b8d58e2005-02-05 14:34:18 +00003277 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3278 /* movzbq */
3279 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3280 *p++ = 0x0F;
3281 *p++ = 0xB6;
3282 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3283 goto done;
3284 }
3285 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3286 /* movzwq */
3287 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3288 *p++ = 0x0F;
3289 *p++ = 0xB7;
3290 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3291 goto done;
3292 }
sewardj549e0642005-02-05 12:00:14 +00003293 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3294 /* movzlq */
sewardj1b8d58e2005-02-05 14:34:18 +00003295 /* This isn't really an existing AMD64 instruction per se.
sewardj549e0642005-02-05 12:00:14 +00003296 Rather, we have to do a 32-bit load. Because a 32-bit
3297 write implicitly clears the upper 32 bits of the target
3298 register, we get what we want. */
3299 *p++ = clearWBit(
3300 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3301 *p++ = 0x8B;
3302 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3303 goto done;
3304 }
3305 break;
3306
sewardja5bd0af2005-03-24 20:40:12 +00003307 case Ain_Set64:
3308 /* Make the destination register be 1 or 0, depending on whether
3309 the relevant condition holds. Complication: the top 56 bits
3310 of the destination should be forced to zero, but doing 'xorq
3311 %r,%r' kills the flag(s) we are about to read. Sigh. So
3312 start off my moving $0 into the dest. */
sewardja5b50222015-03-26 07:18:32 +00003313 reg = iregEnc3210(i->Ain.Set64.dst);
sewardja5bd0af2005-03-24 20:40:12 +00003314 vassert(reg < 16);
3315
3316 /* movq $0, %dst */
3317 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3318 *p++ = 0xC7;
3319 *p++ = toUChar(0xC0 + (reg & 7));
3320 p = emit32(p, 0);
3321
3322 /* setb lo8(%dst) */
3323 /* note, 8-bit register rex trickyness. Be careful here. */
3324 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3325 *p++ = 0x0F;
3326 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3327 *p++ = toUChar(0xC0 + (reg & 7));
3328 goto done;
3329
sewardjf53b7352005-04-06 20:01:56 +00003330 case Ain_Bsfr64:
3331 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3332 *p++ = 0x0F;
3333 if (i->Ain.Bsfr64.isFwds) {
3334 *p++ = 0xBC;
3335 } else {
3336 *p++ = 0xBD;
3337 }
3338 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3339 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00003340
3341 case Ain_MFence:
3342 /* mfence */
3343 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3344 goto done;
sewardj1b8d58e2005-02-05 14:34:18 +00003345
sewardje9d8a262009-07-01 08:06:34 +00003346 case Ain_ACAS:
3347 /* lock */
3348 *p++ = 0xF0;
3349 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3350 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3351 in %rbx. The new-value register is hardwired to be %rbx
3352 since dealing with byte integer registers is too much hassle,
3353 so we force the register operand to %rbx (could equally be
3354 %rcx or %rdx). */
3355 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3356 if (i->Ain.ACAS.sz != 8)
3357 rex = clearWBit(rex);
3358
3359 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3360 *p++ = 0x0F;
3361 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3362 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3363 goto done;
3364
3365 case Ain_DACAS:
3366 /* lock */
3367 *p++ = 0xF0;
3368 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3369 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3370 aren't encoded in the insn. */
sewardja5b50222015-03-26 07:18:32 +00003371 rex = rexAMode_M_enc(1, i->Ain.ACAS.addr );
sewardje9d8a262009-07-01 08:06:34 +00003372 if (i->Ain.ACAS.sz != 8)
3373 rex = clearWBit(rex);
3374 *p++ = rex;
3375 *p++ = 0x0F;
3376 *p++ = 0xC7;
sewardja5b50222015-03-26 07:18:32 +00003377 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
sewardje9d8a262009-07-01 08:06:34 +00003378 goto done;
3379
sewardj25a85812005-05-08 23:03:48 +00003380 case Ain_A87Free:
3381 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3382 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3383 p = do_ffree_st(p, 7-j);
3384 }
3385 goto done;
3386
3387 case Ain_A87PushPop:
sewardjd15b5972010-06-27 09:06:34 +00003388 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
sewardj25a85812005-05-08 23:03:48 +00003389 if (i->Ain.A87PushPop.isPush) {
sewardjd15b5972010-06-27 09:06:34 +00003390 /* Load from memory into %st(0): flds/fldl amode */
sewardj25a85812005-05-08 23:03:48 +00003391 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003392 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
sewardjd15b5972010-06-27 09:06:34 +00003393 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
sewardja5b50222015-03-26 07:18:32 +00003394 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
sewardj25a85812005-05-08 23:03:48 +00003395 } else {
sewardjd15b5972010-06-27 09:06:34 +00003396 /* Dump %st(0) to memory: fstps/fstpl amode */
sewardj25a85812005-05-08 23:03:48 +00003397 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003398 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
sewardjd15b5972010-06-27 09:06:34 +00003399 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
sewardja5b50222015-03-26 07:18:32 +00003400 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
sewardj25a85812005-05-08 23:03:48 +00003401 goto done;
3402 }
3403 goto done;
3404
3405 case Ain_A87FpOp:
3406 switch (i->Ain.A87FpOp.op) {
sewardj5e205372005-05-09 02:57:08 +00003407 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3408 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3409 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
sewardj5e205372005-05-09 02:57:08 +00003410 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3411 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3412 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3413 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3414 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3415 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
sewardjf4c803b2006-09-11 11:07:34 +00003416 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
sewardj4970e4e2008-10-11 10:07:55 +00003417 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
sewardje9c51c92014-04-30 22:50:34 +00003418 case Afp_TAN:
3419 /* fptan pushes 1.0 on the FP stack, except when the
3420 argument is out of range. Hence we have to do the
3421 instruction, then inspect C2 to see if there is an out
3422 of range condition. If there is, we skip the fincstp
3423 that is used by the in-range case to get rid of this
3424 extra 1.0 value. */
3425 *p++ = 0xD9; *p++ = 0xF2; // fptan
3426 *p++ = 0x50; // pushq %rax
3427 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3428 *p++ = 0x66; *p++ = 0xA9;
3429 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3430 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3431 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3432 *p++ = 0x58; // after_fincstp: popq %rax
3433 break;
3434 default:
3435 goto bad;
sewardj25a85812005-05-08 23:03:48 +00003436 }
3437 goto done;
3438
3439 case Ain_A87LdCW:
3440 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003441 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
sewardj25a85812005-05-08 23:03:48 +00003442 *p++ = 0xD9;
sewardja5b50222015-03-26 07:18:32 +00003443 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
sewardj25a85812005-05-08 23:03:48 +00003444 goto done;
3445
sewardjf4c803b2006-09-11 11:07:34 +00003446 case Ain_A87StSW:
3447 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003448 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
sewardjf4c803b2006-09-11 11:07:34 +00003449 *p++ = 0xDD;
sewardja5b50222015-03-26 07:18:32 +00003450 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
sewardjf4c803b2006-09-11 11:07:34 +00003451 goto done;
3452
sewardj1b8d58e2005-02-05 14:34:18 +00003453 case Ain_Store:
3454 if (i->Ain.Store.sz == 2) {
3455 /* This just goes to show the crazyness of the instruction
3456 set encoding. We have to insert two prefix bytes, but be
3457 careful to avoid a conflict in what the size should be, by
3458 ensuring that REX.W = 0. */
3459 *p++ = 0x66; /* override to 16-bits */
3460 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3461 *p++ = 0x89;
3462 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3463 goto done;
3464 }
3465 if (i->Ain.Store.sz == 4) {
3466 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3467 *p++ = 0x89;
3468 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3469 goto done;
3470 }
sewardje1698952005-02-08 15:02:39 +00003471 if (i->Ain.Store.sz == 1) {
sewardja8903672005-02-27 13:31:42 +00003472 /* This is one place where it would be wrong to skip emitting
3473 a rex byte of 0x40, since the mere presence of rex changes
3474 the meaning of the byte register access. Be careful. */
sewardje1698952005-02-08 15:02:39 +00003475 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3476 *p++ = 0x88;
3477 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3478 goto done;
3479 }
sewardj1b8d58e2005-02-05 14:34:18 +00003480 break;
3481
sewardj1a01e652005-02-23 11:39:21 +00003482 case Ain_LdMXCSR:
sewardja5b50222015-03-26 07:18:32 +00003483 *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
sewardj1a01e652005-02-23 11:39:21 +00003484 *p++ = 0x0F;
3485 *p++ = 0xAE;
sewardja5b50222015-03-26 07:18:32 +00003486 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
sewardj1a01e652005-02-23 11:39:21 +00003487 goto done;
3488
sewardj18303862005-02-21 12:36:54 +00003489 case Ain_SseUComIS:
3490 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3491 /* ucomi[sd] %srcL, %srcR */
3492 if (i->Ain.SseUComIS.sz == 8) {
3493 *p++ = 0x66;
3494 } else {
3495 goto bad;
3496 vassert(i->Ain.SseUComIS.sz == 4);
3497 }
3498 *p++ = clearWBit (
sewardja5b50222015-03-26 07:18:32 +00003499 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3500 vregEnc3210(i->Ain.SseUComIS.srcR) ));
sewardj18303862005-02-21 12:36:54 +00003501 *p++ = 0x0F;
3502 *p++ = 0x2E;
sewardja5b50222015-03-26 07:18:32 +00003503 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3504 vregEnc3210(i->Ain.SseUComIS.srcR) );
sewardj18303862005-02-21 12:36:54 +00003505 /* pushfq */
3506 *p++ = 0x9C;
3507 /* popq %dst */
sewardja5b50222015-03-26 07:18:32 +00003508 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3509 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
sewardj18303862005-02-21 12:36:54 +00003510 goto done;
3511
sewardj1a01e652005-02-23 11:39:21 +00003512 case Ain_SseSI2SF:
3513 /* cvssi2s[sd] %src, %dst */
sewardja5b50222015-03-26 07:18:32 +00003514 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3515 i->Ain.SseSI2SF.src );
sewardj03ccf852005-03-21 02:47:42 +00003516 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3517 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
sewardj1a01e652005-02-23 11:39:21 +00003518 *p++ = 0x0F;
3519 *p++ = 0x2A;
sewardja5b50222015-03-26 07:18:32 +00003520 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3521 i->Ain.SseSI2SF.src );
sewardj1a01e652005-02-23 11:39:21 +00003522 goto done;
3523
3524 case Ain_SseSF2SI:
3525 /* cvss[sd]2si %src, %dst */
sewardja5b50222015-03-26 07:18:32 +00003526 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3527 vregEnc3210(i->Ain.SseSF2SI.src) );
sewardj03ccf852005-03-21 02:47:42 +00003528 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3529 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
sewardj1a01e652005-02-23 11:39:21 +00003530 *p++ = 0x0F;
3531 *p++ = 0x2D;
sewardja5b50222015-03-26 07:18:32 +00003532 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3533 vregEnc3210(i->Ain.SseSF2SI.src) );
sewardj1a01e652005-02-23 11:39:21 +00003534 goto done;
3535
sewardj8d965312005-02-25 02:48:47 +00003536 case Ain_SseSDSS:
3537 /* cvtsd2ss/cvtss2sd %src, %dst */
sewardj03ccf852005-03-21 02:47:42 +00003538 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
sewardj8d965312005-02-25 02:48:47 +00003539 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003540 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3541 vregEnc3210(i->Ain.SseSDSS.src) ));
sewardj8d965312005-02-25 02:48:47 +00003542 *p++ = 0x0F;
3543 *p++ = 0x5A;
sewardja5b50222015-03-26 07:18:32 +00003544 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3545 vregEnc3210(i->Ain.SseSDSS.src) );
sewardj8d965312005-02-25 02:48:47 +00003546 goto done;
3547
sewardj1001dc42005-02-21 08:25:55 +00003548 case Ain_SseLdSt:
sewardj18303862005-02-21 12:36:54 +00003549 if (i->Ain.SseLdSt.sz == 8) {
3550 *p++ = 0xF2;
3551 } else
3552 if (i->Ain.SseLdSt.sz == 4) {
sewardj18303862005-02-21 12:36:54 +00003553 *p++ = 0xF3;
3554 } else
3555 if (i->Ain.SseLdSt.sz != 16) {
3556 vassert(0);
3557 }
sewardj1001dc42005-02-21 08:25:55 +00003558 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003559 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3560 i->Ain.SseLdSt.addr));
sewardj1001dc42005-02-21 08:25:55 +00003561 *p++ = 0x0F;
sewardj03ccf852005-03-21 02:47:42 +00003562 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
sewardja5b50222015-03-26 07:18:32 +00003563 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3564 i->Ain.SseLdSt.addr);
sewardj1001dc42005-02-21 08:25:55 +00003565 goto done;
3566
sewardj70dbeb02015-08-12 11:15:53 +00003567 case Ain_SseCStore: {
3568 vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
3569
3570 /* Use ptmp for backpatching conditional jumps. */
3571 ptmp = NULL;
3572
3573 /* jmp fwds if !condition */
3574 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
3575 ptmp = p; /* fill in this bit later */
3576 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3577
3578 /* Now the store. */
3579 *p++ = clearWBit(
3580 rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
3581 i->Ain.SseCStore.addr));
3582 *p++ = 0x0F;
3583 *p++ = toUChar(0x11);
3584 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
3585 i->Ain.SseCStore.addr);
3586
3587 /* Fix up the conditional branch */
3588 Int delta = p - ptmp;
3589 vassert(delta > 0 && delta < 40);
3590 *ptmp = toUChar(delta-1);
3591 goto done;
3592 }
3593
3594 case Ain_SseCLoad: {
3595 vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
3596
3597 /* Use ptmp for backpatching conditional jumps. */
3598 ptmp = NULL;
3599
3600 /* jmp fwds if !condition */
3601 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
3602 ptmp = p; /* fill in this bit later */
3603 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3604
3605 /* Now the load. */
3606 *p++ = clearWBit(
3607 rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
3608 i->Ain.SseCLoad.addr));
3609 *p++ = 0x0F;
3610 *p++ = toUChar(0x10);
3611 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
3612 i->Ain.SseCLoad.addr);
3613
3614 /* Fix up the conditional branch */
3615 Int delta = p - ptmp;
3616 vassert(delta > 0 && delta < 40);
3617 *ptmp = toUChar(delta-1);
3618 goto done;
3619 }
3620
sewardj1001dc42005-02-21 08:25:55 +00003621 case Ain_SseLdzLO:
3622 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3623 /* movs[sd] amode, %xmm-dst */
sewardj03ccf852005-03-21 02:47:42 +00003624 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
sewardj1001dc42005-02-21 08:25:55 +00003625 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003626 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3627 i->Ain.SseLdzLO.addr));
sewardj1001dc42005-02-21 08:25:55 +00003628 *p++ = 0x0F;
3629 *p++ = 0x10;
sewardja5b50222015-03-26 07:18:32 +00003630 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3631 i->Ain.SseLdzLO.addr);
sewardj1001dc42005-02-21 08:25:55 +00003632 goto done;
3633
sewardj8d965312005-02-25 02:48:47 +00003634 case Ain_Sse32Fx4:
3635 xtra = 0;
3636 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003637 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32Fx4.dst),
3638 vregEnc3210(i->Ain.Sse32Fx4.src) ));
sewardj8d965312005-02-25 02:48:47 +00003639 *p++ = 0x0F;
3640 switch (i->Ain.Sse32Fx4.op) {
sewardj432f8b62005-05-10 02:50:05 +00003641 case Asse_ADDF: *p++ = 0x58; break;
3642 case Asse_DIVF: *p++ = 0x5E; break;
3643 case Asse_MAXF: *p++ = 0x5F; break;
3644 case Asse_MINF: *p++ = 0x5D; break;
3645 case Asse_MULF: *p++ = 0x59; break;
sewardja7ba8c42005-05-10 20:08:34 +00003646 case Asse_RCPF: *p++ = 0x53; break;
3647 case Asse_RSQRTF: *p++ = 0x52; break;
3648 case Asse_SQRTF: *p++ = 0x51; break;
sewardj432f8b62005-05-10 02:50:05 +00003649 case Asse_SUBF: *p++ = 0x5C; break;
sewardj8d965312005-02-25 02:48:47 +00003650 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
sewardj432f8b62005-05-10 02:50:05 +00003651 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3652 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003653 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj8d965312005-02-25 02:48:47 +00003654 default: goto bad;
3655 }
sewardja5b50222015-03-26 07:18:32 +00003656 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32Fx4.dst),
3657 vregEnc3210(i->Ain.Sse32Fx4.src) );
sewardj8d965312005-02-25 02:48:47 +00003658 if (xtra & 0x100)
sewardj03ccf852005-03-21 02:47:42 +00003659 *p++ = toUChar(xtra & 0xFF);
sewardj8d965312005-02-25 02:48:47 +00003660 goto done;
3661
sewardj4c328cf2005-05-05 12:05:54 +00003662 case Ain_Sse64Fx2:
3663 xtra = 0;
3664 *p++ = 0x66;
3665 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003666 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3667 vregEnc3210(i->Ain.Sse64Fx2.src) ));
sewardj4c328cf2005-05-05 12:05:54 +00003668 *p++ = 0x0F;
3669 switch (i->Ain.Sse64Fx2.op) {
3670 case Asse_ADDF: *p++ = 0x58; break;
sewardj5992bd02005-05-11 02:13:42 +00003671 case Asse_DIVF: *p++ = 0x5E; break;
3672 case Asse_MAXF: *p++ = 0x5F; break;
3673 case Asse_MINF: *p++ = 0x5D; break;
sewardj4c328cf2005-05-05 12:05:54 +00003674 case Asse_MULF: *p++ = 0x59; break;
sewardj97628592005-05-10 22:42:54 +00003675 case Asse_SQRTF: *p++ = 0x51; break;
sewardj4c328cf2005-05-05 12:05:54 +00003676 case Asse_SUBF: *p++ = 0x5C; break;
sewardj97628592005-05-10 22:42:54 +00003677 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3678 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3679 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003680 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj4c328cf2005-05-05 12:05:54 +00003681 default: goto bad;
3682 }
sewardja5b50222015-03-26 07:18:32 +00003683 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3684 vregEnc3210(i->Ain.Sse64Fx2.src) );
sewardj4c328cf2005-05-05 12:05:54 +00003685 if (xtra & 0x100)
sewardjca673ab2005-05-11 10:03:08 +00003686 *p++ = toUChar(xtra & 0xFF);
sewardj4c328cf2005-05-05 12:05:54 +00003687 goto done;
sewardj8d965312005-02-25 02:48:47 +00003688
3689 case Ain_Sse32FLo:
3690 xtra = 0;
3691 *p++ = 0xF3;
3692 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003693 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3694 vregEnc3210(i->Ain.Sse32FLo.src) ));
sewardj8d965312005-02-25 02:48:47 +00003695 *p++ = 0x0F;
3696 switch (i->Ain.Sse32FLo.op) {
3697 case Asse_ADDF: *p++ = 0x58; break;
sewardjc49ce232005-02-25 13:03:03 +00003698 case Asse_DIVF: *p++ = 0x5E; break;
sewardj37d52572005-02-25 14:22:12 +00003699 case Asse_MAXF: *p++ = 0x5F; break;
3700 case Asse_MINF: *p++ = 0x5D; break;
sewardj8d965312005-02-25 02:48:47 +00003701 case Asse_MULF: *p++ = 0x59; break;
sewardja7ba8c42005-05-10 20:08:34 +00003702 case Asse_RCPF: *p++ = 0x53; break;
3703 case Asse_RSQRTF: *p++ = 0x52; break;
3704 case Asse_SQRTF: *p++ = 0x51; break;
sewardj8d965312005-02-25 02:48:47 +00003705 case Asse_SUBF: *p++ = 0x5C; break;
sewardj432f8b62005-05-10 02:50:05 +00003706 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
sewardj3aba9eb2005-03-30 23:20:47 +00003707 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
sewardj4c328cf2005-05-05 12:05:54 +00003708 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003709 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj8d965312005-02-25 02:48:47 +00003710 default: goto bad;
3711 }
sewardja5b50222015-03-26 07:18:32 +00003712 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3713 vregEnc3210(i->Ain.Sse32FLo.src) );
sewardj8d965312005-02-25 02:48:47 +00003714 if (xtra & 0x100)
sewardj03ccf852005-03-21 02:47:42 +00003715 *p++ = toUChar(xtra & 0xFF);
sewardj8d965312005-02-25 02:48:47 +00003716 goto done;
sewardj1001dc42005-02-21 08:25:55 +00003717
3718 case Ain_Sse64FLo:
3719 xtra = 0;
3720 *p++ = 0xF2;
3721 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003722 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3723 vregEnc3210(i->Ain.Sse64FLo.src) ));
sewardj1001dc42005-02-21 08:25:55 +00003724 *p++ = 0x0F;
3725 switch (i->Ain.Sse64FLo.op) {
3726 case Asse_ADDF: *p++ = 0x58; break;
3727 case Asse_DIVF: *p++ = 0x5E; break;
sewardj1a01e652005-02-23 11:39:21 +00003728 case Asse_MAXF: *p++ = 0x5F; break;
sewardjc49ce232005-02-25 13:03:03 +00003729 case Asse_MINF: *p++ = 0x5D; break;
sewardj1001dc42005-02-21 08:25:55 +00003730 case Asse_MULF: *p++ = 0x59; break;
sewardj18303862005-02-21 12:36:54 +00003731 case Asse_SQRTF: *p++ = 0x51; break;
sewardj1001dc42005-02-21 08:25:55 +00003732 case Asse_SUBF: *p++ = 0x5C; break;
sewardj137015d2005-03-27 04:01:15 +00003733 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
sewardj8d965312005-02-25 02:48:47 +00003734 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
sewardj137015d2005-03-27 04:01:15 +00003735 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003736 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj1001dc42005-02-21 08:25:55 +00003737 default: goto bad;
3738 }
sewardja5b50222015-03-26 07:18:32 +00003739 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3740 vregEnc3210(i->Ain.Sse64FLo.src) );
sewardj1001dc42005-02-21 08:25:55 +00003741 if (xtra & 0x100)
sewardj03ccf852005-03-21 02:47:42 +00003742 *p++ = toUChar(xtra & 0xFF);
sewardj1001dc42005-02-21 08:25:55 +00003743 goto done;
3744
3745 case Ain_SseReRg:
3746# define XX(_n) *p++ = (_n)
3747
3748 rex = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003749 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3750 vregEnc3210(i->Ain.SseReRg.src) ));
sewardj1001dc42005-02-21 08:25:55 +00003751
3752 switch (i->Ain.SseReRg.op) {
3753 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
sewardj8d965312005-02-25 02:48:47 +00003754 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
sewardj9da16972005-02-21 13:58:26 +00003755 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
sewardj1a01e652005-02-23 11:39:21 +00003756 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
sewardj137015d2005-03-27 04:01:15 +00003757 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
sewardj97628592005-05-10 22:42:54 +00003758 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3759 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3760 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3761 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
sewardj5992bd02005-05-11 02:13:42 +00003762 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
sewardj97628592005-05-10 22:42:54 +00003763 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
sewardj09717342005-05-05 21:34:02 +00003764 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
sewardj5992bd02005-05-11 02:13:42 +00003765 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3766 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3767 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3768 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3769 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3770 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3771 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3772 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
sewardj09717342005-05-05 21:34:02 +00003773 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
sewardj5992bd02005-05-11 02:13:42 +00003774 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3775 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3776 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
sewardjadffcef2005-05-11 00:03:06 +00003777 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3778 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3779 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3780 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3781 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3782 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3783 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3784 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3785 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3786 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3787 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3788 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3789 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3790 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
sewardj09717342005-05-05 21:34:02 +00003791 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
sewardj97628592005-05-10 22:42:54 +00003792 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3793 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3794 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
sewardj09717342005-05-05 21:34:02 +00003795 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
sewardj97628592005-05-10 22:42:54 +00003796 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3797 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3798 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3799 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3800 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3801 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3802 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3803 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3804 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3805 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3806 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3807 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
sewardj1001dc42005-02-21 08:25:55 +00003808 default: goto bad;
3809 }
sewardja5b50222015-03-26 07:18:32 +00003810 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
3811 vregEnc3210(i->Ain.SseReRg.src) );
sewardj1001dc42005-02-21 08:25:55 +00003812# undef XX
3813 goto done;
3814
sewardj8d965312005-02-25 02:48:47 +00003815 case Ain_SseCMov:
3816 /* jmp fwds if !condition */
sewardj03ccf852005-03-21 02:47:42 +00003817 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
sewardj8d965312005-02-25 02:48:47 +00003818 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3819 ptmp = p;
3820
3821 /* movaps %src, %dst */
3822 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003823 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
3824 vregEnc3210(i->Ain.SseCMov.src) ));
sewardj8d965312005-02-25 02:48:47 +00003825 *p++ = 0x0F;
3826 *p++ = 0x28;
sewardja5b50222015-03-26 07:18:32 +00003827 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
3828 vregEnc3210(i->Ain.SseCMov.src) );
sewardj8d965312005-02-25 02:48:47 +00003829
3830 /* Fill in the jump offset. */
sewardj03ccf852005-03-21 02:47:42 +00003831 *(ptmp-1) = toUChar(p - ptmp);
sewardj8d965312005-02-25 02:48:47 +00003832 goto done;
3833
sewardj09717342005-05-05 21:34:02 +00003834 case Ain_SseShuf:
3835 *p++ = 0x66;
3836 *p++ = clearWBit(
sewardja5b50222015-03-26 07:18:32 +00003837 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
3838 vregEnc3210(i->Ain.SseShuf.src) ));
sewardj09717342005-05-05 21:34:02 +00003839 *p++ = 0x0F;
3840 *p++ = 0x70;
sewardja5b50222015-03-26 07:18:32 +00003841 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
3842 vregEnc3210(i->Ain.SseShuf.src) );
sewardj09717342005-05-05 21:34:02 +00003843 *p++ = (UChar)(i->Ain.SseShuf.order);
3844 goto done;
3845
sewardj3616a2e2012-05-27 16:18:13 +00003846 //uu case Ain_AvxLdSt: {
3847 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
3848 //uu i->Ain.AvxLdSt.addr );
3849 //uu p = emitVexPrefix(p, vex);
3850 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
3851 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
3852 //uu goto done;
3853 //uu }
sewardjc4530ae2012-05-21 10:18:49 +00003854
sewardjc6f970f2012-04-02 21:54:49 +00003855 case Ain_EvCheck: {
3856 /* We generate:
3857 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
3858 (2 bytes) jns nofail expected taken
3859 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
3860 nofail:
3861 */
3862 /* This is heavily asserted re instruction lengths. It needs to
3863 be. If we get given unexpected forms of .amCounter or
3864 .amFailAddr -- basically, anything that's not of the form
3865 uimm7(%rbp) -- they are likely to fail. */
3866 /* Note also that after the decl we must be very careful not to
3867 read the carry flag, else we get a partial flags stall.
3868 js/jns avoids that, though. */
3869 UChar* p0 = p;
3870 /* --- decl 8(%rbp) --- */
3871 /* Need to compute the REX byte for the decl in order to prove
3872 that we don't need it, since this is a 32-bit inc and all
sewardja5b50222015-03-26 07:18:32 +00003873 registers involved in the amode are < r8. "1" because
sewardjc6f970f2012-04-02 21:54:49 +00003874 there's no register in this encoding; instead the register
3875 field is used as a sub opcode. The encoding for "decl r/m32"
sewardja5b50222015-03-26 07:18:32 +00003876 is FF /1, hence the "1". */
3877 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
sewardjc6f970f2012-04-02 21:54:49 +00003878 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
3879 *p++ = 0xFF;
sewardja5b50222015-03-26 07:18:32 +00003880 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
sewardjc6f970f2012-04-02 21:54:49 +00003881 vassert(p - p0 == 3);
3882 /* --- jns nofail --- */
3883 *p++ = 0x79;
3884 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3885 vassert(p - p0 == 5);
3886 /* --- jmp* 0(%rbp) --- */
3887 /* Once again, verify we don't need REX. The encoding is FF /4.
3888 We don't need REX.W since by default FF /4 in 64-bit mode
3889 implies a 64 bit load. */
sewardja5b50222015-03-26 07:18:32 +00003890 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
sewardjc6f970f2012-04-02 21:54:49 +00003891 if (rex != 0x40) goto bad;
3892 *p++ = 0xFF;
sewardja5b50222015-03-26 07:18:32 +00003893 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
sewardjc6f970f2012-04-02 21:54:49 +00003894 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3895 /* And crosscheck .. */
florian7ce2cc82015-01-10 16:10:58 +00003896 vassert(evCheckSzB_AMD64() == 8);
sewardjc6f970f2012-04-02 21:54:49 +00003897 goto done;
3898 }
3899
3900 case Ain_ProfInc: {
3901 /* We generate movabsq $0, %r11
3902 incq (%r11)
3903 in the expectation that a later call to LibVEX_patchProfCtr
3904 will be used to fill in the immediate field once the right
3905 value is known.
3906 49 BB 00 00 00 00 00 00 00 00
3907 49 FF 03
3908 */
3909 *p++ = 0x49; *p++ = 0xBB;
3910 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3911 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3912 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
3913 /* Tell the caller .. */
3914 vassert(!(*is_profInc));
3915 *is_profInc = True;
3916 goto done;
3917 }
3918
sewardjc33671d2005-02-01 20:30:00 +00003919 default:
3920 goto bad;
3921 }
3922
3923 bad:
cerion92b64362005-12-13 12:02:26 +00003924 ppAMD64Instr(i, mode64);
sewardjc33671d2005-02-01 20:30:00 +00003925 vpanic("emit_AMD64Instr");
3926 /*NOTREACHED*/
3927
sewardj813ce9e2005-02-04 21:16:48 +00003928 done:
sewardj70dbeb02015-08-12 11:15:53 +00003929 vassert(p - &buf[0] <= 64);
sewardjc33671d2005-02-01 20:30:00 +00003930 return p - &buf[0];
sewardjc33671d2005-02-01 20:30:00 +00003931}
sewardja3e98302005-02-01 15:55:05 +00003932
sewardjc6f970f2012-04-02 21:54:49 +00003933
3934/* How big is an event check? See case for Ain_EvCheck in
3935 emit_AMD64Instr just above. That crosschecks what this returns, so
3936 we can tell if we're inconsistent. */
florian7ce2cc82015-01-10 16:10:58 +00003937Int evCheckSzB_AMD64 (void)
sewardjc6f970f2012-04-02 21:54:49 +00003938{
3939 return 8;
3940}
3941
3942
3943/* NB: what goes on here has to be very closely coordinated with the
3944 emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
                                   void* place_to_chain,
                                   const void* disp_cp_chain_me_EXPECTED,
                                   const void* place_to_jump_to )
{
   /* Patch a previously-emitted XDirect call site so that it jumps
      straight to |place_to_jump_to| instead of calling the
      chain-me stub.  Returns the address/length of the patched
      bytes, so the caller can invalidate the relevant icache range. */
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movabsq $disp_cp_chain_me_EXPECTED, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
        41 FF D3
   */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
   vassert(p[10] == 0x41);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0xD3);
   /* And what we want to change it to is either:
        (general case):
          movabsq $place_to_jump_to, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to>
          41 FF E3
        So it's the same length (convenient, huh) and we don't
        need to change all the bits.
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq disp32   where disp32 is relative to the next insn
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B

      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- one billion, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5 (the E9
      jmp itself is 5 bytes long). */
   Long delta   = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
   Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;

   /* Counts short-form uses so that every 1024th one is forced to
      the long form (see rationale (2) above).  Must persist across
      calls, hence static. */
   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
                       "using long jmp\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      /* Short form: E9 disp32, padded out to 13 bytes with ud2s. */
      p[0] = 0xE9;
      write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
      p[5] = 0x0F; p[6]  = 0x0B;
      p[7] = 0x0F; p[8]  = 0x0B;
      p[9] = 0x0F; p[10] = 0x0B;
      p[11] = 0x0F; p[12] = 0x0B;
      /* sanity check on the delta -- top 32 are all 0 or all 1 */
      delta >>= 32;
      vassert(delta == 0LL || delta == -1LL);
   } else {
      /* Minimal modifications from the starting sequence: rewrite the
         movabsq immediate and turn "call *%r11" (D3) into
         "jmpq *%r11" (E3). */
      write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
      p[12] = 0xE3;
   }
   VexInvalRange vir = { (HWord)place_to_chain, 13 };
   return vir;
}
4027
4028
4029/* NB: what goes on here has to be very closely coordinated with the
4030 emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
                                     void* place_to_unchain,
                                     const void* place_to_jump_to_EXPECTED,
                                     const void* disp_cp_chain_me )
{
   /* Undo the effect of chainXDirect_AMD64: turn either of the two
      chained forms back into the original "call the chain-me stub"
      sequence.  Returns the patched address/length so the caller can
      invalidate the relevant icache range. */
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is either:
        (general case)
          movabsq $place_to_jump_to_EXPECTED, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to_EXPECTED>
          41 FF E3
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq d32
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B
   */
   UChar* p = (UChar*)place_to_unchain;
   Bool valid = False;
   if (p[0] == 0x49 && p[1] == 0xBB
       && read_misaligned_ULong_LE(&p[2])
          == (ULong)(Addr)place_to_jump_to_EXPECTED
       && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
      /* it's the long form */
      valid = True;
   }
   else
   if (p[0] == 0xE9
       && p[5]  == 0x0F && p[6]  == 0x0B
       && p[7]  == 0x0F && p[8]  == 0x0B
       && p[9]  == 0x0F && p[10] == 0x0B
       && p[11] == 0x0F && p[12] == 0x0B) {
      /* It's the short form.  Check the offset is right: the E9 disp32
         is relative to the end of the 5-byte jmp insn, hence the +5. */
      Int  s32 = (Int)read_misaligned_UInt_LE(&p[1]);
      Long s64 = (Long)s32;
      if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
      }
   }
   vassert(valid);
   /* And what we want to change it to is:
        movabsq $disp_cp_chain_me, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me>
        41 FF D3
      So it's the same length (convenient, huh).
   */
   p[0] = 0x49;
   p[1] = 0xBB;
   write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
   p[10] = 0x41;
   p[11] = 0xFF;
   p[12] = 0xD3;
   VexInvalRange vir = { (HWord)place_to_unchain, 13 };
   return vir;
}
4095
4096
4097/* Patch the counter address into a profile inc point, as previously
4098 created by the Ain_ProfInc case for emit_AMD64Instr. */
sewardj9b769162014-07-24 12:42:03 +00004099VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
4100 void* place_to_patch,
florian7d6f81d2014-09-22 21:43:37 +00004101 const ULong* location_of_counter )
sewardjc6f970f2012-04-02 21:54:49 +00004102{
sewardj9b769162014-07-24 12:42:03 +00004103 vassert(endness_host == VexEndnessLE);
sewardjc6f970f2012-04-02 21:54:49 +00004104 vassert(sizeof(ULong*) == 8);
4105 UChar* p = (UChar*)place_to_patch;
4106 vassert(p[0] == 0x49);
4107 vassert(p[1] == 0xBB);
4108 vassert(p[2] == 0x00);
4109 vassert(p[3] == 0x00);
4110 vassert(p[4] == 0x00);
4111 vassert(p[5] == 0x00);
4112 vassert(p[6] == 0x00);
4113 vassert(p[7] == 0x00);
4114 vassert(p[8] == 0x00);
4115 vassert(p[9] == 0x00);
4116 vassert(p[10] == 0x49);
4117 vassert(p[11] == 0xFF);
4118 vassert(p[12] == 0x03);
florian93a09742015-01-07 20:14:48 +00004119 ULong imm64 = (ULong)(Addr)location_of_counter;
sewardjc6f970f2012-04-02 21:54:49 +00004120 p[2] = imm64 & 0xFF; imm64 >>= 8;
4121 p[3] = imm64 & 0xFF; imm64 >>= 8;
4122 p[4] = imm64 & 0xFF; imm64 >>= 8;
4123 p[5] = imm64 & 0xFF; imm64 >>= 8;
4124 p[6] = imm64 & 0xFF; imm64 >>= 8;
4125 p[7] = imm64 & 0xFF; imm64 >>= 8;
4126 p[8] = imm64 & 0xFF; imm64 >>= 8;
4127 p[9] = imm64 & 0xFF; imm64 >>= 8;
florian5ea257b2012-09-29 17:05:46 +00004128 VexInvalRange vir = { (HWord)place_to_patch, 13 };
sewardjc6f970f2012-04-02 21:54:49 +00004129 return vir;
4130}
4131
4132
sewardja3e98302005-02-01 15:55:05 +00004133/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00004134/*--- end host_amd64_defs.c ---*/
sewardja3e98302005-02-01 15:55:05 +00004135/*---------------------------------------------------------------*/