blob: 27459c5023087794d431606ceeee469536d82583 [file] [log] [blame]
sewardja3e98302005-02-01 15:55:05 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_amd64_defs.c ---*/
sewardja3e98302005-02-01 15:55:05 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
sewardja3e98302005-02-01 15:55:05 +00009
sewardj89ae8472013-10-18 14:12:58 +000010 Copyright (C) 2004-2013 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardja3e98302005-02-01 15:55:05 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
sewardja3e98302005-02-01 15:55:05 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
sewardja3e98302005-02-01 15:55:05 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
sewardja3e98302005-02-01 15:55:05 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38#include "libvex_trc_values.h"
39
sewardjcef7d3e2009-07-02 12:21:59 +000040#include "main_util.h"
41#include "host_generic_regs.h"
42#include "host_amd64_defs.h"
sewardjc33671d2005-02-01 20:30:00 +000043
44
45/* --------- Registers. --------- */
46
47void ppHRegAMD64 ( HReg reg )
sewardj614b3fb2005-02-02 02:16:03 +000048{
49 Int r;
florian55085f82012-11-21 00:36:55 +000050 static const HChar* ireg64_names[16]
sewardj614b3fb2005-02-02 02:16:03 +000051 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
52 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
53 /* Be generic for all virtual regs. */
54 if (hregIsVirtual(reg)) {
55 ppHReg(reg);
56 return;
57 }
58 /* But specific for real regs. */
59 switch (hregClass(reg)) {
60 case HRcInt64:
61 r = hregNumber(reg);
62 vassert(r >= 0 && r < 16);
63 vex_printf("%s", ireg64_names[r]);
64 return;
65 case HRcFlt64:
66 r = hregNumber(reg);
67 vassert(r >= 0 && r < 6);
68 vex_printf("%%fake%d", r);
69 return;
70 case HRcVec128:
71 r = hregNumber(reg);
72 vassert(r >= 0 && r < 16);
73 vex_printf("%%xmm%d", r);
74 return;
75 default:
76 vpanic("ppHRegAMD64");
77 }
sewardjc33671d2005-02-01 20:30:00 +000078}
79
sewardj549e0642005-02-05 12:00:14 +000080static void ppHRegAMD64_lo32 ( HReg reg )
81{
82 Int r;
florian55085f82012-11-21 00:36:55 +000083 static const HChar* ireg32_names[16]
sewardj549e0642005-02-05 12:00:14 +000084 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
85 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
86 /* Be generic for all virtual regs. */
87 if (hregIsVirtual(reg)) {
88 ppHReg(reg);
89 vex_printf("d");
90 return;
91 }
92 /* But specific for real regs. */
93 switch (hregClass(reg)) {
94 case HRcInt64:
95 r = hregNumber(reg);
96 vassert(r >= 0 && r < 16);
97 vex_printf("%s", ireg32_names[r]);
98 return;
99 default:
100 vpanic("ppHRegAMD64_lo32: invalid regclass");
101 }
102}
103
/* Constructors for the real (non-virtual) AMD64 host registers.
   Each returns an HReg with the given hardware encoding number and
   register class; the final False marks the register as real.
   NOTE(review): no constructors for %xmm2 or %xmm13-%xmm15 are
   visible in this chunk -- presumably those registers are reserved
   for other purposes; confirm against the rest of the file. */
HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
HReg hregAMD64_R8  ( void ) { return mkHReg( 8, HRcInt64, False); }
HReg hregAMD64_R9  ( void ) { return mkHReg( 9, HRcInt64, False); }
HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }

HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); }
HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); }
HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); }
HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); }
HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); }
HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); }
HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); }
HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
sewardjc4530ae2012-05-21 10:18:49 +0000133
sewardjc33671d2005-02-01 20:30:00 +0000134
/* Return (in *nregs, *arr) the set of host registers the register
   allocator may use.  The array is allocated with LibVEX_Alloc; the
   caller does not free it.  Registers absent from this list (e.g.
   %rax, %rcx, %rdx, %rsp, %rbp, %r11, and several xmm regs) are
   thereby reserved -- presumably for fixed roles such as scratch,
   guest-state pointer and stack; confirm against the isel/emitter. */
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
/* Dead alternative kept for reference: a minimal 6-register set,
   apparently for allocator stress-testing.  Disabled via #if 0. */
#if 0
   *nregs = 6;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_RBX();

   (*arr)[ 3] = hregAMD64_XMM7();
   (*arr)[ 4] = hregAMD64_XMM8();
   (*arr)[ 5] = hregAMD64_XMM9();
#endif
#if 1
   /* The live configuration: 10 integer regs + 10 vector regs. */
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_R8();
   (*arr)[ 3] = hregAMD64_R9();
   (*arr)[ 4] = hregAMD64_R12();
   (*arr)[ 5] = hregAMD64_R13();
   (*arr)[ 6] = hregAMD64_R14();
   (*arr)[ 7] = hregAMD64_R15();
   (*arr)[ 8] = hregAMD64_RBX();

   (*arr)[ 9] = hregAMD64_XMM3();
   (*arr)[10] = hregAMD64_XMM4();
   (*arr)[11] = hregAMD64_XMM5();
   (*arr)[12] = hregAMD64_XMM6();
   (*arr)[13] = hregAMD64_XMM7();
   (*arr)[14] = hregAMD64_XMM8();
   (*arr)[15] = hregAMD64_XMM9();
   (*arr)[16] = hregAMD64_XMM10();
   (*arr)[17] = hregAMD64_XMM11();
   (*arr)[18] = hregAMD64_XMM12();
   /* %r10 was added to the allocable set last. */
   (*arr)[19] = hregAMD64_R10();
#endif
}
174
175
sewardjf67eadf2005-02-03 03:53:52 +0000176/* --------- Condition codes, Intel encoding. --------- */
177
florian55085f82012-11-21 00:36:55 +0000178const HChar* showAMD64CondCode ( AMD64CondCode cond )
sewardjf67eadf2005-02-03 03:53:52 +0000179{
180 switch (cond) {
181 case Acc_O: return "o";
182 case Acc_NO: return "no";
183 case Acc_B: return "b";
184 case Acc_NB: return "nb";
185 case Acc_Z: return "z";
186 case Acc_NZ: return "nz";
187 case Acc_BE: return "be";
188 case Acc_NBE: return "nbe";
189 case Acc_S: return "s";
190 case Acc_NS: return "ns";
191 case Acc_P: return "p";
192 case Acc_NP: return "np";
193 case Acc_L: return "l";
194 case Acc_NL: return "nl";
195 case Acc_LE: return "le";
196 case Acc_NLE: return "nle";
197 case Acc_ALWAYS: return "ALWAYS";
198 default: vpanic("ppAMD64CondCode");
199 }
200}
sewardj614b3fb2005-02-02 02:16:03 +0000201
202
203/* --------- AMD64AMode: memory address expressions. --------- */
204
205AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
206 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
207 am->tag = Aam_IR;
208 am->Aam.IR.imm = imm32;
209 am->Aam.IR.reg = reg;
210 return am;
211}
212AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
213 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
214 am->tag = Aam_IRRS;
215 am->Aam.IRRS.imm = imm32;
216 am->Aam.IRRS.base = base;
217 am->Aam.IRRS.index = indEx;
218 am->Aam.IRRS.shift = shift;
219 vassert(shift >= 0 && shift <= 3);
220 return am;
221}
222
sewardj614b3fb2005-02-02 02:16:03 +0000223void ppAMD64AMode ( AMD64AMode* am ) {
224 switch (am->tag) {
225 case Aam_IR:
226 if (am->Aam.IR.imm == 0)
227 vex_printf("(");
228 else
229 vex_printf("0x%x(", am->Aam.IR.imm);
230 ppHRegAMD64(am->Aam.IR.reg);
231 vex_printf(")");
232 return;
233 case Aam_IRRS:
234 vex_printf("0x%x(", am->Aam.IRRS.imm);
235 ppHRegAMD64(am->Aam.IRRS.base);
236 vex_printf(",");
237 ppHRegAMD64(am->Aam.IRRS.index);
238 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
239 return;
240 default:
241 vpanic("ppAMD64AMode");
242 }
243}
244
sewardjf67eadf2005-02-03 03:53:52 +0000245static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
246 switch (am->tag) {
247 case Aam_IR:
248 addHRegUse(u, HRmRead, am->Aam.IR.reg);
249 return;
250 case Aam_IRRS:
251 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
252 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
253 return;
254 default:
255 vpanic("addRegUsage_AMD64AMode");
256 }
257}
258
259static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
260 switch (am->tag) {
261 case Aam_IR:
262 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
263 return;
264 case Aam_IRRS:
265 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
266 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
267 return;
268 default:
269 vpanic("mapRegs_AMD64AMode");
270 }
271}
sewardj614b3fb2005-02-02 02:16:03 +0000272
273/* --------- Operand, which can be reg, immediate or memory. --------- */
274
275AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
276 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
277 op->tag = Armi_Imm;
278 op->Armi.Imm.imm32 = imm32;
279 return op;
280}
sewardj8258a8c2005-02-02 03:11:24 +0000281AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
282 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
283 op->tag = Armi_Reg;
284 op->Armi.Reg.reg = reg;
285 return op;
286}
sewardj614b3fb2005-02-02 02:16:03 +0000287AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
288 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
289 op->tag = Armi_Mem;
290 op->Armi.Mem.am = am;
291 return op;
292}
293
sewardj9cc2bbf2011-06-05 17:56:03 +0000294static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
sewardj614b3fb2005-02-02 02:16:03 +0000295 switch (op->tag) {
296 case Armi_Imm:
297 vex_printf("$0x%x", op->Armi.Imm.imm32);
298 return;
sewardj9cc2bbf2011-06-05 17:56:03 +0000299 case Armi_Reg:
300 if (lo32)
301 ppHRegAMD64_lo32(op->Armi.Reg.reg);
302 else
303 ppHRegAMD64(op->Armi.Reg.reg);
sewardj614b3fb2005-02-02 02:16:03 +0000304 return;
305 case Armi_Mem:
306 ppAMD64AMode(op->Armi.Mem.am);
307 return;
308 default:
309 vpanic("ppAMD64RMI");
310 }
311}
sewardj9cc2bbf2011-06-05 17:56:03 +0000312void ppAMD64RMI ( AMD64RMI* op ) {
313 ppAMD64RMI_wrk(op, False/*!lo32*/);
314}
315void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
316 ppAMD64RMI_wrk(op, True/*lo32*/);
317}
sewardj614b3fb2005-02-02 02:16:03 +0000318
sewardjf67eadf2005-02-03 03:53:52 +0000319/* An AMD64RMI can only be used in a "read" context (what would it mean
320 to write or modify a literal?) and so we enumerate its registers
321 accordingly. */
322static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
323 switch (op->tag) {
324 case Armi_Imm:
325 return;
326 case Armi_Reg:
327 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
328 return;
329 case Armi_Mem:
330 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
331 return;
332 default:
333 vpanic("addRegUsage_AMD64RMI");
334 }
335}
336
337static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
338 switch (op->tag) {
339 case Armi_Imm:
340 return;
341 case Armi_Reg:
342 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
343 return;
344 case Armi_Mem:
345 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
346 return;
347 default:
348 vpanic("mapRegs_AMD64RMI");
349 }
350}
351
352
353/* --------- Operand, which can be reg or immediate only. --------- */
354
355AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
356 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI));
357 op->tag = Ari_Imm;
358 op->Ari.Imm.imm32 = imm32;
359 return op;
360}
361AMD64RI* AMD64RI_Reg ( HReg reg ) {
362 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI));
363 op->tag = Ari_Reg;
364 op->Ari.Reg.reg = reg;
365 return op;
366}
367
368void ppAMD64RI ( AMD64RI* op ) {
369 switch (op->tag) {
370 case Ari_Imm:
371 vex_printf("$0x%x", op->Ari.Imm.imm32);
372 return;
373 case Ari_Reg:
374 ppHRegAMD64(op->Ari.Reg.reg);
375 return;
376 default:
377 vpanic("ppAMD64RI");
378 }
379}
380
381/* An AMD64RI can only be used in a "read" context (what would it mean
382 to write or modify a literal?) and so we enumerate its registers
383 accordingly. */
384static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
385 switch (op->tag) {
386 case Ari_Imm:
387 return;
388 case Ari_Reg:
389 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
390 return;
391 default:
392 vpanic("addRegUsage_AMD64RI");
393 }
394}
395
396static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
397 switch (op->tag) {
398 case Ari_Imm:
399 return;
400 case Ari_Reg:
401 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
402 return;
403 default:
404 vpanic("mapRegs_AMD64RI");
405 }
406}
sewardj8258a8c2005-02-02 03:11:24 +0000407
408
409/* --------- Operand, which can be reg or memory only. --------- */
410
411AMD64RM* AMD64RM_Reg ( HReg reg ) {
412 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM));
413 op->tag = Arm_Reg;
414 op->Arm.Reg.reg = reg;
415 return op;
416}
sewardj05b3b6a2005-02-04 01:44:33 +0000417AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
418 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM));
419 op->tag = Arm_Mem;
420 op->Arm.Mem.am = am;
421 return op;
422}
sewardj8258a8c2005-02-02 03:11:24 +0000423
424void ppAMD64RM ( AMD64RM* op ) {
425 switch (op->tag) {
426 case Arm_Mem:
427 ppAMD64AMode(op->Arm.Mem.am);
428 return;
429 case Arm_Reg:
430 ppHRegAMD64(op->Arm.Reg.reg);
431 return;
432 default:
433 vpanic("ppAMD64RM");
434 }
435}
436
sewardjf67eadf2005-02-03 03:53:52 +0000437/* Because an AMD64RM can be both a source or destination operand, we
438 have to supply a mode -- pertaining to the operand as a whole --
439 indicating how it's being used. */
440static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
441 switch (op->tag) {
442 case Arm_Mem:
443 /* Memory is read, written or modified. So we just want to
444 know the regs read by the amode. */
445 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
446 return;
447 case Arm_Reg:
448 /* reg is read, written or modified. Add it in the
449 appropriate way. */
450 addHRegUse(u, mode, op->Arm.Reg.reg);
451 return;
452 default:
453 vpanic("addRegUsage_AMD64RM");
454 }
455}
456
457static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
458{
459 switch (op->tag) {
460 case Arm_Mem:
461 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
462 return;
463 case Arm_Reg:
464 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
465 return;
466 default:
467 vpanic("mapRegs_AMD64RM");
468 }
469}
470
471
sewardj9b967672005-02-08 11:13:09 +0000472/* --------- Instructions. --------- */
473
florian55085f82012-11-21 00:36:55 +0000474static const HChar* showAMD64ScalarSz ( Int sz ) {
sewardj9b967672005-02-08 11:13:09 +0000475 switch (sz) {
476 case 2: return "w";
477 case 4: return "l";
478 case 8: return "q";
479 default: vpanic("showAMD64ScalarSz");
480 }
481}
482
florian55085f82012-11-21 00:36:55 +0000483const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
sewardjd0a12df2005-02-10 02:07:43 +0000484 switch (op) {
485 case Aun_NOT: return "not";
486 case Aun_NEG: return "neg";
487 default: vpanic("showAMD64UnaryOp");
488 }
489}
sewardj614b3fb2005-02-02 02:16:03 +0000490
florian55085f82012-11-21 00:36:55 +0000491const HChar* showAMD64AluOp ( AMD64AluOp op ) {
sewardj614b3fb2005-02-02 02:16:03 +0000492 switch (op) {
493 case Aalu_MOV: return "mov";
494 case Aalu_CMP: return "cmp";
495 case Aalu_ADD: return "add";
496 case Aalu_SUB: return "sub";
497 case Aalu_ADC: return "adc";
498 case Aalu_SBB: return "sbb";
499 case Aalu_AND: return "and";
500 case Aalu_OR: return "or";
501 case Aalu_XOR: return "xor";
sewardj7de0d3c2005-02-13 02:26:41 +0000502 case Aalu_MUL: return "imul";
sewardj614b3fb2005-02-02 02:16:03 +0000503 default: vpanic("showAMD64AluOp");
504 }
505}
506
florian55085f82012-11-21 00:36:55 +0000507const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
sewardj8258a8c2005-02-02 03:11:24 +0000508 switch (op) {
509 case Ash_SHL: return "shl";
510 case Ash_SHR: return "shr";
511 case Ash_SAR: return "sar";
512 default: vpanic("showAMD64ShiftOp");
513 }
514}
515
/* Return the (suffix of the) x87 FPU mnemonic for 'op'; callers
   presumably prepend "f" when printing -- confirm at call sites. */
const HChar* showA87FpOp ( A87FpOp op ) {
   switch (op) {
      case Afp_SCALE:  return "scale";
      case Afp_ATAN:   return "atan";
      case Afp_YL2X:   return "yl2x";
      case Afp_YL2XP1: return "yl2xp1";
      case Afp_PREM:   return "prem";
      case Afp_PREM1:  return "prem1";
      case Afp_SQRT:   return "sqrt";
      case Afp_SIN:    return "sin";
      case Afp_COS:    return "cos";
      case Afp_TAN:    return "tan";
      case Afp_ROUND:  return "round";
      case Afp_2XM1:   return "2xm1";
      default: vpanic("showA87FpOp");
   }
}
sewardj1001dc42005-02-21 08:25:55 +0000533
/* Return the mnemonic (or mnemonic stem, for the float ops that take
   a ss/sd/ps/pd suffix elsewhere) for SSE op 'op'. */
const HChar* showAMD64SseOp ( AMD64SseOp op ) {
   switch (op) {
      case Asse_MOV:      return "movups";
      /* Scalar/packed FP arithmetic -- stems only. */
      case Asse_ADDF:     return "add";
      case Asse_SUBF:     return "sub";
      case Asse_MULF:     return "mul";
      case Asse_DIVF:     return "div";
      case Asse_MAXF:     return "max";
      case Asse_MINF:     return "min";
      case Asse_CMPEQF:   return "cmpFeq";
      case Asse_CMPLTF:   return "cmpFlt";
      case Asse_CMPLEF:   return "cmpFle";
      case Asse_CMPUNF:   return "cmpFun";
      case Asse_RCPF:     return "rcp";
      case Asse_RSQRTF:   return "rsqrt";
      case Asse_SQRTF:    return "sqrt";
      /* Bitwise ops. */
      case Asse_AND:      return "and";
      case Asse_OR:       return "or";
      case Asse_XOR:      return "xor";
      case Asse_ANDN:     return "andn";
      /* Integer SIMD: adds, saturating adds. */
      case Asse_ADD8:     return "paddb";
      case Asse_ADD16:    return "paddw";
      case Asse_ADD32:    return "paddd";
      case Asse_ADD64:    return "paddq";
      case Asse_QADD8U:   return "paddusb";
      case Asse_QADD16U:  return "paddusw";
      case Asse_QADD8S:   return "paddsb";
      case Asse_QADD16S:  return "paddsw";
      /* Subs, saturating subs. */
      case Asse_SUB8:     return "psubb";
      case Asse_SUB16:    return "psubw";
      case Asse_SUB32:    return "psubd";
      case Asse_SUB64:    return "psubq";
      case Asse_QSUB8U:   return "psubusb";
      case Asse_QSUB16U:  return "psubusw";
      case Asse_QSUB8S:   return "psubsb";
      case Asse_QSUB16S:  return "psubsw";
      /* Multiplies, averages, min/max. */
      case Asse_MUL16:    return "pmullw";
      case Asse_MULHI16U: return "pmulhuw";
      case Asse_MULHI16S: return "pmulhw";
      case Asse_AVG8U:    return "pavgb";
      case Asse_AVG16U:   return "pavgw";
      case Asse_MAX16S:   return "pmaxw";
      case Asse_MAX8U:    return "pmaxub";
      case Asse_MIN16S:   return "pminw";
      case Asse_MIN8U:    return "pminub";
      /* Compares. */
      case Asse_CMPEQ8:   return "pcmpeqb";
      case Asse_CMPEQ16:  return "pcmpeqw";
      case Asse_CMPEQ32:  return "pcmpeqd";
      case Asse_CMPGT8S:  return "pcmpgtb";
      case Asse_CMPGT16S: return "pcmpgtw";
      case Asse_CMPGT32S: return "pcmpgtd";
      /* Shifts. */
      case Asse_SHL16:    return "psllw";
      case Asse_SHL32:    return "pslld";
      case Asse_SHL64:    return "psllq";
      case Asse_SHR16:    return "psrlw";
      case Asse_SHR32:    return "psrld";
      case Asse_SHR64:    return "psrlq";
      case Asse_SAR16:    return "psraw";
      case Asse_SAR32:    return "psrad";
      /* Pack/unpack. */
      case Asse_PACKSSD:  return "packssdw";
      case Asse_PACKSSW:  return "packsswb";
      case Asse_PACKUSW:  return "packuswb";
      case Asse_UNPCKHB:  return "punpckhb";
      case Asse_UNPCKHW:  return "punpckhw";
      case Asse_UNPCKHD:  return "punpckhd";
      case Asse_UNPCKHQ:  return "punpckhq";
      case Asse_UNPCKLB:  return "punpcklb";
      case Asse_UNPCKLW:  return "punpcklw";
      case Asse_UNPCKLD:  return "punpckld";
      case Asse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showAMD64SseOp");
   }
}
sewardj614b3fb2005-02-02 02:16:03 +0000607
/* Constructors for AMD64Instr.  Each allocates an instruction node
   from the VEX arena, sets the tag and payload, and sanity-checks
   arguments with vassert where constraints exist. */

/* 64-bit immediate -> register. */
AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Imm64;
   i->Ain.Imm64.imm64 = imm64;
   i->Ain.Imm64.dst = dst;
   return i;
}
/* 64-bit ALU op, reg/mem/imm source, register destination. */
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Alu64R;
   i->Ain.Alu64R.op = op;
   i->Ain.Alu64R.src = src;
   i->Ain.Alu64R.dst = dst;
   return i;
}
/* 64-bit ALU op, reg/imm source, memory destination.  MUL has no
   memory-destination form, hence the assertion. */
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Alu64M;
   i->Ain.Alu64M.op = op;
   i->Ain.Alu64M.src = src;
   i->Ain.Alu64M.dst = dst;
   vassert(op != Aalu_MUL);
   return i;
}
/* 64-bit shift by immediate 'src' (a src of 0 presumably means
   shift-by-%cl -- confirm in the emitter). */
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Sh64;
   i->Ain.Sh64.op = op;
   i->Ain.Sh64.src = src;
   i->Ain.Sh64.dst = dst;
   return i;
}
/* test imm32, dst (sets flags only). */
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Test64;
   i->Ain.Test64.imm32 = imm32;
   i->Ain.Test64.dst = dst;
   return i;
}
/* Unary op (not/neg) on a 64-bit register. */
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Unary64;
   i->Ain.Unary64.op = op;
   i->Ain.Unary64.dst = dst;
   return i;
}
/* lea am, dst. */
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Lea64;
   i->Ain.Lea64.am = am;
   i->Ain.Lea64.dst = dst;
   return i;
}
/* 32-bit ALU op on the low half of a 64-bit register.  Only the ops
   listed in the switch are representable; MOV/ADC/SBB/MUL are not. */
AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Alu32R;
   i->Ain.Alu32R.op = op;
   i->Ain.Alu32R.src = src;
   i->Ain.Alu32R.dst = dst;
   switch (op) {
      case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
      case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
      default: vassert(0);
   }
   return i;
}
/* Widening multiply (signed or unsigned); operands/results are in
   fixed registers -- presumably %rax/%rdx per the hardware MUL/IMUL
   convention, confirm in the emitter. */
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_MulL;
   i->Ain.MulL.syned = syned;
   i->Ain.MulL.src = src;
   return i;
}
/* 32- or 64-bit divide (signed or unsigned). */
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Div;
   i->Ain.Div.syned = syned;
   i->Ain.Div.sz = sz;
   i->Ain.Div.src = src;
   vassert(sz == 4 || sz == 8);
   return i;
}
/* push src. */
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Push;
   i->Ain.Push.src = src;
   return i;
}
/* Conditional call to a fixed address.  'regparms' is the number of
   integer register parameters (0..6 -- matching the number of SysV
   AMD64 integer argument registers); 'rloc' describes where the
   callee's return value lives. */
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
                              RetLoc rloc ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Call;
   i->Ain.Call.cond = cond;
   i->Ain.Call.target = target;
   i->Ain.Call.regparms = regparms;
   i->Ain.Call.rloc = rloc;
   vassert(regparms >= 0 && regparms <= 6);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
sewardjc6f970f2012-04-02 21:54:49 +0000708
/* Conditional direct transfer to known guest address 'dstGA', storing
   it at 'amRIP' (the guest RIP slot).  'toFastEP' selects the
   fast entry point -- presumably for translation chaining; confirm
   in the emitter. */
AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
                                 AMD64CondCode cond, Bool toFastEP ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_XDirect;
   i->Ain.XDirect.dstGA = dstGA;
   i->Ain.XDirect.amRIP = amRIP;
   i->Ain.XDirect.cond = cond;
   i->Ain.XDirect.toFastEP = toFastEP;
   return i;
}
/* Conditional indirect transfer: guest address is in register
   'dstGA'. */
AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
                                AMD64CondCode cond ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_XIndir;
   i->Ain.XIndir.dstGA = dstGA;
   i->Ain.XIndir.amRIP = amRIP;
   i->Ain.XIndir.cond = cond;
   return i;
}
/* Conditional assisted transfer, carrying a jump kind 'jk' for the
   run-time assistant. */
AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
                                   AMD64CondCode cond, IRJumpKind jk ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_XAssisted;
   i->Ain.XAssisted.dstGA = dstGA;
   i->Ain.XAssisted.amRIP = amRIP;
   i->Ain.XAssisted.cond = cond;
   i->Ain.XAssisted.jk = jk;
   return i;
}
738
/* Conditional move; an unconditional cmov makes no sense, hence the
   assertion. */
AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_CMov64;
   i->Ain.CMov64.cond = cond;
   i->Ain.CMov64.src = src;
   i->Ain.CMov64.dst = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
/* 32 -> 64 bit widening move, sign- or zero-extending per 'syned'. */
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_MovxLQ;
   i->Ain.MovxLQ.syned = syned;
   i->Ain.MovxLQ.src = src;
   i->Ain.MovxLQ.dst = dst;
   return i;
}
/* Widening load of 1/2/4 bytes into a 64-bit register. */
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_LoadEX;
   i->Ain.LoadEX.szSmall = szSmall;
   i->Ain.LoadEX.syned = syned;
   i->Ain.LoadEX.src = src;
   i->Ain.LoadEX.dst = dst;
   vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
   return i;
}
/* Narrow (1/2/4-byte) store; 8-byte stores are presumably done via
   Alu64M -- confirm at call sites. */
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Store;
   i->Ain.Store.sz = sz;
   i->Ain.Store.src = src;
   i->Ain.Store.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}
/* setCC-style: write 0 or 1 to 'dst' according to 'cond'. */
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Set64;
   i->Ain.Set64.cond = cond;
   i->Ain.Set64.dst = dst;
   return i;
}
/* Bit scan: forwards (bsf) if 'isFwds', else reverse (bsr). */
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_Bsfr64;
   i->Ain.Bsfr64.isFwds = isFwds;
   i->Ain.Bsfr64.src = src;
   i->Ain.Bsfr64.dst = dst;
   return i;
}
/* Memory fence; carries no operands. */
AMD64Instr* AMD64Instr_MFence ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_MFence;
   return i;
}
/* Atomic compare-and-swap of 1/2/4/8 bytes at 'addr'. */
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_ACAS;
   i->Ain.ACAS.addr = addr;
   i->Ain.ACAS.sz = sz;
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return i;
}
/* Double-width compare-and-swap: 'sz' is the size of each half
   (4 or 8 bytes). */
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_DACAS;
   i->Ain.DACAS.addr = addr;
   i->Ain.DACAS.sz = sz;
   vassert(sz == 8 || sz == 4);
   return i;
}
812
/* Free up the bottom 'nregs' x87 stack slots (1..7). */
AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_A87Free;
   i->Ain.A87Free.nregs = nregs;
   vassert(nregs >= 1 && nregs <= 7);
   return i;
}
/* Push a 4- or 8-byte float at 'addr' onto the x87 stack, or pop the
   top of stack to 'addr', per 'isPush'. */
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_A87PushPop;
   i->Ain.A87PushPop.addr = addr;
   i->Ain.A87PushPop.isPush = isPush;
   i->Ain.A87PushPop.szB = szB;
   vassert(szB == 8 || szB == 4);
   return i;
}
/* Perform x87 operation 'op' on the FPU stack. */
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_A87FpOp;
   i->Ain.A87FpOp.op = op;
   return i;
}
/* Load the x87 control word from 'addr' (fldcw). */
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_A87LdCW;
   i->Ain.A87LdCW.addr = addr;
   return i;
}
/* Store the x87 status word to 'addr' (fstsw). */
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_A87StSW;
   i->Ain.A87StSW.addr = addr;
   return i;
}
/* Load the SSE control/status register from 'addr' (ldmxcsr). */
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_LdMXCSR;
   i->Ain.LdMXCSR.addr = addr;
   return i;
}
/* Unordered FP compare of two xmm regs ('sz' = 4 or 8 bytes),
   result condition codes captured into integer reg 'dst'. */
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_SseUComIS;
   i->Ain.SseUComIS.sz = toUChar(sz);
   i->Ain.SseUComIS.srcL = srcL;
   i->Ain.SseUComIS.srcR = srcR;
   i->Ain.SseUComIS.dst = dst;
   vassert(sz == 4 || sz == 8);
   return i;
}
/* Signed int -> scalar float conversion; 'szS'/'szD' are the source
   int and destination float sizes (each 4 or 8 bytes). */
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_SseSI2SF;
   i->Ain.SseSI2SF.szS = toUChar(szS);
   i->Ain.SseSI2SF.szD = toUChar(szD);
   i->Ain.SseSI2SF.src = src;
   i->Ain.SseSI2SF.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
/* Scalar float -> signed int conversion; sizes as above. */
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_SseSF2SI;
   i->Ain.SseSF2SI.szS = toUChar(szS);
   i->Ain.SseSF2SI.szD = toUChar(szD);
   i->Ain.SseSF2SI.src = src;
   i->Ain.SseSF2SI.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
/* Scalar double <-> single conversion: double-to-single when
   'from64' holds, else single-to-double. */
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag = Ain_SseSDSS;
   i->Ain.SseSDSS.from64 = from64;
   i->Ain.SseSDSS.src = src;
   i->Ain.SseSDSS.dst = dst;
   return i;
}
sewardj18303862005-02-21 12:36:54 +0000899AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
900 HReg reg, AMD64AMode* addr ) {
sewardj1001dc42005-02-21 08:25:55 +0000901 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
902 i->tag = Ain_SseLdSt;
903 i->Ain.SseLdSt.isLoad = isLoad;
sewardj03ccf852005-03-21 02:47:42 +0000904 i->Ain.SseLdSt.sz = toUChar(sz);
sewardj1001dc42005-02-21 08:25:55 +0000905 i->Ain.SseLdSt.reg = reg;
906 i->Ain.SseLdSt.addr = addr;
sewardj18303862005-02-21 12:36:54 +0000907 vassert(sz == 4 || sz == 8 || sz == 16);
sewardj1001dc42005-02-21 08:25:55 +0000908 return i;
909}
910AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
911{
912 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
913 i->tag = Ain_SseLdzLO;
914 i->Ain.SseLdzLO.sz = sz;
915 i->Ain.SseLdzLO.reg = reg;
916 i->Ain.SseLdzLO.addr = addr;
917 vassert(sz == 4 || sz == 8);
918 return i;
919}
sewardj8d965312005-02-25 02:48:47 +0000920AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
921 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
922 i->tag = Ain_Sse32Fx4;
923 i->Ain.Sse32Fx4.op = op;
924 i->Ain.Sse32Fx4.src = src;
925 i->Ain.Sse32Fx4.dst = dst;
926 vassert(op != Asse_MOV);
927 return i;
928}
929AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
930 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
931 i->tag = Ain_Sse32FLo;
932 i->Ain.Sse32FLo.op = op;
933 i->Ain.Sse32FLo.src = src;
934 i->Ain.Sse32FLo.dst = dst;
935 vassert(op != Asse_MOV);
936 return i;
937}
sewardj4c328cf2005-05-05 12:05:54 +0000938AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
939 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
940 i->tag = Ain_Sse64Fx2;
941 i->Ain.Sse64Fx2.op = op;
942 i->Ain.Sse64Fx2.src = src;
943 i->Ain.Sse64Fx2.dst = dst;
944 vassert(op != Asse_MOV);
945 return i;
946}
sewardj1001dc42005-02-21 08:25:55 +0000947AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
948 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
949 i->tag = Ain_Sse64FLo;
950 i->Ain.Sse64FLo.op = op;
951 i->Ain.Sse64FLo.src = src;
952 i->Ain.Sse64FLo.dst = dst;
953 vassert(op != Asse_MOV);
954 return i;
955}
956AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
957 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
958 i->tag = Ain_SseReRg;
959 i->Ain.SseReRg.op = op;
960 i->Ain.SseReRg.src = re;
961 i->Ain.SseReRg.dst = rg;
962 return i;
963}
sewardj8d965312005-02-25 02:48:47 +0000964AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
965 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
966 i->tag = Ain_SseCMov;
967 i->Ain.SseCMov.cond = cond;
968 i->Ain.SseCMov.src = src;
969 i->Ain.SseCMov.dst = dst;
970 vassert(cond != Acc_ALWAYS);
971 return i;
972}
sewardj09717342005-05-05 21:34:02 +0000973AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
974 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
975 i->tag = Ain_SseShuf;
976 i->Ain.SseShuf.order = order;
977 i->Ain.SseShuf.src = src;
978 i->Ain.SseShuf.dst = dst;
979 vassert(order >= 0 && order <= 0xFF);
980 return i;
981}
sewardj3616a2e2012-05-27 16:18:13 +0000982//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
983//uu HReg reg, AMD64AMode* addr ) {
984//uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
985//uu i->tag = Ain_AvxLdSt;
986//uu i->Ain.AvxLdSt.isLoad = isLoad;
987//uu i->Ain.AvxLdSt.reg = reg;
988//uu i->Ain.AvxLdSt.addr = addr;
989//uu return i;
990//uu }
991//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
992//uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
993//uu i->tag = Ain_AvxReRg;
994//uu i->Ain.AvxReRg.op = op;
995//uu i->Ain.AvxReRg.src = re;
996//uu i->Ain.AvxReRg.dst = rg;
997//uu return i;
998//uu }
sewardjc6f970f2012-04-02 21:54:49 +0000999AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1000 AMD64AMode* amFailAddr ) {
1001 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
1002 i->tag = Ain_EvCheck;
1003 i->Ain.EvCheck.amCounter = amCounter;
1004 i->Ain.EvCheck.amFailAddr = amFailAddr;
1005 return i;
1006}
1007AMD64Instr* AMD64Instr_ProfInc ( void ) {
1008 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
1009 i->tag = Ain_ProfInc;
1010 return i;
1011}
sewardjc33671d2005-02-01 20:30:00 +00001012
/* Pretty-print host instruction 'i' to the VEX log in AT&T-style
   syntax.  'mode64' must be True: VEX generates only 64-bit code on
   this host.  Some cases end with 'break' rather than 'return';
   both simply terminate the function, since nothing follows the
   switch. */
void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
         ppHRegAMD64(i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
         ppAMD64RMI(i->Ain.Alu64R.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
         ppAMD64RI(i->Ain.Alu64M.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
         /* A shift amount of 0 encodes "shift by %cl". */
         if (i->Ain.Sh64.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Ain.Sh64.src);
         ppHRegAMD64(i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
         ppHRegAMD64(i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
         ppHRegAMD64(i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         vex_printf("leaq ");
         ppAMD64AMode(i->Ain.Lea64.am);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
         ppAMD64RMI_lo32(i->Ain.Alu32R.src);
         vex_printf(",");
         ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
         ppAMD64RM(i->Ain.MulL.src);
         return;
      case Ain_Div:
         vex_printf("%cdiv%s ",
                    i->Ain.Div.syned ? 's' : 'u',
                    showAMD64ScalarSz(i->Ain.Div.sz));
         ppAMD64RM(i->Ain.Div.src);
         return;
      case Ain_Push:
         vex_printf("pushq ");
         ppAMD64RMI(i->Ain.Push.src);
         return;
      case Ain_Call:
         /* Shows the condition, regparm count and return-value
            location, then the raw target address. */
         vex_printf("call%s[%d,",
                    i->Ain.Call.cond==Acc_ALWAYS
                       ? "" : showAMD64CondCode(i->Ain.Call.cond),
                    i->Ain.Call.regparms );
         ppRetLoc(i->Ain.Call.rloc);
         vex_printf("] 0x%llx", i->Ain.Call.target);
         break;

      /* The three translation-chaining exit forms are each shown as a
         guarded instruction sequence. */
      case Ain_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XDirect.cond));
         vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
         vex_printf("movq %%r11,");
         ppAMD64AMode(i->Ain.XDirect.amRIP);
         vex_printf("; ");
         vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
                    i->Ain.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Ain_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XIndir.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XIndir.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XIndir.amRIP);
         vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
         return;
      case Ain_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XAssisted.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XAssisted.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XAssisted.amRIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
                    (Int)i->Ain.XAssisted.jk);
         vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
         return;

      case Ain_CMov64:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
         ppAMD64RM(i->Ain.CMov64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
         ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         /* A 32-bit unsigned widening load is just "movl", since
            writing the low 32 bits zeroes the upper half anyway. */
         if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
            vex_printf("movl ");
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
         } else {
            vex_printf("mov%c%cq ",
                       i->Ain.LoadEX.syned ? 's' : 'z',
                       i->Ain.LoadEX.szSmall==1
                          ? 'b'
                          : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64(i->Ain.LoadEX.dst);
         }
         return;
      case Ain_Store:
         vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
                              : (i->Ain.Store.sz==2 ? 'w' : 'l'));
         ppHRegAMD64(i->Ain.Store.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Store.dst);
         return;
      case Ain_Set64:
         vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
         ppHRegAMD64(i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
         ppHRegAMD64(i->Ain.Bsfr64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         vex_printf("mfence" );
         return;
      case Ain_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
                    : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
         vex_printf("{%%rax->%%rbx},");
         ppAMD64AMode(i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
                    (Int)(2 * i->Ain.DACAS.sz));
         ppAMD64AMode(i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
         break;
      case Ain_A87PushPop:
         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
         ppAMD64AMode(i->Ain.A87PushPop.addr);
         break;
      case Ain_A87FpOp:
         vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
         break;
      case Ain_A87LdCW:
         vex_printf("fldcw ");
         ppAMD64AMode(i->Ain.A87LdCW.addr);
         break;
      case Ain_A87StSW:
         vex_printf("fstsw ");
         ppAMD64AMode(i->Ain.A87StSW.addr);
         break;
      case Ain_LdMXCSR:
         vex_printf("ldmxcsr ");
         ppAMD64AMode(i->Ain.LdMXCSR.addr);
         break;
      case Ain_SseUComIS:
         vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseUComIS.srcL);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseUComIS.srcR);
         vex_printf(" ; pushfq ; popq ");
         ppHRegAMD64(i->Ain.SseUComIS.dst);
         break;
      case Ain_SseSI2SF:
         /* Source register width is chosen by szS; only the low 32
            bits are shown for a 4-byte source. */
         vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
         (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSI2SF.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSI2SF.dst);
         break;
      case Ain_SseSF2SI:
         vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseSF2SI.src);
         vex_printf(",");
         (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSF2SI.dst);
         break;
      case Ain_SseSDSS:
         vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
         ppHRegAMD64(i->Ain.SseSDSS.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSDSS.dst);
         break;
      case Ain_SseLdSt:
         switch (i->Ain.SseLdSt.sz) {
            case 4:  vex_printf("movss "); break;
            case 8:  vex_printf("movsd "); break;
            case 16: vex_printf("movups "); break;
            default: vassert(0);
         }
         if (i->Ain.SseLdSt.isLoad) {
            ppAMD64AMode(i->Ain.SseLdSt.addr);
            vex_printf(",");
            ppHRegAMD64(i->Ain.SseLdSt.reg);
         } else {
            ppHRegAMD64(i->Ain.SseLdSt.reg);
            vex_printf(",");
            ppAMD64AMode(i->Ain.SseLdSt.addr);
         }
         return;
      case Ain_SseLdzLO:
         vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
         ppAMD64AMode(i->Ain.SseLdzLO.addr);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseLdzLO.reg);
         return;
      case Ain_Sse32Fx4:
         vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
         ppHRegAMD64(i->Ain.Sse32Fx4.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
         ppHRegAMD64(i->Ain.Sse32FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
         ppHRegAMD64(i->Ain.Sse64Fx2.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
         ppHRegAMD64(i->Ain.Sse64FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
         ppHRegAMD64(i->Ain.SseReRg.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
         ppHRegAMD64(i->Ain.SseCMov.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
         ppHRegAMD64(i->Ain.SseShuf.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    vex_printf("vmovups ");
      //uu    if (i->Ain.AvxLdSt.isLoad) {
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu       vex_printf(",");
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu    } else {
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu       vex_printf(",");
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu    }
      //uu    return;
      //uu case Ain_AvxReRg:
      //uu    vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
      //uu    ppHRegAMD64(i->Ain.AvxReRg.src);
      //uu    vex_printf(",");
      //uu    ppHRegAMD64(i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         vex_printf("(evCheck) decl ");
         ppAMD64AMode(i->Ain.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Ain_ProfInc:
         /* The counter address is patched in after assembly. */
         vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
         return;
      default:
         vpanic("ppAMD64Instr");
   }
}
1327
1328/* --------- Helpers for register allocation. --------- */
1329
/* Record in *u, for the register allocator, which registers
   instruction 'i' reads, writes or modifies.  'mode64' must be
   True: VEX generates only 64-bit code on this host. */
void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
{
   Bool unary;
   vassert(mode64 == True);
   initHRegUsage(u);
   switch (i->tag) {
      case Ain_Imm64:
         addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
         /* MOV overwrites dst; CMP only reads it; all other ALU ops
            both read and write it. */
         if (i->Ain.Alu64R.op == Aalu_MOV) {
            addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
            return;
         }
         if (i->Ain.Alu64R.op == Aalu_CMP) {
            addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
         addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
         /* A shift amount of 0 means "shift by %cl". */
         if (i->Ain.Sh64.src == 0)
            addHRegUse(u, HRmRead, hregAMD64_RCX());
         return;
      case Ain_Test64:
         addHRegUse(u, HRmRead, i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
         addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         vassert(i->Ain.Alu32R.op != Aalu_MOV);
         addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
         if (i->Ain.Alu32R.op == Aalu_CMP) {
            addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         /* Widening multiply: RAX is an input and receives the low
            half; RDX receives the high half. */
         addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         addHRegUse(u, HRmWrite, hregAMD64_RDX());
         return;
      case Ain_Div:
         /* Divide uses RDX:RAX as dividend and leaves quotient and
            remainder in RAX/RDX. */
         addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         addHRegUse(u, HRmModify, hregAMD64_RDX());
         return;
      case Ain_Push:
         addRegUsage_AMD64RMI(u, i->Ain.Push.src);
         addHRegUse(u, HRmModify, hregAMD64_RSP());
         return;
      case Ain_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
            and all the xmm registers.
         */
         addHRegUse(u, HRmWrite, hregAMD64_RAX());
         addHRegUse(u, HRmWrite, hregAMD64_RCX());
         addHRegUse(u, HRmWrite, hregAMD64_RDX());
         addHRegUse(u, HRmWrite, hregAMD64_RSI());
         addHRegUse(u, HRmWrite, hregAMD64_RDI());
         addHRegUse(u, HRmWrite, hregAMD64_R8());
         addHRegUse(u, HRmWrite, hregAMD64_R9());
         addHRegUse(u, HRmWrite, hregAMD64_R10());
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         /* NOTE(review): XMM2 and XMM13..XMM15 are not claimed below.
            Presumably they lie outside the allocator's jurisdiction --
            confirm against the hregAMD64_* register definitions. */
         addHRegUse(u, HRmWrite, hregAMD64_XMM0());
         addHRegUse(u, HRmWrite, hregAMD64_XMM1());
         addHRegUse(u, HRmWrite, hregAMD64_XMM3());
         addHRegUse(u, HRmWrite, hregAMD64_XMM4());
         addHRegUse(u, HRmWrite, hregAMD64_XMM5());
         addHRegUse(u, HRmWrite, hregAMD64_XMM6());
         addHRegUse(u, HRmWrite, hregAMD64_XMM7());
         addHRegUse(u, HRmWrite, hregAMD64_XMM8());
         addHRegUse(u, HRmWrite, hregAMD64_XMM9());
         addHRegUse(u, HRmWrite, hregAMD64_XMM10());
         addHRegUse(u, HRmWrite, hregAMD64_XMM11());
         addHRegUse(u, HRmWrite, hregAMD64_XMM12());

         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Ain.Call.regparms) {
            case 6: addHRegUse(u, HRmRead, hregAMD64_R9());  /*fallthru*/
            case 5: addHRegUse(u, HRmRead, hregAMD64_R8());  /*fallthru*/
            case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
            case 0: break;
            default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, r11 is stated in the
            ABI as a scratch register, and so seems a suitable victim. */
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         /* Upshot of this is that the assembler really must use r11,
            and no other, as a destination temporary. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Ain_XDirect:
         /* Don't bother to mention the write to %r11, since it is not
            available to the allocator. */
         addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
         return;
      case Ain_XIndir:
         /* Ditto re %r11 */
         addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
         addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
         return;
      case Ain_XAssisted:
         /* Ditto re %r11 and %rbp (the baseblock ptr) */
         addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
         addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
         return;
      case Ain_CMov64:
         addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
         addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         addHRegUse(u, HRmRead,  i->Ain.MovxLQ.src);
         addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
         return;
      case Ain_Store:
         addHRegUse(u, HRmRead, i->Ain.Store.src);
         addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
         return;
      case Ain_Set64:
         addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
         addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         return;
      case Ain_ACAS:
         /* CMPXCHG: compares RAX with memory, may exchange with RBX
            value; RAX is updated on failure. */
         addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
         addHRegUse(u, HRmRead, hregAMD64_RBX());
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         return;
      case Ain_DACAS:
         /* CMPXCHG8B/16B: RDX:RAX expected value, RCX:RBX new value. */
         addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
         addHRegUse(u, HRmRead, hregAMD64_RCX());
         addHRegUse(u, HRmRead, hregAMD64_RBX());
         addHRegUse(u, HRmModify, hregAMD64_RDX());
         addHRegUse(u, HRmModify, hregAMD64_RAX());
         return;
      case Ain_A87Free:
         return;
      case Ain_A87PushPop:
         addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
         return;
      case Ain_A87FpOp:
         return;
      case Ain_A87LdCW:
         addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
         return;
      case Ain_A87StSW:
         addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
         return;
      case Ain_LdMXCSR:
         addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
         return;
      case Ain_SseUComIS:
         addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcL);
         addHRegUse(u, HRmRead,  i->Ain.SseUComIS.srcR);
         addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
         return;
      case Ain_SseSI2SF:
         addHRegUse(u, HRmRead,  i->Ain.SseSI2SF.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
         return;
      case Ain_SseSF2SI:
         addHRegUse(u, HRmRead,  i->Ain.SseSF2SI.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
         return;
      case Ain_SseSDSS:
         addHRegUse(u, HRmRead,  i->Ain.SseSDSS.src);
         addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
         return;
      case Ain_SseLdSt:
         addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
         addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Ain.SseLdSt.reg);
         return;
      case Ain_SseLdzLO:
         addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
         return;
      /* For the vector FP ops, unary operations (recip/rsqrt/sqrt)
         overwrite dst outright; binary ones read and write it. */
      case Ain_Sse32Fx4:
         vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
         unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
                         || i->Ain.Sse32Fx4.op == Asse_RSQRTF
                         || i->Ain.Sse32Fx4.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vassert(i->Ain.Sse32FLo.op != Asse_MOV);
         unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
                         || i->Ain.Sse32FLo.op == Asse_RSQRTF
                         || i->Ain.Sse32FLo.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
         unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
                         || i->Ain.Sse64Fx2.op == Asse_RSQRTF
                         || i->Ain.Sse64Fx2.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vassert(i->Ain.Sse64FLo.op != Asse_MOV);
         unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
                         || i->Ain.Sse64FLo.op == Asse_RSQRTF
                         || i->Ain.Sse64FLo.op == Asse_SQRTF );
         addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         if ( (i->Ain.SseReRg.op == Asse_XOR
               || i->Ain.SseReRg.op == Asse_CMPEQ32)
              && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
            /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
               r,r' as a write of a value to r, and independent of any
               previous value in r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
            addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
                          ? HRmWrite : HRmModify,
                          i->Ain.SseReRg.dst);
         }
         return;
      case Ain_SseCMov:
         addHRegUse(u, HRmRead,   i->Ain.SseCMov.src);
         addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         addHRegUse(u, HRmRead,  i->Ain.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
      //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
      //uu               i->Ain.AvxLdSt.reg);
      //uu return;
      //uu case Ain_AvxReRg:
      //uu    if ( (i->Ain.AvxReRg.op == Asse_XOR
      //uu          || i->Ain.AvxReRg.op == Asse_CMPEQ32)
      //uu         && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
      //uu       /* See comments on the case for Ain_SseReRg. */
      //uu       addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
      //uu    } else {
      //uu       addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
      //uu       addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
      //uu                     ? HRmWrite : HRmModify,
      //uu                     i->Ain.AvxReRg.dst);
      //uu    }
      //uu    return;
      case Ain_EvCheck:
         /* We expect both amodes only to mention %rbp, so this is in
            fact pointless, since %rbp isn't allocatable, but anyway.. */
         addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
         addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
         return;
      case Ain_ProfInc:
         addHRegUse(u, HRmWrite, hregAMD64_R11());
         return;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("getRegUsage_AMD64Instr");
   }
}
sewardjf67eadf2005-02-03 03:53:52 +00001633
/* Local helper: rewrite *r in place to the real register assigned to
   it by the register allocator, as recorded in the remap table 'm'. */
static inline void mapReg(HRegRemap* m, HReg* r)
{
   *r = lookupHRegRemap(m, *r);
}
sewardjc33671d2005-02-01 20:30:00 +00001639
cerion92b64362005-12-13 12:02:26 +00001640void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
sewardjc33671d2005-02-01 20:30:00 +00001641{
cerion92b64362005-12-13 12:02:26 +00001642 vassert(mode64 == True);
sewardjc33671d2005-02-01 20:30:00 +00001643 switch (i->tag) {
sewardj813ce9e2005-02-04 21:16:48 +00001644 case Ain_Imm64:
1645 mapReg(m, &i->Ain.Imm64.dst);
1646 return;
sewardjf67eadf2005-02-03 03:53:52 +00001647 case Ain_Alu64R:
1648 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1649 mapReg(m, &i->Ain.Alu64R.dst);
1650 return;
1651 case Ain_Alu64M:
1652 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1653 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1654 return;
1655 case Ain_Sh64:
sewardj501a3392005-05-11 15:37:50 +00001656 mapReg(m, &i->Ain.Sh64.dst);
sewardjf67eadf2005-02-03 03:53:52 +00001657 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001658 case Ain_Test64:
sewardj501a3392005-05-11 15:37:50 +00001659 mapReg(m, &i->Ain.Test64.dst);
sewardj05b3b6a2005-02-04 01:44:33 +00001660 return;
sewardjd0a12df2005-02-10 02:07:43 +00001661 case Ain_Unary64:
sewardj501a3392005-05-11 15:37:50 +00001662 mapReg(m, &i->Ain.Unary64.dst);
sewardjd0a12df2005-02-10 02:07:43 +00001663 return;
sewardj6ce1a232007-03-31 19:12:38 +00001664 case Ain_Lea64:
1665 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1666 mapReg(m, &i->Ain.Lea64.dst);
1667 return;
sewardj9cc2bbf2011-06-05 17:56:03 +00001668 case Ain_Alu32R:
1669 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1670 mapReg(m, &i->Ain.Alu32R.dst);
1671 return;
sewardj9b967672005-02-08 11:13:09 +00001672 case Ain_MulL:
1673 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1674 return;
sewardj7de0d3c2005-02-13 02:26:41 +00001675 case Ain_Div:
1676 mapRegs_AMD64RM(m, i->Ain.Div.src);
1677 return;
sewardj1001dc42005-02-21 08:25:55 +00001678 case Ain_Push:
1679 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1680 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001681 case Ain_Call:
1682 return;
sewardjc6f970f2012-04-02 21:54:49 +00001683 case Ain_XDirect:
1684 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1685 return;
1686 case Ain_XIndir:
1687 mapReg(m, &i->Ain.XIndir.dstGA);
1688 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1689 return;
1690 case Ain_XAssisted:
1691 mapReg(m, &i->Ain.XAssisted.dstGA);
1692 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
sewardjf67eadf2005-02-03 03:53:52 +00001693 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001694 case Ain_CMov64:
1695 mapRegs_AMD64RM(m, i->Ain.CMov64.src);
1696 mapReg(m, &i->Ain.CMov64.dst);
1697 return;
sewardjca257bc2010-09-08 08:34:52 +00001698 case Ain_MovxLQ:
1699 mapReg(m, &i->Ain.MovxLQ.src);
1700 mapReg(m, &i->Ain.MovxLQ.dst);
sewardjf67eadf2005-02-03 03:53:52 +00001701 return;
1702 case Ain_LoadEX:
1703 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1704 mapReg(m, &i->Ain.LoadEX.dst);
1705 return;
sewardj05b3b6a2005-02-04 01:44:33 +00001706 case Ain_Store:
1707 mapReg(m, &i->Ain.Store.src);
1708 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1709 return;
sewardja5bd0af2005-03-24 20:40:12 +00001710 case Ain_Set64:
1711 mapReg(m, &i->Ain.Set64.dst);
1712 return;
sewardjf53b7352005-04-06 20:01:56 +00001713 case Ain_Bsfr64:
1714 mapReg(m, &i->Ain.Bsfr64.src);
1715 mapReg(m, &i->Ain.Bsfr64.dst);
1716 return;
sewardjd0a12df2005-02-10 02:07:43 +00001717 case Ain_MFence:
1718 return;
sewardje9d8a262009-07-01 08:06:34 +00001719 case Ain_ACAS:
1720 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1721 return;
1722 case Ain_DACAS:
1723 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1724 return;
sewardj25a85812005-05-08 23:03:48 +00001725 case Ain_A87Free:
1726 return;
1727 case Ain_A87PushPop:
1728 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1729 return;
1730 case Ain_A87FpOp:
1731 return;
1732 case Ain_A87LdCW:
1733 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1734 return;
sewardjf4c803b2006-09-11 11:07:34 +00001735 case Ain_A87StSW:
1736 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1737 return;
sewardj1a01e652005-02-23 11:39:21 +00001738 case Ain_LdMXCSR:
1739 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1740 return;
sewardj18303862005-02-21 12:36:54 +00001741 case Ain_SseUComIS:
1742 mapReg(m, &i->Ain.SseUComIS.srcL);
1743 mapReg(m, &i->Ain.SseUComIS.srcR);
1744 mapReg(m, &i->Ain.SseUComIS.dst);
1745 return;
sewardj1a01e652005-02-23 11:39:21 +00001746 case Ain_SseSI2SF:
1747 mapReg(m, &i->Ain.SseSI2SF.src);
1748 mapReg(m, &i->Ain.SseSI2SF.dst);
1749 return;
1750 case Ain_SseSF2SI:
1751 mapReg(m, &i->Ain.SseSF2SI.src);
1752 mapReg(m, &i->Ain.SseSF2SI.dst);
1753 return;
sewardj8d965312005-02-25 02:48:47 +00001754 case Ain_SseSDSS:
1755 mapReg(m, &i->Ain.SseSDSS.src);
1756 mapReg(m, &i->Ain.SseSDSS.dst);
1757 return;
sewardj1001dc42005-02-21 08:25:55 +00001758 case Ain_SseLdSt:
1759 mapReg(m, &i->Ain.SseLdSt.reg);
1760 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1761 break;
1762 case Ain_SseLdzLO:
1763 mapReg(m, &i->Ain.SseLdzLO.reg);
1764 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1765 break;
sewardj8d965312005-02-25 02:48:47 +00001766 case Ain_Sse32Fx4:
1767 mapReg(m, &i->Ain.Sse32Fx4.src);
1768 mapReg(m, &i->Ain.Sse32Fx4.dst);
1769 return;
1770 case Ain_Sse32FLo:
1771 mapReg(m, &i->Ain.Sse32FLo.src);
1772 mapReg(m, &i->Ain.Sse32FLo.dst);
1773 return;
sewardj4c328cf2005-05-05 12:05:54 +00001774 case Ain_Sse64Fx2:
1775 mapReg(m, &i->Ain.Sse64Fx2.src);
1776 mapReg(m, &i->Ain.Sse64Fx2.dst);
1777 return;
sewardj1001dc42005-02-21 08:25:55 +00001778 case Ain_Sse64FLo:
1779 mapReg(m, &i->Ain.Sse64FLo.src);
1780 mapReg(m, &i->Ain.Sse64FLo.dst);
1781 return;
1782 case Ain_SseReRg:
1783 mapReg(m, &i->Ain.SseReRg.src);
1784 mapReg(m, &i->Ain.SseReRg.dst);
1785 return;
sewardj8d965312005-02-25 02:48:47 +00001786 case Ain_SseCMov:
1787 mapReg(m, &i->Ain.SseCMov.src);
1788 mapReg(m, &i->Ain.SseCMov.dst);
1789 return;
sewardj09717342005-05-05 21:34:02 +00001790 case Ain_SseShuf:
1791 mapReg(m, &i->Ain.SseShuf.src);
1792 mapReg(m, &i->Ain.SseShuf.dst);
1793 return;
sewardj3616a2e2012-05-27 16:18:13 +00001794 //uu case Ain_AvxLdSt:
1795 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1796 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1797 //uu break;
1798 //uu case Ain_AvxReRg:
1799 //uu mapReg(m, &i->Ain.AvxReRg.src);
1800 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1801 //uu return;
sewardjc6f970f2012-04-02 21:54:49 +00001802 case Ain_EvCheck:
1803 /* We expect both amodes only to mention %rbp, so this is in
1804 fact pointless, since %rbp isn't allocatable, but anyway.. */
1805 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
1806 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
1807 return;
1808 case Ain_ProfInc:
1809 /* hardwires r11 -- nothing to modify. */
1810 return;
sewardjc33671d2005-02-01 20:30:00 +00001811 default:
cerion92b64362005-12-13 12:02:26 +00001812 ppAMD64Instr(i, mode64);
sewardjc33671d2005-02-01 20:30:00 +00001813 vpanic("mapRegs_AMD64Instr");
1814 }
1815}
1816
1817/* Figure out if i represents a reg-reg move, and if so assign the
1818 source and destination to *src and *dst. If in doubt say No. Used
1819 by the register allocator to do move coalescing.
1820*/
floriand8c64e02014-10-08 08:54:44 +00001821Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
sewardjc33671d2005-02-01 20:30:00 +00001822{
sewardjc4530ae2012-05-21 10:18:49 +00001823 switch (i->tag) {
1824 case Ain_Alu64R:
1825 /* Moves between integer regs */
1826 if (i->Ain.Alu64R.op != Aalu_MOV)
1827 return False;
1828 if (i->Ain.Alu64R.src->tag != Armi_Reg)
1829 return False;
1830 *src = i->Ain.Alu64R.src->Armi.Reg.reg;
1831 *dst = i->Ain.Alu64R.dst;
1832 return True;
1833 case Ain_SseReRg:
1834 /* Moves between SSE regs */
1835 if (i->Ain.SseReRg.op != Asse_MOV)
1836 return False;
1837 *src = i->Ain.SseReRg.src;
1838 *dst = i->Ain.SseReRg.dst;
1839 return True;
sewardj3616a2e2012-05-27 16:18:13 +00001840 //uu case Ain_AvxReRg:
1841 //uu /* Moves between AVX regs */
1842 //uu if (i->Ain.AvxReRg.op != Asse_MOV)
1843 //uu return False;
1844 //uu *src = i->Ain.AvxReRg.src;
1845 //uu *dst = i->Ain.AvxReRg.dst;
1846 //uu return True;
sewardjc4530ae2012-05-21 10:18:49 +00001847 default:
sewardjf67eadf2005-02-03 03:53:52 +00001848 return False;
sewardjf67eadf2005-02-03 03:53:52 +00001849 }
sewardjc4530ae2012-05-21 10:18:49 +00001850 /*NOTREACHED*/
sewardjc33671d2005-02-01 20:30:00 +00001851}
1852
1853
1854/* Generate amd64 spill/reload instructions under the direction of the
1855 register allocator. Note it's critical these don't write the
1856 condition codes. */
1857
sewardj2a1ed8e2009-12-31 19:26:03 +00001858void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1859 HReg rreg, Int offsetB, Bool mode64 )
sewardjd0a12df2005-02-10 02:07:43 +00001860{
1861 AMD64AMode* am;
1862 vassert(offsetB >= 0);
1863 vassert(!hregIsVirtual(rreg));
cerion92b64362005-12-13 12:02:26 +00001864 vassert(mode64 == True);
sewardj2a1ed8e2009-12-31 19:26:03 +00001865 *i1 = *i2 = NULL;
sewardjd0a12df2005-02-10 02:07:43 +00001866 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
sewardjd0a12df2005-02-10 02:07:43 +00001867 switch (hregClass(rreg)) {
1868 case HRcInt64:
sewardj2a1ed8e2009-12-31 19:26:03 +00001869 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
1870 return;
sewardj1001dc42005-02-21 08:25:55 +00001871 case HRcVec128:
sewardj2a1ed8e2009-12-31 19:26:03 +00001872 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
1873 return;
sewardjd0a12df2005-02-10 02:07:43 +00001874 default:
1875 ppHRegClass(hregClass(rreg));
1876 vpanic("genSpill_AMD64: unimplemented regclass");
1877 }
sewardjc33671d2005-02-01 20:30:00 +00001878}
1879
sewardj2a1ed8e2009-12-31 19:26:03 +00001880void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1881 HReg rreg, Int offsetB, Bool mode64 )
sewardjd0a12df2005-02-10 02:07:43 +00001882{
1883 AMD64AMode* am;
1884 vassert(offsetB >= 0);
1885 vassert(!hregIsVirtual(rreg));
cerion92b64362005-12-13 12:02:26 +00001886 vassert(mode64 == True);
sewardj2a1ed8e2009-12-31 19:26:03 +00001887 *i1 = *i2 = NULL;
sewardjd0a12df2005-02-10 02:07:43 +00001888 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1889 switch (hregClass(rreg)) {
1890 case HRcInt64:
sewardj2a1ed8e2009-12-31 19:26:03 +00001891 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
1892 return;
sewardj1001dc42005-02-21 08:25:55 +00001893 case HRcVec128:
sewardj2a1ed8e2009-12-31 19:26:03 +00001894 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
1895 return;
sewardjd0a12df2005-02-10 02:07:43 +00001896 default:
1897 ppHRegClass(hregClass(rreg));
1898 vpanic("genReload_AMD64: unimplemented regclass");
1899 }
sewardjc33671d2005-02-01 20:30:00 +00001900}
1901
1902
sewardj813ce9e2005-02-04 21:16:48 +00001903/* --------- The amd64 assembler (bleh.) --------- */
1904
1905/* Produce the low three bits of an integer register number. */
sewardjdc2ca892005-04-07 02:01:23 +00001906static UChar iregBits210 ( HReg r )
sewardj813ce9e2005-02-04 21:16:48 +00001907{
1908 UInt n;
1909 vassert(hregClass(r) == HRcInt64);
1910 vassert(!hregIsVirtual(r));
1911 n = hregNumber(r);
1912 vassert(n <= 15);
sewardj03ccf852005-03-21 02:47:42 +00001913 return toUChar(n & 7);
sewardj813ce9e2005-02-04 21:16:48 +00001914}
1915
1916/* Produce bit 3 of an integer register number. */
sewardj03ccf852005-03-21 02:47:42 +00001917static UChar iregBit3 ( HReg r )
sewardj813ce9e2005-02-04 21:16:48 +00001918{
1919 UInt n;
1920 vassert(hregClass(r) == HRcInt64);
1921 vassert(!hregIsVirtual(r));
1922 n = hregNumber(r);
1923 vassert(n <= 15);
sewardj03ccf852005-03-21 02:47:42 +00001924 return toUChar((n >> 3) & 1);
sewardj813ce9e2005-02-04 21:16:48 +00001925}
1926
sewardjdc2ca892005-04-07 02:01:23 +00001927/* Produce a complete 4-bit integer register number. */
1928static UChar iregBits3210 ( HReg r )
1929{
1930 UInt n;
1931 vassert(hregClass(r) == HRcInt64);
1932 vassert(!hregIsVirtual(r));
1933 n = hregNumber(r);
1934 vassert(n <= 15);
1935 return toUChar(n);
1936}
1937
sewardj1001dc42005-02-21 08:25:55 +00001938/* Given an xmm (128bit V-class) register number, produce the
1939 equivalent numbered register in 64-bit I-class. This is a bit of
1940 fakery which facilitates using functions that work on integer
1941 register numbers to be used when assembling SSE instructions
1942 too. */
florianbf3bea62013-01-24 04:59:49 +00001943static HReg vreg2ireg ( HReg r )
sewardj1001dc42005-02-21 08:25:55 +00001944{
1945 UInt n;
1946 vassert(hregClass(r) == HRcVec128);
1947 vassert(!hregIsVirtual(r));
1948 n = hregNumber(r);
1949 vassert(n <= 15);
1950 return mkHReg(n, HRcInt64, False);
1951}
sewardj813ce9e2005-02-04 21:16:48 +00001952
sewardj3616a2e2012-05-27 16:18:13 +00001953//uu /* Ditto for ymm regs. */
florianbf3bea62013-01-24 04:59:49 +00001954//uu static HReg dvreg2ireg ( HReg r )
sewardj3616a2e2012-05-27 16:18:13 +00001955//uu {
1956//uu UInt n;
1957//uu vassert(hregClass(r) == HRcVec256);
1958//uu vassert(!hregIsVirtual(r));
1959//uu n = hregNumber(r);
1960//uu vassert(n <= 15);
1961//uu return mkHReg(n, HRcInt64, False);
1962//uu }
sewardjc4530ae2012-05-21 10:18:49 +00001963
sewardjb5e7ced2013-01-24 08:55:25 +00001964static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
sewardj813ce9e2005-02-04 21:16:48 +00001965{
sewardjb5e7ced2013-01-24 08:55:25 +00001966 vassert(mod < 4);
1967 vassert((reg|regmem) < 8);
sewardj03ccf852005-03-21 02:47:42 +00001968 return toUChar( ((mod & 3) << 6)
1969 | ((reg & 7) << 3)
1970 | (regmem & 7) );
sewardj813ce9e2005-02-04 21:16:48 +00001971}
1972
sewardjb5e7ced2013-01-24 08:55:25 +00001973static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
sewardj813ce9e2005-02-04 21:16:48 +00001974{
sewardjb5e7ced2013-01-24 08:55:25 +00001975 vassert(shift < 4);
1976 vassert((regindex|regbase) < 8);
sewardj03ccf852005-03-21 02:47:42 +00001977 return toUChar( ((shift & 3) << 6)
1978 | ((regindex & 7) << 3)
1979 | (regbase & 7) );
sewardj813ce9e2005-02-04 21:16:48 +00001980}
1981
1982static UChar* emit32 ( UChar* p, UInt w32 )
1983{
sewardj03ccf852005-03-21 02:47:42 +00001984 *p++ = toUChar((w32) & 0x000000FF);
1985 *p++ = toUChar((w32 >> 8) & 0x000000FF);
1986 *p++ = toUChar((w32 >> 16) & 0x000000FF);
1987 *p++ = toUChar((w32 >> 24) & 0x000000FF);
sewardj813ce9e2005-02-04 21:16:48 +00001988 return p;
1989}
1990
sewardj1b8d58e2005-02-05 14:34:18 +00001991static UChar* emit64 ( UChar* p, ULong w64 )
1992{
sewardj03ccf852005-03-21 02:47:42 +00001993 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
1994 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
sewardj1b8d58e2005-02-05 14:34:18 +00001995 return p;
1996}
1997
sewardj813ce9e2005-02-04 21:16:48 +00001998/* Does a sign-extend of the lowest 8 bits give
1999 the original number? */
2000static Bool fits8bits ( UInt w32 )
2001{
2002 Int i32 = (Int)w32;
sewardj03ccf852005-03-21 02:47:42 +00002003 return toBool(i32 == ((i32 << 24) >> 24));
sewardj813ce9e2005-02-04 21:16:48 +00002004}
sewardj4d77a9c2007-08-25 23:21:08 +00002005/* Can the lower 32 bits be signedly widened to produce the whole
2006 64-bit value? In other words, are the top 33 bits either all 0 or
2007 all 1 ? */
2008static Bool fitsIn32Bits ( ULong x )
2009{
2010 Long y0 = (Long)x;
2011 Long y1 = y0;
2012 y1 <<= 32;
2013 y1 >>=/*s*/ 32;
2014 return toBool(x == y1);
2015}
sewardj813ce9e2005-02-04 21:16:48 +00002016
2017
2018/* Forming mod-reg-rm bytes and scale-index-base bytes.
2019
sewardje95b04a2005-02-07 17:47:21 +00002020 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
sewardj813ce9e2005-02-04 21:16:48 +00002021 = 00 greg ereg
2022
sewardje95b04a2005-02-07 17:47:21 +00002023 greg, d8(ereg) | ereg is neither of: RSP R12
sewardj813ce9e2005-02-04 21:16:48 +00002024 = 01 greg ereg, d8
2025
sewardje95b04a2005-02-07 17:47:21 +00002026 greg, d32(ereg) | ereg is neither of: RSP R12
sewardj813ce9e2005-02-04 21:16:48 +00002027 = 10 greg ereg, d32
2028
sewardje95b04a2005-02-07 17:47:21 +00002029 greg, d8(ereg) | ereg is either: RSP R12
2030 = 01 greg 100, 0x24, d8
2031 (lowest bit of rex distinguishes R12/RSP)
2032
sewardj7de0d3c2005-02-13 02:26:41 +00002033 greg, d32(ereg) | ereg is either: RSP R12
2034 = 10 greg 100, 0x24, d32
2035 (lowest bit of rex distinguishes R12/RSP)
sewardj813ce9e2005-02-04 21:16:48 +00002036
2037 -----------------------------------------------
2038
2039 greg, d8(base,index,scale)
2040 | index != RSP
2041 = 01 greg 100, scale index base, d8
2042
2043 greg, d32(base,index,scale)
2044 | index != RSP
2045 = 10 greg 100, scale index base, d32
2046*/
static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      /* 00 greg ereg -- zero displacement.  Per the table above, not
         usable when the base is any of RSP/RBP/R12/R13. */
      if (am->Aam.IR.imm == 0
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
         ) {
         *p++ = mkModRegRM(0, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         return p;
      }
      /* 01 greg ereg, d8 -- byte displacement; base must not be
         RSP/R12. */
      if (fits8bits(am->Aam.IR.imm)
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
         ) {
         *p++ = mkModRegRM(1, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      /* 10 greg ereg, d32 -- dword displacement; base must not be
         RSP/R12. */
      if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
         ) {
         *p++ = mkModRegRM(2, iregBits210(greg),
                              iregBits210(am->Aam.IR.reg));
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      /* RSP/R12 base: rm=100 means "SIB follows", so emit the
         degenerate SIB byte 0x24 (no index, base in low bits), then
         the byte displacement. */
      if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
           || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
          && fits8bits(am->Aam.IR.imm)) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
         return p;
      }
      /* Same, but with a dword displacement.  Only R12 is accepted
         here so far; the RSP variant awaits a test case. */
      if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
            || wait for test case for RSP case */
          sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = 0x24;
         p = emit32(p, am->Aam.IR.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Aam_IRRS) {
      /* 01 greg 100, SIB, d8 -- base+index<<shift with byte
         displacement; index must not be RSP (SIB can't encode it). */
      if (fits8bits(am->Aam.IRRS.imm)
          && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index),
                                          iregBits210(am->Aam.IRRS.base));
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
         return p;
      }
      /* 10 greg 100, SIB, d32 -- as above with a dword displacement. */
      if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index),
                                          iregBits210(am->Aam.IRRS.base));
         p = emit32(p, am->Aam.IRRS.imm);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
2120
2121
2122/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2123static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2124{
sewardjdc2ca892005-04-07 02:01:23 +00002125 *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
sewardj813ce9e2005-02-04 21:16:48 +00002126 return p;
2127}
2128
2129
sewardj549e0642005-02-05 12:00:14 +00002130/* Clear the W bit on a REX byte, thereby changing the operand size
2131 back to whatever that instruction's default operand size is. */
2132static inline UChar clearWBit ( UChar rex )
2133{
sewardj03ccf852005-03-21 02:47:42 +00002134 return toUChar(rex & ~(1<<3));
sewardj549e0642005-02-05 12:00:14 +00002135}
2136
2137
2138/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2139static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
sewardj813ce9e2005-02-04 21:16:48 +00002140{
2141 if (am->tag == Aam_IR) {
2142 UChar W = 1; /* we want 64-bit mode */
sewardj549e0642005-02-05 12:00:14 +00002143 UChar R = iregBit3(greg);
sewardj813ce9e2005-02-04 21:16:48 +00002144 UChar X = 0; /* not relevant */
sewardj549e0642005-02-05 12:00:14 +00002145 UChar B = iregBit3(am->Aam.IR.reg);
sewardj03ccf852005-03-21 02:47:42 +00002146 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
sewardj813ce9e2005-02-04 21:16:48 +00002147 }
2148 if (am->tag == Aam_IRRS) {
2149 UChar W = 1; /* we want 64-bit mode */
sewardj549e0642005-02-05 12:00:14 +00002150 UChar R = iregBit3(greg);
2151 UChar X = iregBit3(am->Aam.IRRS.index);
2152 UChar B = iregBit3(am->Aam.IRRS.base);
sewardj03ccf852005-03-21 02:47:42 +00002153 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
sewardj813ce9e2005-02-04 21:16:48 +00002154 }
2155 vassert(0);
sewardj03ccf852005-03-21 02:47:42 +00002156 return 0; /*NOTREACHED*/
sewardj813ce9e2005-02-04 21:16:48 +00002157}
2158
sewardj549e0642005-02-05 12:00:14 +00002159/* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2160static UChar rexAMode_R ( HReg greg, HReg ereg )
2161{
2162 UChar W = 1; /* we want 64-bit mode */
2163 UChar R = iregBit3(greg);
2164 UChar X = 0; /* not relevant */
2165 UChar B = iregBit3(ereg);
sewardj03ccf852005-03-21 02:47:42 +00002166 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
sewardj549e0642005-02-05 12:00:14 +00002167}
2168
sewardj813ce9e2005-02-04 21:16:48 +00002169
sewardj3616a2e2012-05-27 16:18:13 +00002170//uu /* May 2012: this VEX prefix stuff is currently unused, but has
2171//uu verified correct (I reckon). Certainly it has been known to
2172//uu produce correct VEX prefixes during testing. */
2173//uu
2174//uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2175//uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2176//uu in verbatim. There's no range checking on the bits. */
2177//uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2178//uu UInt mmmmm, UInt rexW, UInt notVvvv,
2179//uu UInt L, UInt pp )
2180//uu {
2181//uu UChar byte0 = 0;
2182//uu UChar byte1 = 0;
2183//uu UChar byte2 = 0;
2184//uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2185//uu /* 2 byte encoding is possible. */
2186//uu byte0 = 0xC5;
2187//uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2188//uu | (L << 2) | pp;
2189//uu } else {
2190//uu /* 3 byte encoding is needed. */
2191//uu byte0 = 0xC4;
2192//uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2193//uu | ((rexB ^ 1) << 5) | mmmmm;
2194//uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2195//uu }
2196//uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2197//uu }
2198//uu
2199//uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2200//uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2201//uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2202//uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2203//uu vvvv=1111 (unused 3rd reg). */
2204//uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2205//uu {
2206//uu UChar L = 1; /* size = 256 */
2207//uu UChar pp = 0; /* no SIMD prefix */
2208//uu UChar mmmmm = 1; /* 0F */
2209//uu UChar notVvvv = 0; /* unused */
2210//uu UChar rexW = 0;
2211//uu UChar rexR = 0;
2212//uu UChar rexX = 0;
2213//uu UChar rexB = 0;
2214//uu /* Same logic as in rexAMode_M. */
2215//uu if (am->tag == Aam_IR) {
2216//uu rexR = iregBit3(greg);
2217//uu rexX = 0; /* not relevant */
2218//uu rexB = iregBit3(am->Aam.IR.reg);
2219//uu }
2220//uu else if (am->tag == Aam_IRRS) {
2221//uu rexR = iregBit3(greg);
2222//uu rexX = iregBit3(am->Aam.IRRS.index);
2223//uu rexB = iregBit3(am->Aam.IRRS.base);
2224//uu } else {
2225//uu vassert(0);
2226//uu }
2227//uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2228//uu }
2229//uu
2230//uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2231//uu {
2232//uu switch (vex & 0xFF) {
2233//uu case 0xC5:
2234//uu *p++ = 0xC5;
2235//uu *p++ = (vex >> 8) & 0xFF;
2236//uu vassert(0 == (vex >> 16));
2237//uu break;
2238//uu case 0xC4:
2239//uu *p++ = 0xC4;
2240//uu *p++ = (vex >> 8) & 0xFF;
2241//uu *p++ = (vex >> 16) & 0xFF;
2242//uu vassert(0 == (vex >> 24));
2243//uu break;
2244//uu default:
2245//uu vassert(0);
2246//uu }
2247//uu return p;
2248//uu }
sewardjc4530ae2012-05-21 10:18:49 +00002249
2250
sewardj25a85812005-05-08 23:03:48 +00002251/* Emit ffree %st(N) */
2252static UChar* do_ffree_st ( UChar* p, Int n )
2253{
2254 vassert(n >= 0 && n <= 7);
2255 *p++ = 0xDD;
2256 *p++ = toUChar(0xC0 + n);
2257 return p;
2258}
2259
sewardjc33671d2005-02-01 20:30:00 +00002260/* Emit an instruction into buf and return the number of bytes used.
2261 Note that buf is not the insn's final place, and therefore it is
sewardjc6f970f2012-04-02 21:54:49 +00002262 imperative to emit position-independent code. If the emitted
2263 instruction was a profiler inc, set *is_profInc to True, else
2264 leave it unchanged. */
sewardjc33671d2005-02-01 20:30:00 +00002265
sewardjc6f970f2012-04-02 21:54:49 +00002266Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
floriand8c64e02014-10-08 08:54:44 +00002267 UChar* buf, Int nbuf, const AMD64Instr* i,
sewardj9b769162014-07-24 12:42:03 +00002268 Bool mode64, VexEndness endness_host,
florian8462d112014-09-24 15:18:09 +00002269 const void* disp_cp_chain_me_to_slowEP,
2270 const void* disp_cp_chain_me_to_fastEP,
2271 const void* disp_cp_xindir,
2272 const void* disp_cp_xassisted )
sewardjc33671d2005-02-01 20:30:00 +00002273{
sewardjc2bcb6f2005-02-07 00:17:12 +00002274 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
sewardj1001dc42005-02-21 08:25:55 +00002275 UInt xtra;
sewardja5bd0af2005-03-24 20:40:12 +00002276 UInt reg;
sewardj1001dc42005-02-21 08:25:55 +00002277 UChar rex;
sewardjc33671d2005-02-01 20:30:00 +00002278 UChar* p = &buf[0];
sewardj549e0642005-02-05 12:00:14 +00002279 UChar* ptmp;
sewardj25a85812005-05-08 23:03:48 +00002280 Int j;
sewardj549e0642005-02-05 12:00:14 +00002281 vassert(nbuf >= 32);
cerion92b64362005-12-13 12:02:26 +00002282 vassert(mode64 == True);
sewardj549e0642005-02-05 12:00:14 +00002283
2284 /* Wrap an integer as a int register, for use assembling
2285 GrpN insns, in which the greg field is used as a sub-opcode
2286 and does not really contain a register. */
2287# define fake(_n) mkHReg((_n), HRcInt64, False)
2288
cerion92b64362005-12-13 12:02:26 +00002289 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
sewardjc33671d2005-02-01 20:30:00 +00002290
2291 switch (i->tag) {
2292
sewardj1b8d58e2005-02-05 14:34:18 +00002293 case Ain_Imm64:
sewardj7cf5bd02011-03-22 16:51:38 +00002294 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2295 /* Use the short form (load into 32 bit reg, + default
2296 widening rule) for constants under 1 million. We could
2297 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2298 limit it to a smaller range for verifiability purposes. */
2299 if (1 & iregBit3(i->Ain.Imm64.dst))
2300 *p++ = 0x41;
2301 *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
2302 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2303 } else {
2304 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
2305 *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
2306 p = emit64(p, i->Ain.Imm64.imm64);
2307 }
sewardj1b8d58e2005-02-05 14:34:18 +00002308 goto done;
2309
sewardj813ce9e2005-02-04 21:16:48 +00002310 case Ain_Alu64R:
2311 /* Deal specially with MOV */
2312 if (i->Ain.Alu64R.op == Aalu_MOV) {
2313 switch (i->Ain.Alu64R.src->tag) {
2314 case Armi_Imm:
sewardj7cf5bd02011-03-22 16:51:38 +00002315 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
sewardj95e154c2009-11-22 23:43:17 +00002316 /* Actually we could use this form for constants in
2317 the range 0 through 0x7FFFFFFF inclusive, but
2318 limit it to a small range for verifiability
2319 purposes. */
2320 /* Generate "movl $imm32, 32-bit-register" and let
2321 the default zero-extend rule cause the upper half
2322 of the dst to be zeroed out too. This saves 1
2323 and sometimes 2 bytes compared to the more
2324 obvious encoding in the 'else' branch. */
2325 if (1 & iregBit3(i->Ain.Alu64R.dst))
2326 *p++ = 0x41;
2327 *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
2328 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2329 } else {
2330 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
2331 *p++ = 0xC7;
2332 *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
2333 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2334 }
sewardj813ce9e2005-02-04 21:16:48 +00002335 goto done;
2336 case Armi_Reg:
sewardj1b8d58e2005-02-05 14:34:18 +00002337 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2338 i->Ain.Alu64R.dst );
sewardj813ce9e2005-02-04 21:16:48 +00002339 *p++ = 0x89;
2340 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2341 i->Ain.Alu64R.dst);
2342 goto done;
2343 case Armi_Mem:
sewardj549e0642005-02-05 12:00:14 +00002344 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
sewardj813ce9e2005-02-04 21:16:48 +00002345 i->Ain.Alu64R.src->Armi.Mem.am);
2346 *p++ = 0x8B;
2347 p = doAMode_M(p, i->Ain.Alu64R.dst,
2348 i->Ain.Alu64R.src->Armi.Mem.am);
2349 goto done;
2350 default:
2351 goto bad;
2352 }
2353 }
sewardjd0a12df2005-02-10 02:07:43 +00002354 /* MUL */
2355 if (i->Ain.Alu64R.op == Aalu_MUL) {
2356 switch (i->Ain.Alu64R.src->tag) {
sewardj7de0d3c2005-02-13 02:26:41 +00002357 case Armi_Reg:
2358 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2359 i->Ain.Alu64R.src->Armi.Reg.reg);
2360 *p++ = 0x0F;
2361 *p++ = 0xAF;
2362 p = doAMode_R(p, i->Ain.Alu64R.dst,
2363 i->Ain.Alu64R.src->Armi.Reg.reg);
2364 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00002365 case Armi_Mem:
2366 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2367 i->Ain.Alu64R.src->Armi.Mem.am);
2368 *p++ = 0x0F;
2369 *p++ = 0xAF;
2370 p = doAMode_M(p, i->Ain.Alu64R.dst,
2371 i->Ain.Alu64R.src->Armi.Mem.am);
2372 goto done;
sewardj7de0d3c2005-02-13 02:26:41 +00002373 case Armi_Imm:
2374 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2375 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2376 *p++ = 0x6B;
2377 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
sewardj03ccf852005-03-21 02:47:42 +00002378 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
sewardj7de0d3c2005-02-13 02:26:41 +00002379 } else {
2380 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2381 *p++ = 0x69;
2382 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2383 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2384 }
2385 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00002386 default:
2387 goto bad;
2388 }
2389 }
sewardj549e0642005-02-05 12:00:14 +00002390 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2391 opc = opc_rr = subopc_imm = opc_imma = 0;
2392 switch (i->Ain.Alu64R.op) {
2393 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2394 subopc_imm = 2; opc_imma = 0x15; break;
2395 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2396 subopc_imm = 0; opc_imma = 0x05; break;
2397 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2398 subopc_imm = 5; opc_imma = 0x2D; break;
2399 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2400 subopc_imm = 3; opc_imma = 0x1D; break;
2401 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2402 subopc_imm = 4; opc_imma = 0x25; break;
2403 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2404 subopc_imm = 6; opc_imma = 0x35; break;
2405 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2406 subopc_imm = 1; opc_imma = 0x0D; break;
2407 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2408 subopc_imm = 7; opc_imma = 0x3D; break;
2409 default: goto bad;
2410 }
2411 switch (i->Ain.Alu64R.src->tag) {
2412 case Armi_Imm:
florian79efdc62013-02-11 00:47:35 +00002413 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
sewardj549e0642005-02-05 12:00:14 +00002414 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
sewardj18303862005-02-21 12:36:54 +00002415 goto bad; /* FIXME: awaiting test case */
sewardj03ccf852005-03-21 02:47:42 +00002416 *p++ = toUChar(opc_imma);
sewardj549e0642005-02-05 12:00:14 +00002417 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2418 } else
2419 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2420 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
2421 *p++ = 0x83;
2422 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
sewardj03ccf852005-03-21 02:47:42 +00002423 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
sewardj549e0642005-02-05 12:00:14 +00002424 } else {
sewardj1b8d58e2005-02-05 14:34:18 +00002425 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
sewardj549e0642005-02-05 12:00:14 +00002426 *p++ = 0x81;
2427 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
2428 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2429 }
2430 goto done;
2431 case Armi_Reg:
2432 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2433 i->Ain.Alu64R.dst);
sewardj03ccf852005-03-21 02:47:42 +00002434 *p++ = toUChar(opc_rr);
sewardj549e0642005-02-05 12:00:14 +00002435 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2436 i->Ain.Alu64R.dst);
2437 goto done;
2438 case Armi_Mem:
sewardj31191072005-02-05 18:24:47 +00002439 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2440 i->Ain.Alu64R.src->Armi.Mem.am);
sewardj03ccf852005-03-21 02:47:42 +00002441 *p++ = toUChar(opc);
sewardj549e0642005-02-05 12:00:14 +00002442 p = doAMode_M(p, i->Ain.Alu64R.dst,
2443 i->Ain.Alu64R.src->Armi.Mem.am);
2444 goto done;
2445 default:
2446 goto bad;
2447 }
sewardj813ce9e2005-02-04 21:16:48 +00002448 break;
2449
sewardj549e0642005-02-05 12:00:14 +00002450 case Ain_Alu64M:
2451 /* Deal specially with MOV */
2452 if (i->Ain.Alu64M.op == Aalu_MOV) {
2453 switch (i->Ain.Alu64M.src->tag) {
2454 case Ari_Reg:
2455 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2456 i->Ain.Alu64M.dst);
2457 *p++ = 0x89;
2458 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2459 i->Ain.Alu64M.dst);
2460 goto done;
2461 case Ari_Imm:
2462 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
2463 *p++ = 0xC7;
2464 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
2465 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2466 goto done;
2467 default:
2468 goto bad;
2469 }
2470 }
sewardj549e0642005-02-05 12:00:14 +00002471 break;
2472
sewardj1b8d58e2005-02-05 14:34:18 +00002473 case Ain_Sh64:
2474 opc_cl = opc_imm = subopc = 0;
2475 switch (i->Ain.Sh64.op) {
2476 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2477 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2478 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2479 default: goto bad;
2480 }
2481 if (i->Ain.Sh64.src == 0) {
sewardj501a3392005-05-11 15:37:50 +00002482 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
sewardj03ccf852005-03-21 02:47:42 +00002483 *p++ = toUChar(opc_cl);
sewardj501a3392005-05-11 15:37:50 +00002484 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
2485 goto done;
sewardj1b8d58e2005-02-05 14:34:18 +00002486 } else {
sewardj501a3392005-05-11 15:37:50 +00002487 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
sewardj03ccf852005-03-21 02:47:42 +00002488 *p++ = toUChar(opc_imm);
sewardj501a3392005-05-11 15:37:50 +00002489 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
2490 *p++ = (UChar)(i->Ain.Sh64.src);
sewardj1b8d58e2005-02-05 14:34:18 +00002491 goto done;
2492 }
2493 break;
2494
sewardj501a3392005-05-11 15:37:50 +00002495 case Ain_Test64:
2496 /* testq sign-extend($imm32), %reg */
2497 *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
2498 *p++ = 0xF7;
2499 p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
2500 p = emit32(p, i->Ain.Test64.imm32);
2501 goto done;
2502
sewardjd0a12df2005-02-10 02:07:43 +00002503 case Ain_Unary64:
2504 if (i->Ain.Unary64.op == Aun_NOT) {
sewardj501a3392005-05-11 15:37:50 +00002505 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
2506 *p++ = 0xF7;
2507 p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
2508 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00002509 }
sewardjb5220772005-04-27 11:53:23 +00002510 if (i->Ain.Unary64.op == Aun_NEG) {
sewardj501a3392005-05-11 15:37:50 +00002511 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
2512 *p++ = 0xF7;
2513 p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
2514 goto done;
sewardjb5220772005-04-27 11:53:23 +00002515 }
sewardjd0a12df2005-02-10 02:07:43 +00002516 break;
sewardj9b967672005-02-08 11:13:09 +00002517
sewardj6ce1a232007-03-31 19:12:38 +00002518 case Ain_Lea64:
2519 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2520 *p++ = 0x8D;
2521 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2522 goto done;
2523
sewardj9cc2bbf2011-06-05 17:56:03 +00002524 case Ain_Alu32R:
2525 /* ADD/SUB/AND/OR/XOR/CMP */
2526 opc = opc_rr = subopc_imm = opc_imma = 0;
2527 switch (i->Ain.Alu32R.op) {
2528 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2529 subopc_imm = 0; opc_imma = 0x05; break;
2530 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2531 subopc_imm = 5; opc_imma = 0x2D; break;
2532 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2533 subopc_imm = 4; opc_imma = 0x25; break;
2534 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2535 subopc_imm = 6; opc_imma = 0x35; break;
2536 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2537 subopc_imm = 1; opc_imma = 0x0D; break;
2538 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2539 subopc_imm = 7; opc_imma = 0x3D; break;
2540 default: goto bad;
2541 }
2542 switch (i->Ain.Alu32R.src->tag) {
2543 case Armi_Imm:
florian79efdc62013-02-11 00:47:35 +00002544 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
sewardj9cc2bbf2011-06-05 17:56:03 +00002545 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2546 goto bad; /* FIXME: awaiting test case */
2547 *p++ = toUChar(opc_imma);
2548 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2549 } else
2550 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2551 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) );
2552 if (rex != 0x40) *p++ = rex;
2553 *p++ = 0x83;
2554 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
2555 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2556 } else {
2557 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) );
2558 if (rex != 0x40) *p++ = rex;
2559 *p++ = 0x81;
2560 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
2561 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2562 }
2563 goto done;
2564 case Armi_Reg:
2565 rex = clearWBit(
2566 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2567 i->Ain.Alu32R.dst) );
2568 if (rex != 0x40) *p++ = rex;
2569 *p++ = toUChar(opc_rr);
2570 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2571 i->Ain.Alu32R.dst);
2572 goto done;
2573 case Armi_Mem:
2574 rex = clearWBit(
2575 rexAMode_M( i->Ain.Alu32R.dst,
2576 i->Ain.Alu32R.src->Armi.Mem.am) );
2577 if (rex != 0x40) *p++ = rex;
2578 *p++ = toUChar(opc);
2579 p = doAMode_M(p, i->Ain.Alu32R.dst,
2580 i->Ain.Alu32R.src->Armi.Mem.am);
2581 goto done;
2582 default:
2583 goto bad;
2584 }
2585 break;
2586
sewardj9b967672005-02-08 11:13:09 +00002587 case Ain_MulL:
2588 subopc = i->Ain.MulL.syned ? 5 : 4;
sewardj501a3392005-05-11 15:37:50 +00002589 switch (i->Ain.MulL.src->tag) {
2590 case Arm_Mem:
2591 *p++ = rexAMode_M( fake(0),
2592 i->Ain.MulL.src->Arm.Mem.am);
2593 *p++ = 0xF7;
2594 p = doAMode_M(p, fake(subopc),
2595 i->Ain.MulL.src->Arm.Mem.am);
2596 goto done;
2597 case Arm_Reg:
2598 *p++ = rexAMode_R(fake(0),
2599 i->Ain.MulL.src->Arm.Reg.reg);
2600 *p++ = 0xF7;
2601 p = doAMode_R(p, fake(subopc),
2602 i->Ain.MulL.src->Arm.Reg.reg);
2603 goto done;
2604 default:
2605 goto bad;
sewardj9b967672005-02-08 11:13:09 +00002606 }
2607 break;
2608
sewardj7de0d3c2005-02-13 02:26:41 +00002609 case Ain_Div:
2610 subopc = i->Ain.Div.syned ? 7 : 6;
2611 if (i->Ain.Div.sz == 4) {
2612 switch (i->Ain.Div.src->tag) {
2613 case Arm_Mem:
sewardja6b93d12005-02-17 09:28:28 +00002614 goto bad;
2615 /*FIXME*/
sewardj7de0d3c2005-02-13 02:26:41 +00002616 *p++ = 0xF7;
2617 p = doAMode_M(p, fake(subopc),
2618 i->Ain.Div.src->Arm.Mem.am);
2619 goto done;
2620 case Arm_Reg:
2621 *p++ = clearWBit(
2622 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
2623 *p++ = 0xF7;
2624 p = doAMode_R(p, fake(subopc),
2625 i->Ain.Div.src->Arm.Reg.reg);
2626 goto done;
2627 default:
2628 goto bad;
2629 }
2630 }
sewardja6b93d12005-02-17 09:28:28 +00002631 if (i->Ain.Div.sz == 8) {
2632 switch (i->Ain.Div.src->tag) {
2633 case Arm_Mem:
sewardj85520e42005-02-19 15:22:38 +00002634 *p++ = rexAMode_M( fake(0),
2635 i->Ain.Div.src->Arm.Mem.am);
sewardja6b93d12005-02-17 09:28:28 +00002636 *p++ = 0xF7;
2637 p = doAMode_M(p, fake(subopc),
2638 i->Ain.Div.src->Arm.Mem.am);
2639 goto done;
2640 case Arm_Reg:
sewardj85520e42005-02-19 15:22:38 +00002641 *p++ = rexAMode_R( fake(0),
2642 i->Ain.Div.src->Arm.Reg.reg);
sewardja6b93d12005-02-17 09:28:28 +00002643 *p++ = 0xF7;
2644 p = doAMode_R(p, fake(subopc),
2645 i->Ain.Div.src->Arm.Reg.reg);
2646 goto done;
2647 default:
2648 goto bad;
2649 }
2650 }
sewardj7de0d3c2005-02-13 02:26:41 +00002651 break;
2652
sewardj1001dc42005-02-21 08:25:55 +00002653 case Ain_Push:
2654 switch (i->Ain.Push.src->tag) {
2655 case Armi_Mem:
2656 *p++ = clearWBit(
2657 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
2658 *p++ = 0xFF;
2659 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
2660 goto done;
sewardj1a01e652005-02-23 11:39:21 +00002661 case Armi_Imm:
2662 *p++ = 0x68;
2663 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2664 goto done;
2665 case Armi_Reg:
sewardj03ccf852005-03-21 02:47:42 +00002666 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
sewardjdc2ca892005-04-07 02:01:23 +00002667 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
sewardj1a01e652005-02-23 11:39:21 +00002668 goto done;
sewardj1001dc42005-02-21 08:25:55 +00002669 default:
2670 goto bad;
2671 }
sewardj1b8d58e2005-02-05 14:34:18 +00002672
sewardj4d77a9c2007-08-25 23:21:08 +00002673 case Ain_Call: {
sewardj74142b82013-08-08 10:28:59 +00002674 if (i->Ain.Call.cond != Acc_ALWAYS
2675 && i->Ain.Call.rloc.pri != RLPri_None) {
sewardjcfe046e2013-01-17 14:23:53 +00002676 /* The call might not happen (it isn't unconditional) and it
2677 returns a result. In this case we will need to generate a
2678 control flow diamond to put 0x555..555 in the return
2679 register(s) in the case where the call doesn't happen. If
2680 this ever becomes necessary, maybe copy code from the ARM
2681 equivalent. Until that day, just give up. */
2682 goto bad;
2683 }
sewardj1b8d58e2005-02-05 14:34:18 +00002684 /* As per detailed comment for Ain_Call in
2685 getRegUsage_AMD64Instr above, %r11 is used as an address
2686 temporary. */
2687 /* jump over the following two insns if the condition does not
2688 hold */
sewardj4d77a9c2007-08-25 23:21:08 +00002689 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
sewardj1b8d58e2005-02-05 14:34:18 +00002690 if (i->Ain.Call.cond != Acc_ALWAYS) {
sewardj03ccf852005-03-21 02:47:42 +00002691 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
sewardj4d77a9c2007-08-25 23:21:08 +00002692 *p++ = shortImm ? 10 : 13;
2693 /* 10 or 13 bytes in the next two insns */
sewardj1b8d58e2005-02-05 14:34:18 +00002694 }
sewardj4d77a9c2007-08-25 23:21:08 +00002695 if (shortImm) {
2696 /* 7 bytes: movl sign-extend(imm32), %r11 */
2697 *p++ = 0x49;
2698 *p++ = 0xC7;
2699 *p++ = 0xC3;
2700 p = emit32(p, (UInt)i->Ain.Call.target);
2701 } else {
2702 /* 10 bytes: movabsq $target, %r11 */
2703 *p++ = 0x49;
2704 *p++ = 0xBB;
2705 p = emit64(p, i->Ain.Call.target);
2706 }
2707 /* 3 bytes: call *%r11 */
sewardj1b8d58e2005-02-05 14:34:18 +00002708 *p++ = 0x41;
2709 *p++ = 0xFF;
2710 *p++ = 0xD3;
2711 goto done;
sewardj4d77a9c2007-08-25 23:21:08 +00002712 }
sewardj549e0642005-02-05 12:00:14 +00002713
sewardjc6f970f2012-04-02 21:54:49 +00002714 case Ain_XDirect: {
2715 /* NB: what goes on here has to be very closely coordinated with the
2716 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
2717 /* We're generating chain-me requests here, so we need to be
2718 sure this is actually allowed -- no-redir translations can't
2719 use chain-me's. Hence: */
2720 vassert(disp_cp_chain_me_to_slowEP != NULL);
2721 vassert(disp_cp_chain_me_to_fastEP != NULL);
2722
2723 HReg r11 = hregAMD64_R11();
sewardj010ac542011-05-29 09:29:18 +00002724
sewardj549e0642005-02-05 12:00:14 +00002725 /* Use ptmp for backpatching conditional jumps. */
2726 ptmp = NULL;
2727
2728 /* First off, if this is conditional, create a conditional
2729 jump over the rest of it. */
sewardjc6f970f2012-04-02 21:54:49 +00002730 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
sewardj549e0642005-02-05 12:00:14 +00002731 /* jmp fwds if !condition */
sewardjc6f970f2012-04-02 21:54:49 +00002732 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
sewardj549e0642005-02-05 12:00:14 +00002733 ptmp = p; /* fill in this bit later */
2734 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2735 }
2736
sewardjc6f970f2012-04-02 21:54:49 +00002737 /* Update the guest RIP. */
sewardj3e8ba602012-04-21 08:18:02 +00002738 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
2739 /* use a shorter encoding */
2740 /* movl sign-extend(dstGA), %r11 */
2741 *p++ = 0x49;
2742 *p++ = 0xC7;
2743 *p++ = 0xC3;
2744 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
2745 } else {
2746 /* movabsq $dstGA, %r11 */
2747 *p++ = 0x49;
2748 *p++ = 0xBB;
2749 p = emit64(p, i->Ain.XDirect.dstGA);
2750 }
2751
sewardjc6f970f2012-04-02 21:54:49 +00002752 /* movq %r11, amRIP */
2753 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
2754 *p++ = 0x89;
2755 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
sewardj010ac542011-05-29 09:29:18 +00002756
sewardjc6f970f2012-04-02 21:54:49 +00002757 /* --- FIRST PATCHABLE BYTE follows --- */
2758 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2759 to) backs up the return address, so as to find the address of
2760 the first patchable byte. So: don't change the length of the
2761 two instructions below. */
2762 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
2763 *p++ = 0x49;
2764 *p++ = 0xBB;
florian8462d112014-09-24 15:18:09 +00002765 const void* disp_cp_chain_me
sewardjc6f970f2012-04-02 21:54:49 +00002766 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2767 : disp_cp_chain_me_to_slowEP;
2768 p = emit64(p, Ptr_to_ULong(disp_cp_chain_me));
2769 /* call *%r11 */
2770 *p++ = 0x41;
sewardj0528bb52005-12-15 15:45:20 +00002771 *p++ = 0xFF;
sewardjc6f970f2012-04-02 21:54:49 +00002772 *p++ = 0xD3;
2773 /* --- END of PATCHABLE BYTES --- */
sewardj549e0642005-02-05 12:00:14 +00002774
2775 /* Fix up the conditional jump, if there was one. */
sewardjc6f970f2012-04-02 21:54:49 +00002776 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
sewardj549e0642005-02-05 12:00:14 +00002777 Int delta = p - ptmp;
sewardjc6f970f2012-04-02 21:54:49 +00002778 vassert(delta > 0 && delta < 40);
2779 *ptmp = toUChar(delta-1);
2780 }
2781 goto done;
2782 }
2783
2784 case Ain_XIndir: {
2785 /* We're generating transfers that could lead indirectly to a
2786 chain-me, so we need to be sure this is actually allowed --
2787 no-redir translations are not allowed to reach normal
2788 translations without going through the scheduler. That means
2789 no XDirects or XIndirs out from no-redir translations.
2790 Hence: */
2791 vassert(disp_cp_xindir != NULL);
2792
2793 /* Use ptmp for backpatching conditional jumps. */
2794 ptmp = NULL;
2795
2796 /* First off, if this is conditional, create a conditional
2797 jump over the rest of it. */
2798 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2799 /* jmp fwds if !condition */
2800 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
2801 ptmp = p; /* fill in this bit later */
2802 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2803 }
2804
2805 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2806 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2807 *p++ = 0x89;
2808 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
sewardj3e8ba602012-04-21 08:18:02 +00002809
2810 /* get $disp_cp_xindir into %r11 */
2811 if (fitsIn32Bits(Ptr_to_ULong(disp_cp_xindir))) {
2812 /* use a shorter encoding */
2813 /* movl sign-extend(disp_cp_xindir), %r11 */
2814 *p++ = 0x49;
2815 *p++ = 0xC7;
2816 *p++ = 0xC3;
2817 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
2818 } else {
2819 /* movabsq $disp_cp_xindir, %r11 */
2820 *p++ = 0x49;
2821 *p++ = 0xBB;
2822 p = emit64(p, Ptr_to_ULong(disp_cp_xindir));
2823 }
2824
sewardjc6f970f2012-04-02 21:54:49 +00002825 /* jmp *%r11 */
2826 *p++ = 0x41;
2827 *p++ = 0xFF;
2828 *p++ = 0xE3;
2829
2830 /* Fix up the conditional jump, if there was one. */
2831 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2832 Int delta = p - ptmp;
2833 vassert(delta > 0 && delta < 40);
2834 *ptmp = toUChar(delta-1);
2835 }
2836 goto done;
2837 }
2838
2839 case Ain_XAssisted: {
2840 /* Use ptmp for backpatching conditional jumps. */
2841 ptmp = NULL;
2842
2843 /* First off, if this is conditional, create a conditional
2844 jump over the rest of it. */
2845 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2846 /* jmp fwds if !condition */
2847 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
2848 ptmp = p; /* fill in this bit later */
2849 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2850 }
2851
2852 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2853 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2854 *p++ = 0x89;
2855 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2856 /* movl $magic_number, %ebp. Since these numbers are all small positive
2857 integers, we can get away with "movl $N, %ebp" rather than
2858 the longer "movq $N, %rbp". */
2859 UInt trcval = 0;
2860 switch (i->Ain.XAssisted.jk) {
2861 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
2862 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
2863 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
2864 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
2865 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
2866 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
2867 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
sewardj05f5e012014-05-04 10:52:11 +00002868 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
sewardjc6f970f2012-04-02 21:54:49 +00002869 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
2870 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
2871 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
2872 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
2873 /* We don't expect to see the following being assisted. */
2874 case Ijk_Ret:
2875 case Ijk_Call:
2876 /* fallthrough */
2877 default:
2878 ppIRJumpKind(i->Ain.XAssisted.jk);
2879 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
2880 }
2881 vassert(trcval != 0);
2882 *p++ = 0xBD;
2883 p = emit32(p, trcval);
2884 /* movabsq $disp_assisted, %r11 */
2885 *p++ = 0x49;
2886 *p++ = 0xBB;
2887 p = emit64(p, Ptr_to_ULong(disp_cp_xassisted));
2888 /* jmp *%r11 */
2889 *p++ = 0x41;
2890 *p++ = 0xFF;
2891 *p++ = 0xE3;
2892
2893 /* Fix up the conditional jump, if there was one. */
2894 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2895 Int delta = p - ptmp;
2896 vassert(delta > 0 && delta < 40);
sewardj03ccf852005-03-21 02:47:42 +00002897 *ptmp = toUChar(delta-1);
sewardj549e0642005-02-05 12:00:14 +00002898 }
2899 goto done;
sewardj010ac542011-05-29 09:29:18 +00002900 }
sewardj549e0642005-02-05 12:00:14 +00002901
sewardj1b8d58e2005-02-05 14:34:18 +00002902 case Ain_CMov64:
2903 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
2904 if (i->Ain.CMov64.src->tag == Arm_Reg) {
2905 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
2906 *p++ = 0x0F;
sewardj03ccf852005-03-21 02:47:42 +00002907 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
sewardj1b8d58e2005-02-05 14:34:18 +00002908 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
2909 goto done;
2910 }
2911 if (i->Ain.CMov64.src->tag == Arm_Mem) {
sewardj1b8d58e2005-02-05 14:34:18 +00002912 *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
2913 *p++ = 0x0F;
sewardj03ccf852005-03-21 02:47:42 +00002914 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
sewardj1b8d58e2005-02-05 14:34:18 +00002915 p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
2916 goto done;
2917 }
2918 break;
sewardj549e0642005-02-05 12:00:14 +00002919
sewardjca257bc2010-09-08 08:34:52 +00002920 case Ain_MovxLQ:
2921 /* No, _don't_ ask me why the sense of the args has to be
2922 different in the S vs Z case. I don't know. */
2923 if (i->Ain.MovxLQ.syned) {
2924 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
2925 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
2926 *p++ = 0x63;
2927 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
2928 } else {
2929 /* Produce a 32-bit reg-reg move, since the implicit
2930 zero-extend does what we want. */
2931 *p++ = clearWBit (
2932 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
2933 *p++ = 0x89;
2934 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
2935 }
sewardj549e0642005-02-05 12:00:14 +00002936 goto done;
2937
2938 case Ain_LoadEX:
sewardj1b8d58e2005-02-05 14:34:18 +00002939 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
2940 /* movzbq */
2941 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2942 *p++ = 0x0F;
2943 *p++ = 0xB6;
2944 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2945 goto done;
2946 }
2947 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
2948 /* movzwq */
2949 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2950 *p++ = 0x0F;
2951 *p++ = 0xB7;
2952 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2953 goto done;
2954 }
sewardj549e0642005-02-05 12:00:14 +00002955 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
2956 /* movzlq */
sewardj1b8d58e2005-02-05 14:34:18 +00002957 /* This isn't really an existing AMD64 instruction per se.
sewardj549e0642005-02-05 12:00:14 +00002958 Rather, we have to do a 32-bit load. Because a 32-bit
2959 write implicitly clears the upper 32 bits of the target
2960 register, we get what we want. */
2961 *p++ = clearWBit(
2962 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
2963 *p++ = 0x8B;
2964 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2965 goto done;
2966 }
2967 break;
2968
sewardja5bd0af2005-03-24 20:40:12 +00002969 case Ain_Set64:
2970 /* Make the destination register be 1 or 0, depending on whether
2971 the relevant condition holds. Complication: the top 56 bits
2972 of the destination should be forced to zero, but doing 'xorq
2973 %r,%r' kills the flag(s) we are about to read. Sigh. So
2974 start off my moving $0 into the dest. */
sewardjdc2ca892005-04-07 02:01:23 +00002975 reg = iregBits3210(i->Ain.Set64.dst);
sewardja5bd0af2005-03-24 20:40:12 +00002976 vassert(reg < 16);
2977
2978 /* movq $0, %dst */
2979 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
2980 *p++ = 0xC7;
2981 *p++ = toUChar(0xC0 + (reg & 7));
2982 p = emit32(p, 0);
2983
2984 /* setb lo8(%dst) */
2985 /* note, 8-bit register rex trickyness. Be careful here. */
2986 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
2987 *p++ = 0x0F;
2988 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
2989 *p++ = toUChar(0xC0 + (reg & 7));
2990 goto done;
2991
sewardjf53b7352005-04-06 20:01:56 +00002992 case Ain_Bsfr64:
2993 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
2994 *p++ = 0x0F;
2995 if (i->Ain.Bsfr64.isFwds) {
2996 *p++ = 0xBC;
2997 } else {
2998 *p++ = 0xBD;
2999 }
3000 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3001 goto done;
sewardjd0a12df2005-02-10 02:07:43 +00003002
3003 case Ain_MFence:
3004 /* mfence */
3005 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3006 goto done;
sewardj1b8d58e2005-02-05 14:34:18 +00003007
sewardje9d8a262009-07-01 08:06:34 +00003008 case Ain_ACAS:
3009 /* lock */
3010 *p++ = 0xF0;
3011 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3012 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3013 in %rbx. The new-value register is hardwired to be %rbx
3014 since dealing with byte integer registers is too much hassle,
3015 so we force the register operand to %rbx (could equally be
3016 %rcx or %rdx). */
3017 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3018 if (i->Ain.ACAS.sz != 8)
3019 rex = clearWBit(rex);
3020
3021 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3022 *p++ = 0x0F;
3023 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3024 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3025 goto done;
3026
3027 case Ain_DACAS:
3028 /* lock */
3029 *p++ = 0xF0;
3030 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3031 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3032 aren't encoded in the insn. */
3033 rex = rexAMode_M( fake(1), i->Ain.ACAS.addr );
3034 if (i->Ain.ACAS.sz != 8)
3035 rex = clearWBit(rex);
3036 *p++ = rex;
3037 *p++ = 0x0F;
3038 *p++ = 0xC7;
3039 p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
3040 goto done;
3041
sewardj25a85812005-05-08 23:03:48 +00003042 case Ain_A87Free:
3043 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3044 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3045 p = do_ffree_st(p, 7-j);
3046 }
3047 goto done;
3048
3049 case Ain_A87PushPop:
sewardjd15b5972010-06-27 09:06:34 +00003050 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
sewardj25a85812005-05-08 23:03:48 +00003051 if (i->Ain.A87PushPop.isPush) {
sewardjd15b5972010-06-27 09:06:34 +00003052 /* Load from memory into %st(0): flds/fldl amode */
sewardj25a85812005-05-08 23:03:48 +00003053 *p++ = clearWBit(
3054 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
sewardjd15b5972010-06-27 09:06:34 +00003055 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
sewardj25a85812005-05-08 23:03:48 +00003056 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
3057 } else {
sewardjd15b5972010-06-27 09:06:34 +00003058 /* Dump %st(0) to memory: fstps/fstpl amode */
sewardj25a85812005-05-08 23:03:48 +00003059 *p++ = clearWBit(
3060 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
sewardjd15b5972010-06-27 09:06:34 +00003061 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
sewardj25a85812005-05-08 23:03:48 +00003062 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
3063 goto done;
3064 }
3065 goto done;
3066
3067 case Ain_A87FpOp:
3068 switch (i->Ain.A87FpOp.op) {
sewardj5e205372005-05-09 02:57:08 +00003069 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3070 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3071 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
sewardj5e205372005-05-09 02:57:08 +00003072 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3073 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3074 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3075 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3076 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3077 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
sewardjf4c803b2006-09-11 11:07:34 +00003078 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
sewardj4970e4e2008-10-11 10:07:55 +00003079 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
sewardje9c51c92014-04-30 22:50:34 +00003080 case Afp_TAN:
3081 /* fptan pushes 1.0 on the FP stack, except when the
3082 argument is out of range. Hence we have to do the
3083 instruction, then inspect C2 to see if there is an out
3084 of range condition. If there is, we skip the fincstp
3085 that is used by the in-range case to get rid of this
3086 extra 1.0 value. */
3087 *p++ = 0xD9; *p++ = 0xF2; // fptan
3088 *p++ = 0x50; // pushq %rax
3089 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3090 *p++ = 0x66; *p++ = 0xA9;
3091 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3092 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3093 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3094 *p++ = 0x58; // after_fincstp: popq %rax
3095 break;
3096 default:
3097 goto bad;
sewardj25a85812005-05-08 23:03:48 +00003098 }
3099 goto done;
3100
3101 case Ain_A87LdCW:
3102 *p++ = clearWBit(
3103 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
3104 *p++ = 0xD9;
3105 p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
3106 goto done;
3107
sewardjf4c803b2006-09-11 11:07:34 +00003108 case Ain_A87StSW:
3109 *p++ = clearWBit(
3110 rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
3111 *p++ = 0xDD;
3112 p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
3113 goto done;
3114
sewardj1b8d58e2005-02-05 14:34:18 +00003115 case Ain_Store:
3116 if (i->Ain.Store.sz == 2) {
3117 /* This just goes to show the crazyness of the instruction
3118 set encoding. We have to insert two prefix bytes, but be
3119 careful to avoid a conflict in what the size should be, by
3120 ensuring that REX.W = 0. */
3121 *p++ = 0x66; /* override to 16-bits */
3122 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3123 *p++ = 0x89;
3124 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3125 goto done;
3126 }
3127 if (i->Ain.Store.sz == 4) {
3128 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3129 *p++ = 0x89;
3130 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3131 goto done;
3132 }
sewardje1698952005-02-08 15:02:39 +00003133 if (i->Ain.Store.sz == 1) {
sewardja8903672005-02-27 13:31:42 +00003134 /* This is one place where it would be wrong to skip emitting
3135 a rex byte of 0x40, since the mere presence of rex changes
3136 the meaning of the byte register access. Be careful. */
sewardje1698952005-02-08 15:02:39 +00003137 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3138 *p++ = 0x88;
3139 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3140 goto done;
3141 }
sewardj1b8d58e2005-02-05 14:34:18 +00003142 break;
3143
sewardj1a01e652005-02-23 11:39:21 +00003144 case Ain_LdMXCSR:
3145 *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
3146 *p++ = 0x0F;
3147 *p++ = 0xAE;
3148 p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
3149 goto done;
3150
sewardj18303862005-02-21 12:36:54 +00003151 case Ain_SseUComIS:
3152 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3153 /* ucomi[sd] %srcL, %srcR */
3154 if (i->Ain.SseUComIS.sz == 8) {
3155 *p++ = 0x66;
3156 } else {
3157 goto bad;
3158 vassert(i->Ain.SseUComIS.sz == 4);
3159 }
3160 *p++ = clearWBit (
3161 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
3162 vreg2ireg(i->Ain.SseUComIS.srcR) ));
3163 *p++ = 0x0F;
3164 *p++ = 0x2E;
3165 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
3166 vreg2ireg(i->Ain.SseUComIS.srcR) );
3167 /* pushfq */
3168 *p++ = 0x9C;
3169 /* popq %dst */
sewardj03ccf852005-03-21 02:47:42 +00003170 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
sewardjdc2ca892005-04-07 02:01:23 +00003171 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
sewardj18303862005-02-21 12:36:54 +00003172 goto done;
3173
sewardj1a01e652005-02-23 11:39:21 +00003174 case Ain_SseSI2SF:
3175 /* cvssi2s[sd] %src, %dst */
3176 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
3177 i->Ain.SseSI2SF.src );
sewardj03ccf852005-03-21 02:47:42 +00003178 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3179 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
sewardj1a01e652005-02-23 11:39:21 +00003180 *p++ = 0x0F;
3181 *p++ = 0x2A;
3182 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
3183 i->Ain.SseSI2SF.src );
3184 goto done;
3185
3186 case Ain_SseSF2SI:
3187 /* cvss[sd]2si %src, %dst */
3188 rex = rexAMode_R( i->Ain.SseSF2SI.dst,
3189 vreg2ireg(i->Ain.SseSF2SI.src) );
sewardj03ccf852005-03-21 02:47:42 +00003190 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3191 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
sewardj1a01e652005-02-23 11:39:21 +00003192 *p++ = 0x0F;
3193 *p++ = 0x2D;
3194 p = doAMode_R( p, i->Ain.SseSF2SI.dst,
3195 vreg2ireg(i->Ain.SseSF2SI.src) );
3196 goto done;
3197
sewardj8d965312005-02-25 02:48:47 +00003198 case Ain_SseSDSS:
3199 /* cvtsd2ss/cvtss2sd %src, %dst */
sewardj03ccf852005-03-21 02:47:42 +00003200 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
sewardj8d965312005-02-25 02:48:47 +00003201 *p++ = clearWBit(
3202 rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
3203 vreg2ireg(i->Ain.SseSDSS.src) ));
3204 *p++ = 0x0F;
3205 *p++ = 0x5A;
3206 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
3207 vreg2ireg(i->Ain.SseSDSS.src) );
3208 goto done;
3209
sewardj1001dc42005-02-21 08:25:55 +00003210 case Ain_SseLdSt:
sewardj18303862005-02-21 12:36:54 +00003211 if (i->Ain.SseLdSt.sz == 8) {
3212 *p++ = 0xF2;
3213 } else
3214 if (i->Ain.SseLdSt.sz == 4) {
sewardj18303862005-02-21 12:36:54 +00003215 *p++ = 0xF3;
3216 } else
3217 if (i->Ain.SseLdSt.sz != 16) {
3218 vassert(0);
3219 }
sewardj1001dc42005-02-21 08:25:55 +00003220 *p++ = clearWBit(
3221 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
3222 *p++ = 0x0F;
sewardj03ccf852005-03-21 02:47:42 +00003223 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
sewardj1001dc42005-02-21 08:25:55 +00003224 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
3225 goto done;
3226
3227 case Ain_SseLdzLO:
3228 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3229 /* movs[sd] amode, %xmm-dst */
sewardj03ccf852005-03-21 02:47:42 +00003230 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
sewardj1001dc42005-02-21 08:25:55 +00003231 *p++ = clearWBit(
3232 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
3233 i->Ain.SseLdzLO.addr));
3234 *p++ = 0x0F;
3235 *p++ = 0x10;
3236 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
3237 i->Ain.SseLdzLO.addr);
3238 goto done;
3239
sewardj8d965312005-02-25 02:48:47 +00003240 case Ain_Sse32Fx4:
3241 xtra = 0;
3242 *p++ = clearWBit(
3243 rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
3244 vreg2ireg(i->Ain.Sse32Fx4.src) ));
3245 *p++ = 0x0F;
3246 switch (i->Ain.Sse32Fx4.op) {
sewardj432f8b62005-05-10 02:50:05 +00003247 case Asse_ADDF: *p++ = 0x58; break;
3248 case Asse_DIVF: *p++ = 0x5E; break;
3249 case Asse_MAXF: *p++ = 0x5F; break;
3250 case Asse_MINF: *p++ = 0x5D; break;
3251 case Asse_MULF: *p++ = 0x59; break;
sewardja7ba8c42005-05-10 20:08:34 +00003252 case Asse_RCPF: *p++ = 0x53; break;
3253 case Asse_RSQRTF: *p++ = 0x52; break;
3254 case Asse_SQRTF: *p++ = 0x51; break;
sewardj432f8b62005-05-10 02:50:05 +00003255 case Asse_SUBF: *p++ = 0x5C; break;
sewardj8d965312005-02-25 02:48:47 +00003256 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
sewardj432f8b62005-05-10 02:50:05 +00003257 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3258 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003259 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj8d965312005-02-25 02:48:47 +00003260 default: goto bad;
3261 }
3262 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
3263 vreg2ireg(i->Ain.Sse32Fx4.src) );
3264 if (xtra & 0x100)
sewardj03ccf852005-03-21 02:47:42 +00003265 *p++ = toUChar(xtra & 0xFF);
sewardj8d965312005-02-25 02:48:47 +00003266 goto done;
3267
sewardj4c328cf2005-05-05 12:05:54 +00003268 case Ain_Sse64Fx2:
3269 xtra = 0;
3270 *p++ = 0x66;
3271 *p++ = clearWBit(
3272 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
3273 vreg2ireg(i->Ain.Sse64Fx2.src) ));
3274 *p++ = 0x0F;
3275 switch (i->Ain.Sse64Fx2.op) {
3276 case Asse_ADDF: *p++ = 0x58; break;
sewardj5992bd02005-05-11 02:13:42 +00003277 case Asse_DIVF: *p++ = 0x5E; break;
3278 case Asse_MAXF: *p++ = 0x5F; break;
3279 case Asse_MINF: *p++ = 0x5D; break;
sewardj4c328cf2005-05-05 12:05:54 +00003280 case Asse_MULF: *p++ = 0x59; break;
sewardj97628592005-05-10 22:42:54 +00003281 case Asse_SQRTF: *p++ = 0x51; break;
sewardj4c328cf2005-05-05 12:05:54 +00003282 case Asse_SUBF: *p++ = 0x5C; break;
sewardj97628592005-05-10 22:42:54 +00003283 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3284 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3285 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003286 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj4c328cf2005-05-05 12:05:54 +00003287 default: goto bad;
3288 }
3289 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
3290 vreg2ireg(i->Ain.Sse64Fx2.src) );
3291 if (xtra & 0x100)
sewardjca673ab2005-05-11 10:03:08 +00003292 *p++ = toUChar(xtra & 0xFF);
sewardj4c328cf2005-05-05 12:05:54 +00003293 goto done;
sewardj8d965312005-02-25 02:48:47 +00003294
3295 case Ain_Sse32FLo:
3296 xtra = 0;
3297 *p++ = 0xF3;
3298 *p++ = clearWBit(
3299 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
3300 vreg2ireg(i->Ain.Sse32FLo.src) ));
3301 *p++ = 0x0F;
3302 switch (i->Ain.Sse32FLo.op) {
3303 case Asse_ADDF: *p++ = 0x58; break;
sewardjc49ce232005-02-25 13:03:03 +00003304 case Asse_DIVF: *p++ = 0x5E; break;
sewardj37d52572005-02-25 14:22:12 +00003305 case Asse_MAXF: *p++ = 0x5F; break;
3306 case Asse_MINF: *p++ = 0x5D; break;
sewardj8d965312005-02-25 02:48:47 +00003307 case Asse_MULF: *p++ = 0x59; break;
sewardja7ba8c42005-05-10 20:08:34 +00003308 case Asse_RCPF: *p++ = 0x53; break;
3309 case Asse_RSQRTF: *p++ = 0x52; break;
3310 case Asse_SQRTF: *p++ = 0x51; break;
sewardj8d965312005-02-25 02:48:47 +00003311 case Asse_SUBF: *p++ = 0x5C; break;
sewardj432f8b62005-05-10 02:50:05 +00003312 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
sewardj3aba9eb2005-03-30 23:20:47 +00003313 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
sewardj4c328cf2005-05-05 12:05:54 +00003314 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003315 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj8d965312005-02-25 02:48:47 +00003316 default: goto bad;
3317 }
3318 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
3319 vreg2ireg(i->Ain.Sse32FLo.src) );
3320 if (xtra & 0x100)
sewardj03ccf852005-03-21 02:47:42 +00003321 *p++ = toUChar(xtra & 0xFF);
sewardj8d965312005-02-25 02:48:47 +00003322 goto done;
sewardj1001dc42005-02-21 08:25:55 +00003323
3324 case Ain_Sse64FLo:
3325 xtra = 0;
3326 *p++ = 0xF2;
3327 *p++ = clearWBit(
3328 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
3329 vreg2ireg(i->Ain.Sse64FLo.src) ));
3330 *p++ = 0x0F;
3331 switch (i->Ain.Sse64FLo.op) {
3332 case Asse_ADDF: *p++ = 0x58; break;
3333 case Asse_DIVF: *p++ = 0x5E; break;
sewardj1a01e652005-02-23 11:39:21 +00003334 case Asse_MAXF: *p++ = 0x5F; break;
sewardjc49ce232005-02-25 13:03:03 +00003335 case Asse_MINF: *p++ = 0x5D; break;
sewardj1001dc42005-02-21 08:25:55 +00003336 case Asse_MULF: *p++ = 0x59; break;
sewardj18303862005-02-21 12:36:54 +00003337 case Asse_SQRTF: *p++ = 0x51; break;
sewardj1001dc42005-02-21 08:25:55 +00003338 case Asse_SUBF: *p++ = 0x5C; break;
sewardj137015d2005-03-27 04:01:15 +00003339 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
sewardj8d965312005-02-25 02:48:47 +00003340 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
sewardj137015d2005-03-27 04:01:15 +00003341 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
sewardjb9282632005-11-05 02:33:25 +00003342 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
sewardj1001dc42005-02-21 08:25:55 +00003343 default: goto bad;
3344 }
3345 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
3346 vreg2ireg(i->Ain.Sse64FLo.src) );
3347 if (xtra & 0x100)
sewardj03ccf852005-03-21 02:47:42 +00003348 *p++ = toUChar(xtra & 0xFF);
sewardj1001dc42005-02-21 08:25:55 +00003349 goto done;
3350
3351 case Ain_SseReRg:
3352# define XX(_n) *p++ = (_n)
3353
3354 rex = clearWBit(
3355 rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
3356 vreg2ireg(i->Ain.SseReRg.src) ));
3357
3358 switch (i->Ain.SseReRg.op) {
3359 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
sewardj8d965312005-02-25 02:48:47 +00003360 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
sewardj9da16972005-02-21 13:58:26 +00003361 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
sewardj1a01e652005-02-23 11:39:21 +00003362 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
sewardj137015d2005-03-27 04:01:15 +00003363 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
sewardj97628592005-05-10 22:42:54 +00003364 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3365 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3366 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3367 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
sewardj5992bd02005-05-11 02:13:42 +00003368 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
sewardj97628592005-05-10 22:42:54 +00003369 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
sewardj09717342005-05-05 21:34:02 +00003370 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
sewardj5992bd02005-05-11 02:13:42 +00003371 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3372 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3373 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3374 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3375 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3376 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3377 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3378 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
sewardj09717342005-05-05 21:34:02 +00003379 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
sewardj5992bd02005-05-11 02:13:42 +00003380 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3381 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3382 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
sewardjadffcef2005-05-11 00:03:06 +00003383 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3384 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3385 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3386 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3387 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3388 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3389 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3390 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3391 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3392 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3393 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3394 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3395 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3396 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
sewardj09717342005-05-05 21:34:02 +00003397 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
sewardj97628592005-05-10 22:42:54 +00003398 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3399 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3400 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
sewardj09717342005-05-05 21:34:02 +00003401 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
sewardj97628592005-05-10 22:42:54 +00003402 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3403 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3404 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3405 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3406 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3407 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3408 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3409 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3410 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3411 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3412 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3413 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
sewardj1001dc42005-02-21 08:25:55 +00003414 default: goto bad;
3415 }
3416 p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
3417 vreg2ireg(i->Ain.SseReRg.src) );
3418# undef XX
3419 goto done;
3420
sewardj8d965312005-02-25 02:48:47 +00003421 case Ain_SseCMov:
3422 /* jmp fwds if !condition */
sewardj03ccf852005-03-21 02:47:42 +00003423 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
sewardj8d965312005-02-25 02:48:47 +00003424 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3425 ptmp = p;
3426
3427 /* movaps %src, %dst */
3428 *p++ = clearWBit(
3429 rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
3430 vreg2ireg(i->Ain.SseCMov.src) ));
3431 *p++ = 0x0F;
3432 *p++ = 0x28;
3433 p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
3434 vreg2ireg(i->Ain.SseCMov.src) );
3435
3436 /* Fill in the jump offset. */
sewardj03ccf852005-03-21 02:47:42 +00003437 *(ptmp-1) = toUChar(p - ptmp);
sewardj8d965312005-02-25 02:48:47 +00003438 goto done;
3439
sewardj09717342005-05-05 21:34:02 +00003440 case Ain_SseShuf:
3441 *p++ = 0x66;
3442 *p++ = clearWBit(
3443 rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
3444 vreg2ireg(i->Ain.SseShuf.src) ));
3445 *p++ = 0x0F;
3446 *p++ = 0x70;
3447 p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
3448 vreg2ireg(i->Ain.SseShuf.src) );
3449 *p++ = (UChar)(i->Ain.SseShuf.order);
3450 goto done;
3451
sewardj3616a2e2012-05-27 16:18:13 +00003452 //uu case Ain_AvxLdSt: {
3453 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
3454 //uu i->Ain.AvxLdSt.addr );
3455 //uu p = emitVexPrefix(p, vex);
3456 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
3457 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
3458 //uu goto done;
3459 //uu }
sewardjc4530ae2012-05-21 10:18:49 +00003460
sewardjc6f970f2012-04-02 21:54:49 +00003461 case Ain_EvCheck: {
3462 /* We generate:
3463 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
3464 (2 bytes) jns nofail expected taken
3465 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
3466 nofail:
3467 */
3468 /* This is heavily asserted re instruction lengths. It needs to
3469 be. If we get given unexpected forms of .amCounter or
3470 .amFailAddr -- basically, anything that's not of the form
3471 uimm7(%rbp) -- they are likely to fail. */
3472 /* Note also that after the decl we must be very careful not to
3473 read the carry flag, else we get a partial flags stall.
3474 js/jns avoids that, though. */
3475 UChar* p0 = p;
3476 /* --- decl 8(%rbp) --- */
3477 /* Need to compute the REX byte for the decl in order to prove
3478 that we don't need it, since this is a 32-bit inc and all
3479 registers involved in the amode are < r8. "fake(1)" because
3480 there's no register in this encoding; instead the register
3481 field is used as a sub opcode. The encoding for "decl r/m32"
3482 is FF /1, hence the fake(1). */
3483 rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter));
3484 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
3485 *p++ = 0xFF;
3486 p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter);
3487 vassert(p - p0 == 3);
3488 /* --- jns nofail --- */
3489 *p++ = 0x79;
3490 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3491 vassert(p - p0 == 5);
3492 /* --- jmp* 0(%rbp) --- */
3493 /* Once again, verify we don't need REX. The encoding is FF /4.
3494 We don't need REX.W since by default FF /4 in 64-bit mode
3495 implies a 64 bit load. */
3496 rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr));
3497 if (rex != 0x40) goto bad;
3498 *p++ = 0xFF;
3499 p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr);
3500 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3501 /* And crosscheck .. */
sewardj9b769162014-07-24 12:42:03 +00003502 vassert(evCheckSzB_AMD64(endness_host) == 8);
sewardjc6f970f2012-04-02 21:54:49 +00003503 goto done;
3504 }
3505
3506 case Ain_ProfInc: {
3507 /* We generate movabsq $0, %r11
3508 incq (%r11)
3509 in the expectation that a later call to LibVEX_patchProfCtr
3510 will be used to fill in the immediate field once the right
3511 value is known.
3512 49 BB 00 00 00 00 00 00 00 00
3513 49 FF 03
3514 */
3515 *p++ = 0x49; *p++ = 0xBB;
3516 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3517 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3518 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
3519 /* Tell the caller .. */
3520 vassert(!(*is_profInc));
3521 *is_profInc = True;
3522 goto done;
3523 }
3524
sewardjc33671d2005-02-01 20:30:00 +00003525 default:
3526 goto bad;
3527 }
3528
3529 bad:
cerion92b64362005-12-13 12:02:26 +00003530 ppAMD64Instr(i, mode64);
sewardjc33671d2005-02-01 20:30:00 +00003531 vpanic("emit_AMD64Instr");
3532 /*NOTREACHED*/
3533
sewardj813ce9e2005-02-04 21:16:48 +00003534 done:
sewardjc33671d2005-02-01 20:30:00 +00003535 vassert(p - &buf[0] <= 32);
3536 return p - &buf[0];
3537
3538# undef fake
3539}
sewardja3e98302005-02-01 15:55:05 +00003540
sewardjc6f970f2012-04-02 21:54:49 +00003541
3542/* How big is an event check? See case for Ain_EvCheck in
3543 emit_AMD64Instr just above. That crosschecks what this returns, so
3544 we can tell if we're inconsistent. */
sewardj9b769162014-07-24 12:42:03 +00003545Int evCheckSzB_AMD64 ( VexEndness endness_host )
sewardjc6f970f2012-04-02 21:54:49 +00003546{
3547 return 8;
3548}
3549
3550
3551/* NB: what goes on here has to be very closely coordinated with the
3552 emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
                                   void* place_to_chain,
                                   const void* disp_cp_chain_me_EXPECTED,
                                   const void* place_to_jump_to )
{
   /* Chain a translation: rewrite the 13-byte "call the chain-me
      stub" sequence at place_to_chain into a direct jump to
      place_to_jump_to.  Returns the modified address range so the
      caller can invalidate the relevant instruction cache lines. */
   /* AMD64 hosts are little-endian only. */
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movabsq $disp_cp_chain_me_EXPECTED, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
        41 FF D3
   */
   UChar* p = (UChar*)place_to_chain;
   /* Verify the site really contains the expected unchained
      sequence before touching anything. */
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
   vassert(p[10] == 0x41);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0xD3);
   /* And what we want to change it to is either:
        (general case):
          movabsq $place_to_jump_to, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to>
          41 FF E3
        So it's the same length (convenient, huh) and we don't
        need to change all the bits.
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq disp32   where disp32 is relative to the next insn
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B

      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- one billion, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5.  */
   Long delta   = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
   Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;

   /* Persistent across calls: forces the long form on every 1024th
      short-form opportunity (see the & 0x3FF test below), so both
      forms keep getting exercised. */
   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
                       "using long jmp\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      /* Short form: E9 <disp32>, padded to 13 bytes with ud2s
         (0F 0B).  The disp32 is stored little-endian, LSB first. */
      p[0] = 0xE9;
      p[1] = (delta >> 0) & 0xFF;
      p[2] = (delta >> 8) & 0xFF;
      p[3] = (delta >> 16) & 0xFF;
      p[4] = (delta >> 24) & 0xFF;
      p[5] = 0x0F; p[6] = 0x0B;
      p[7] = 0x0F; p[8] = 0x0B;
      p[9] = 0x0F; p[10] = 0x0B;
      p[11] = 0x0F; p[12] = 0x0B;
      /* sanity check on the delta -- top 32 are all 0 or all 1 */
      delta >>= 32;
      vassert(delta == 0LL || delta == -1LL);
   } else {
      /* Minimal modifications from the starting sequence: only the
         8-byte immediate and the final opcode byte (D3 "call" ->
         E3 "jmp") change. */
      *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to);
      p[12] = 0xE3;
   }
   /* 13 bytes were (potentially) modified in either case. */
   VexInvalRange vir = { (HWord)place_to_chain, 13 };
   return vir;
}
3638
3639
3640/* NB: what goes on here has to be very closely coordinated with the
3641 emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
                                     void* place_to_unchain,
                                     const void* place_to_jump_to_EXPECTED,
                                     const void* disp_cp_chain_me )
{
   /* Undo chainXDirect_AMD64: recognise either of the two chained
      forms it can produce at place_to_unchain and rewrite the site
      back to the original 13-byte "call the chain-me stub" sequence.
      Returns the modified address range for icache invalidation. */
   /* AMD64 hosts are little-endian only. */
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is either:
        (general case)
          movabsq $place_to_jump_to_EXPECTED, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to_EXPECTED>
          41 FF E3
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq d32
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B
   */
   UChar* p = (UChar*)place_to_unchain;
   Bool valid = False;
   if (p[0] == 0x49 && p[1] == 0xBB
       && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED)
       && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
      /* it's the long form */
      valid = True;
   }
   else
   if (p[0] == 0xE9
       && p[5] == 0x0F && p[6] == 0x0B
       && p[7] == 0x0F && p[8] == 0x0B
       && p[9] == 0x0F && p[10] == 0x0B
       && p[11] == 0x0F && p[12] == 0x0B) {
      /* It's the short form.  Check the offset is right. */
      /* The disp32 is relative to the end of the 5-byte E9 insn,
         hence the + 5 below; sign-extend it to 64 bits first. */
      Int s32 = *(Int*)(&p[1]);
      Long s64 = (Long)s32;
      if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
      }
   }
   /* Refuse to patch anything we don't positively recognise. */
   vassert(valid);
   /* And what we want to change it to is:
        movabsq $disp_cp_chain_me, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me>
        41 FF D3
      So it's the same length (convenient, huh).
   */
   p[0] = 0x49;
   p[1] = 0xBB;
   *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me);
   p[10] = 0x41;
   p[11] = 0xFF;
   p[12] = 0xD3;
   /* All 13 bytes of the site were rewritten. */
   VexInvalRange vir = { (HWord)place_to_unchain, 13 };
   return vir;
}
3705
3706
3707/* Patch the counter address into a profile inc point, as previously
3708 created by the Ain_ProfInc case for emit_AMD64Instr. */
sewardj9b769162014-07-24 12:42:03 +00003709VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
3710 void* place_to_patch,
florian7d6f81d2014-09-22 21:43:37 +00003711 const ULong* location_of_counter )
sewardjc6f970f2012-04-02 21:54:49 +00003712{
sewardj9b769162014-07-24 12:42:03 +00003713 vassert(endness_host == VexEndnessLE);
sewardjc6f970f2012-04-02 21:54:49 +00003714 vassert(sizeof(ULong*) == 8);
3715 UChar* p = (UChar*)place_to_patch;
3716 vassert(p[0] == 0x49);
3717 vassert(p[1] == 0xBB);
3718 vassert(p[2] == 0x00);
3719 vassert(p[3] == 0x00);
3720 vassert(p[4] == 0x00);
3721 vassert(p[5] == 0x00);
3722 vassert(p[6] == 0x00);
3723 vassert(p[7] == 0x00);
3724 vassert(p[8] == 0x00);
3725 vassert(p[9] == 0x00);
3726 vassert(p[10] == 0x49);
3727 vassert(p[11] == 0xFF);
3728 vassert(p[12] == 0x03);
3729 ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter);
3730 p[2] = imm64 & 0xFF; imm64 >>= 8;
3731 p[3] = imm64 & 0xFF; imm64 >>= 8;
3732 p[4] = imm64 & 0xFF; imm64 >>= 8;
3733 p[5] = imm64 & 0xFF; imm64 >>= 8;
3734 p[6] = imm64 & 0xFF; imm64 >>= 8;
3735 p[7] = imm64 & 0xFF; imm64 >>= 8;
3736 p[8] = imm64 & 0xFF; imm64 >>= 8;
3737 p[9] = imm64 & 0xFF; imm64 >>= 8;
florian5ea257b2012-09-29 17:05:46 +00003738 VexInvalRange vir = { (HWord)place_to_patch, 13 };
sewardjc6f970f2012-04-02 21:54:49 +00003739 return vir;
3740}
3741
3742
sewardja3e98302005-02-01 15:55:05 +00003743/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00003744/*--- end host_amd64_defs.c ---*/
sewardja3e98302005-02-01 15:55:05 +00003745/*---------------------------------------------------------------*/