
/*---------------------------------------------------------------*/
/*--- begin host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to an FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
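
/* For reference, the bit layouts these two values encode:
   FPUCW 0x027F: bits 0..5 = 0x3F (all six x87 exceptions masked),
   bits 8..9 = 10b (precision control = 53 bits, double),
   bits 10..11 = 00b (round to nearest).
   MXCSR 0x1F80: bits 7..12 = 0x3F (all six SSE exceptions masked),
   bits 13..14 = 00b (round to nearest), bit 15 = 0 (flush-to-zero
   off). */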


/*---------------------------------------------------------*/
/*--- misc helpers ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}


/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed
          IRTemps.  It holds the identity of a second
          64-bit virtual HReg, which holds the high half
          of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constant -- set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode      ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr       ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr       ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr       ( ISelEnv* env, IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
        vpanic("sane_AMode: unknown amd64 amode tag");
   }
}
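
/* (Aam_IR is an imm+reg amode, disp(reg); Aam_IRRS is
   imm+reg+(reg<<shift), disp(base,index,1<<shift).  Every register
   mentioned must be virtual, except that an Aam_IR base may also be
   the baseblock pointer %rbp.) */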


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
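
/* Examples: fitsIn32Bits(0xFFFFFFFF80000000ULL) is True, since that
   value is the sign-extension of its lower 32 bits (0x80000000);
   fitsIn32Bits(0x0000000080000000ULL) is False, since sign-extending
   0x80000000 gives 0xFFFFFFFF80000000, a different value. */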

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
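
/* Example: push_uimm64(env, 0xFFFFFFFF80000000ULL) takes the cheap
   path, since pushq sign-extends the 32-bit immediate 0x80000000 back
   to the full value; push_uimm64(env, 0x8000000000000000ULL) does not
   fit, so it goes via movabsq into a temporary, then pushq. */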


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_BBPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}
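
/* (So, for instance, a 64-bit Iex_Get becomes a single
   "movq off(%rbp), dst", and an Iex_Const whose value passes
   fitsIn32Bits becomes a single "movq $imm32, dst".) */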


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_BBPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nBBPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, Ptr_to_ULong(cee->addr), n_args, *retloc));
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp         (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
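
/* (Illustration: for an 8-element array of I64s, the and-with-7 wraps
   the biased index to 0..7, and the resulting amode is
   base(%rbp, %tmp, 8), the scale of 8 coming from the elemSz==8
   shift of 3.) */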


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
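
/* Example: for mode == 3 (Irrm_ZERO), the value computed is
   (3 << 13) | DEFAULT_MXCSR = 0x6000 | 0x1F80 = 0x7F80, i.e. the
   default %mxcsr with the RC field (bits 13:14) set to 11b,
   round-towards-zero. */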


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
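
/* Example: for mode == 3 (Irrm_ZERO), %rrm2 ends up as
   (3 << 10) | DEFAULT_FPUCW = 0x0C00 | 0x027F = 0x0E7F, i.e. the
   default control word with the RC field (bits 10:11) set to 11b. */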


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   /* pcmpeqd %dst,%dst -- comparing a register against itself gives
      all-ones in every lane, whatever the register held before. */
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
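
/* Example: bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL,
   since bits 0, 2, 5 and 7 of 0xA5 are set. */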


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32   = newVRegI(env);
         HReg b32   = newVRegI(env);
         HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

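         /* The shl/shr pairs below widen both narrow operands to 64
            bits: e.g. for Iop_MullU16, shifting left then logically
            right by 48 zero-extends the low 16 bits, while the signed
            variants use an arithmetic right shift to sign-extend.
            The 64-bit multiply then yields the full double-width
            product in its low bits. */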
1340 addInstr(env, mk_iMOVsd_RR(a32s, a32));
1341 addInstr(env, mk_iMOVsd_RR(b32s, b32));
sewardj501a3392005-05-11 15:37:50 +00001342 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
1343 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
1344 addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
1345 addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
sewardj85520e42005-02-19 15:22:38 +00001346 addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
1347 return b32;
1348 }
1349
sewardj18303862005-02-21 12:36:54 +00001350 if (e->Iex.Binop.op == Iop_CmpF64) {
1351 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1352 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1353 HReg dst = newVRegI(env);
1354 addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
1355 /* Mask out irrelevant parts of the result so as to conform
1356 to the CmpF64 definition. */
1357 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
1358 return dst;
1359 }
1360
sewardj6c299f32009-12-31 18:00:12 +00001361 if (e->Iex.Binop.op == Iop_F64toI32S
1362 || e->Iex.Binop.op == Iop_F64toI64S) {
1363 Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
sewardj1a01e652005-02-23 11:39:21 +00001364 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1365 HReg dst = newVRegI(env);
1366 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
sewardj37d52572005-02-25 14:22:12 +00001367 addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
sewardj1a01e652005-02-23 11:39:21 +00001368 set_SSE_rounding_default(env);
1369 return dst;
1370 }
1371
sewardj8258a8c2005-02-02 03:11:24 +00001372 break;
1373 }
1374
sewardjf67eadf2005-02-03 03:53:52 +00001375 /* --------- UNARY OP --------- */
1376 case Iex_Unop: {
sewardj7f039c42005-02-04 21:13:55 +00001377
sewardj176ad2f2005-04-27 11:55:08 +00001378 /* 1Uto8(64to1(expr64)) */
sewardjca257bc2010-09-08 08:34:52 +00001379 {
1380 DEFINE_PATTERN( p_1Uto8_64to1,
1381 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1382 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
1383 IRExpr* expr64 = mi.bindee[0];
1384 HReg dst = newVRegI(env);
1385 HReg src = iselIntExpr_R(env, expr64);
1386 addInstr(env, mk_iMOVsd_RR(src,dst) );
1387 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1388 AMD64RMI_Imm(1), dst));
sewardjf67eadf2005-02-03 03:53:52 +00001389 return dst;
1390 }
sewardjca257bc2010-09-08 08:34:52 +00001391 }
1392
1393 /* 8Uto64(LDle(expr64)) */
1394 {
1395 DEFINE_PATTERN(p_LDle8_then_8Uto64,
1396 unop(Iop_8Uto64,
1397 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1398 if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
1399 HReg dst = newVRegI(env);
1400 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1401 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
1402 return dst;
1403 }
1404 }
1405
1406 /* 16Uto64(LDle(expr64)) */
1407 {
1408 DEFINE_PATTERN(p_LDle16_then_16Uto64,
1409 unop(Iop_16Uto64,
1410 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1411 if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
1412 HReg dst = newVRegI(env);
1413 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1414 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
1415 return dst;
1416 }
1417 }
1418
sewardj9cc2bbf2011-06-05 17:56:03 +00001419 /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
1420 Use 32 bit arithmetic and let the default zero-extend rule
1421 do the 32Uto64 for free. */
1422 if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
1423 IROp opi = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
1424 IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
1425 IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
1426 AMD64AluOp aluOp = Aalu_INVALID;
1427 switch (opi) {
1428 case Iop_Add32: aluOp = Aalu_ADD; break;
1429 case Iop_Sub32: aluOp = Aalu_SUB; break;
1430 case Iop_And32: aluOp = Aalu_AND; break;
1431 case Iop_Or32: aluOp = Aalu_OR; break;
1432 case Iop_Xor32: aluOp = Aalu_XOR; break;
1433 default: break;
1434 }
1435 if (aluOp != Aalu_INVALID) {
1436 /* For commutative ops we assume any literal values are on
1437 the second operand. */
1438 HReg dst = newVRegI(env);
1439 HReg reg = iselIntExpr_R(env, argL);
1440 AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
1441 addInstr(env, mk_iMOVsd_RR(reg,dst));
1442 addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
1443 return dst;
1444 }
1445 /* just fall through to normal handling for Iop_32Uto64 */
1446 }
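/* For instance 32Uto64(Add32(x,y)) becomes, in effect:
      movq %x, %dst
      addl <y>, %dst
   where the 32-bit addl implicitly zeroes bits 63:32 of %dst, so
   no separate widening step is needed. */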
1447
1448 /* Fallback cases */
sewardjca257bc2010-09-08 08:34:52 +00001449 switch (e->Iex.Unop.op) {
1450 case Iop_32Uto64:
sewardj05b3b6a2005-02-04 01:44:33 +00001451 case Iop_32Sto64: {
1452 HReg dst = newVRegI(env);
1453 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjca257bc2010-09-08 08:34:52 +00001454 addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
1455 src, dst) );
sewardj05b3b6a2005-02-04 01:44:33 +00001456 return dst;
1457 }
sewardj9b967672005-02-08 11:13:09 +00001458 case Iop_128HIto64: {
1459 HReg rHi, rLo;
1460 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1461 return rHi; /* and abandon rLo */
1462 }
1463 case Iop_128to64: {
1464 HReg rHi, rLo;
1465 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1466 return rLo; /* and abandon rHi */
1467 }
sewardj85520e42005-02-19 15:22:38 +00001468 case Iop_8Uto16:
sewardjec93f982005-06-21 13:51:18 +00001469 case Iop_8Uto32:
sewardj176ad2f2005-04-27 11:55:08 +00001470 case Iop_8Uto64:
1471 case Iop_16Uto64:
sewardj85520e42005-02-19 15:22:38 +00001472 case Iop_16Uto32: {
sewardj176ad2f2005-04-27 11:55:08 +00001473 HReg dst = newVRegI(env);
1474 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001475 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
1476 || e->Iex.Unop.op==Iop_16Uto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001477 UInt mask = srcIs16 ? 0xFFFF : 0xFF;
sewardj7de0d3c2005-02-13 02:26:41 +00001478 addInstr(env, mk_iMOVsd_RR(src,dst) );
1479 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1480 AMD64RMI_Imm(mask), dst));
1481 return dst;
1482 }
sewardj85520e42005-02-19 15:22:38 +00001483 case Iop_8Sto16:
sewardj176ad2f2005-04-27 11:55:08 +00001484 case Iop_8Sto64:
sewardj7de0d3c2005-02-13 02:26:41 +00001485 case Iop_8Sto32:
sewardj176ad2f2005-04-27 11:55:08 +00001486 case Iop_16Sto32:
1487 case Iop_16Sto64: {
1488 HReg dst = newVRegI(env);
1489 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001490 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
1491 || e->Iex.Unop.op==Iop_16Sto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001492 UInt amt = srcIs16 ? 48 : 56;
sewardj486074e2005-02-08 20:10:04 +00001493 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001494 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1495 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
sewardj486074e2005-02-08 20:10:04 +00001496 return dst;
1497 }
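/* E.g. 8Sto64 is, in effect:
      movq %src, %dst
      shlq $56, %dst
      sarq $56, %dst   -- the arithmetic shift smears the sign bit
   and the 16-bit sources use a shift amount of 48 instead. */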
sewardj85520e42005-02-19 15:22:38 +00001498 case Iop_Not8:
1499 case Iop_Not16:
sewardj7de0d3c2005-02-13 02:26:41 +00001500 case Iop_Not32:
sewardjd0a12df2005-02-10 02:07:43 +00001501 case Iop_Not64: {
1502 HReg dst = newVRegI(env);
1503 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1504 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001505 addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
sewardjd0a12df2005-02-10 02:07:43 +00001506 return dst;
1507 }
de5a70f5c2010-04-01 23:08:59 +00001508 case Iop_16HIto8:
sewardj85520e42005-02-19 15:22:38 +00001509 case Iop_32HIto16:
sewardj7de0d3c2005-02-13 02:26:41 +00001510 case Iop_64HIto32: {
1511 HReg dst = newVRegI(env);
1512 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1513 Int shift = 0;
1514 switch (e->Iex.Unop.op) {
sewardj9ba870d2010-04-02 11:29:23 +00001515 case Iop_16HIto8: shift = 8; break;
sewardj85520e42005-02-19 15:22:38 +00001516 case Iop_32HIto16: shift = 16; break;
sewardj7de0d3c2005-02-13 02:26:41 +00001517 case Iop_64HIto32: shift = 32; break;
1518 default: vassert(0);
1519 }
1520 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001521 addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
sewardj7de0d3c2005-02-13 02:26:41 +00001522 return dst;
1523 }
sewardj176ad2f2005-04-27 11:55:08 +00001524 case Iop_1Uto64:
sewardj0af46ab2005-04-26 01:52:29 +00001525 case Iop_1Uto32:
sewardjf53b7352005-04-06 20:01:56 +00001526 case Iop_1Uto8: {
1527 HReg dst = newVRegI(env);
1528 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1529 addInstr(env, AMD64Instr_Set64(cond,dst));
1530 return dst;
1531 }
sewardja64f8ad2005-04-24 00:26:37 +00001532 case Iop_1Sto8:
sewardj478fe702005-04-23 01:15:47 +00001533 case Iop_1Sto16:
1534 case Iop_1Sto32:
sewardj42322b52005-04-20 22:57:11 +00001535 case Iop_1Sto64: {
1536 /* could do better than this, but for now ... */
1537 HReg dst = newVRegI(env);
1538 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1539 addInstr(env, AMD64Instr_Set64(cond,dst));
sewardj501a3392005-05-11 15:37:50 +00001540 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
1541 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
sewardj42322b52005-04-20 22:57:11 +00001542 return dst;
1543 }
sewardjf53b7352005-04-06 20:01:56 +00001544 case Iop_Ctz64: {
1545 /* Count trailing zeroes, implemented by amd64 'bsfq' */
1546 HReg dst = newVRegI(env);
1547 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1548 addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
1549 return dst;
1550 }
sewardj537cab02005-04-07 02:03:52 +00001551 case Iop_Clz64: {
1552 /* Count leading zeroes. Do 'bsrq' to establish the index
1553 of the highest set bit, and subtract that value from
1554 63. */
1555 HReg tmp = newVRegI(env);
1556 HReg dst = newVRegI(env);
1557 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1558 addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
1559 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
1560 AMD64RMI_Imm(63), dst));
1561 addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
1562 AMD64RMI_Reg(tmp), dst));
1563 return dst;
1564 }
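/* In other words:
      bsrq %src, %tmp   -- index of the highest set bit
      movq $63, %dst
      subq %tmp, %dst   -- clz = 63 - bsr
   bsrq leaves its result undefined for a zero input, which matches
   Clz64, itself undefined at zero. */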
sewardjeb17e492007-08-25 23:07:44 +00001565
1566 case Iop_CmpwNEZ64: {
sewardj176ad2f2005-04-27 11:55:08 +00001567 HReg dst = newVRegI(env);
sewardjeb17e492007-08-25 23:07:44 +00001568 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1569 addInstr(env, mk_iMOVsd_RR(src,dst));
sewardj501a3392005-05-11 15:37:50 +00001570 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
sewardjeb17e492007-08-25 23:07:44 +00001571 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1572 AMD64RMI_Reg(src), dst));
1573 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1574 return dst;
1575 }
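/* This is the neg/or/sar trick; in effect:
      movq %src, %dst
      negq %dst
      orq  %src, %dst   -- bit 63 now set iff src != 0
      sarq $63, %dst    -- smear it across all 64 bits
   leaving %dst = all-ones if src != 0 and zero otherwise. */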
1576
1577 case Iop_CmpwNEZ32: {
1578 HReg src = newVRegI(env);
1579 HReg dst = newVRegI(env);
1580 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1581 addInstr(env, mk_iMOVsd_RR(pre,src));
sewardjca257bc2010-09-08 08:34:52 +00001582 addInstr(env, AMD64Instr_MovxLQ(False, src, src));
sewardjeb17e492007-08-25 23:07:44 +00001583 addInstr(env, mk_iMOVsd_RR(src,dst));
1584 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1585 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1586 AMD64RMI_Reg(src), dst));
1587 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1588 return dst;
1589 }
1590
1591 case Iop_Left8:
1592 case Iop_Left16:
1593 case Iop_Left32:
1594 case Iop_Left64: {
1595 HReg dst = newVRegI(env);
1596 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1597 addInstr(env, mk_iMOVsd_RR(src, dst));
1598 addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
1599 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
sewardj176ad2f2005-04-27 11:55:08 +00001600 return dst;
1601 }
sewardj537cab02005-04-07 02:03:52 +00001602
sewardj478fe702005-04-23 01:15:47 +00001603 case Iop_V128to32: {
1604 HReg dst = newVRegI(env);
1605 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1606 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1607 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1608 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1609 return dst;
1610 }
sewardj1a01e652005-02-23 11:39:21 +00001611
1612 /* V128{HI}to64 */
1613 case Iop_V128HIto64:
1614 case Iop_V128to64: {
sewardj1a01e652005-02-23 11:39:21 +00001615 HReg dst = newVRegI(env);
sewardjc4530ae2012-05-21 10:18:49 +00001616 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
1617 HReg rsp = hregAMD64_RSP();
sewardj1a01e652005-02-23 11:39:21 +00001618 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
sewardjc4530ae2012-05-21 10:18:49 +00001619 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1620 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
1621 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1622 16, vec, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00001623 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
sewardjc4530ae2012-05-21 10:18:49 +00001624 AMD64RMI_Mem(off_rsp), dst ));
1625 return dst;
1626 }
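/* No xmm-to-gpr move is used; the vector is bounced through the
   stack redzone instead, roughly:
      movups %vec, -16(%rsp)
      movq -16(%rsp), %dst   -- V128to64
      movq  -8(%rsp), %dst   -- or this, for V128HIto64
*/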
1627
1628 case Iop_V256to64_0: case Iop_V256to64_1:
1629 case Iop_V256to64_2: case Iop_V256to64_3: {
1630 HReg vHi, vLo, vec;
1631 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
1632 /* Do the first part of the selection by deciding which of
1633 the two 128-bit registers to look at, and the second part
1634 using the same scheme as for V128{HI}to64 above. */
1635 Int off = 0;
1636 switch (e->Iex.Unop.op) {
1637 case Iop_V256to64_0: vec = vLo; off = -16; break;
1638 case Iop_V256to64_1: vec = vLo; off = -8; break;
1639 case Iop_V256to64_2: vec = vHi; off = -16; break;
1640 case Iop_V256to64_3: vec = vHi; off = -8; break;
1641 default: vassert(0);
1642 }
1643 HReg dst = newVRegI(env);
1644 HReg rsp = hregAMD64_RSP();
1645 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1646 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
1647 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1648 16, vec, m16_rsp));
1649 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1650 AMD64RMI_Mem(off_rsp), dst ));
sewardj1a01e652005-02-23 11:39:21 +00001651 return dst;
1652 }
1653
sewardj924215b2005-03-26 21:50:31 +00001654 /* ReinterpF64asI64(e) */
1655 /* Given an IEEE754 double, produce an I64 with the same bit
1656 pattern. */
1657 case Iop_ReinterpF64asI64: {
1658 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1659 HReg dst = newVRegI(env);
1660 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1661 /* paranoia */
1662 set_SSE_rounding_default(env);
1663 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1664 addInstr(env, AMD64Instr_Alu64R(
1665 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1666 return dst;
1667 }
1668
sewardj79501112008-07-29 09:48:26 +00001669 /* ReinterpF32asI32(e) */
1670 /* Given an IEEE754 single, produce an I64 with the same bit
1671 pattern in the lower half. */
1672 case Iop_ReinterpF32asI32: {
1673 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1674 HReg dst = newVRegI(env);
1675 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1676 /* paranoia */
1677 set_SSE_rounding_default(env);
1678 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
1679 addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
1680 return dst;
1681 }
1682
sewardj85520e42005-02-19 15:22:38 +00001683 case Iop_16to8:
sewardja6b93d12005-02-17 09:28:28 +00001684 case Iop_32to8:
sewardj176ad2f2005-04-27 11:55:08 +00001685 case Iop_64to8:
sewardj7de0d3c2005-02-13 02:26:41 +00001686 case Iop_32to16:
sewardj176ad2f2005-04-27 11:55:08 +00001687 case Iop_64to16:
sewardj486074e2005-02-08 20:10:04 +00001688 case Iop_64to32:
1689 /* These are no-ops. */
1690 return iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjf67eadf2005-02-03 03:53:52 +00001691
sewardje13074c2012-11-08 10:57:08 +00001692 case Iop_GetMSBs8x8: {
1693 /* Note: the following assumes the helper is of
1694 signature
1695 UInt fn ( ULong ), and is not a regparm fn.
1696 */
1697 HReg dst = newVRegI(env);
1698 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1699 fn = (HWord)h_generic_calc_GetMSBs8x8;
1700 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
sewardjcfe046e2013-01-17 14:23:53 +00001701 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00001702 1, mk_RetLoc_simple(RLPri_Int) ));
sewardje13074c2012-11-08 10:57:08 +00001703 /* MovxLQ is not exactly the right thing here. We just
1704 need to get the bottom 8 bits of RAX into dst, and zero
1705 out everything else. Assuming that the helper returns
1706 a UInt with the top 24 bits zeroed out, it'll do,
1707 though. */
1708 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1709 return dst;
1710 }
1711
sewardj78a20592012-12-13 18:29:56 +00001712 case Iop_GetMSBs8x16: {
1713 /* Note: the following assumes the helper is of signature
1714 UInt fn ( ULong w64hi, ULong w64Lo ),
1715 and is not a regparm fn. */
1716 HReg dst = newVRegI(env);
1717 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1718 HReg rsp = hregAMD64_RSP();
1719 fn = (HWord)h_generic_calc_GetMSBs8x16;
1720 AMD64AMode* m8_rsp = AMD64AMode_IR( -8, rsp);
1721 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1722 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1723 16, vec, m16_rsp));
1724 /* hi 64 bits into RDI -- the first arg */
1725 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1726 AMD64RMI_Mem(m8_rsp),
1727 hregAMD64_RDI() )); /* 1st arg */
1728 /* lo 64 bits into RSI -- the 2nd arg */
1729 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1730 AMD64RMI_Mem(m16_rsp),
1731 hregAMD64_RSI() )); /* 2nd arg */
sewardjcfe046e2013-01-17 14:23:53 +00001732 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00001733 2, mk_RetLoc_simple(RLPri_Int) ));
sewardj78a20592012-12-13 18:29:56 +00001734 /* MovxLQ is not exactly the right thing here. We just
sewardj9213c612012-12-19 08:39:11 +00001735 need to get the bottom 16 bits of RAX into dst, and zero
sewardj78a20592012-12-13 18:29:56 +00001736 out everything else. Assuming that the helper returns
sewardj9213c612012-12-19 08:39:11 +00001737 a UInt with the top 16 bits zeroed out, it'll do,
sewardj78a20592012-12-13 18:29:56 +00001738 though. */
1739 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1740 return dst;
1741 }
1742
sewardjf67eadf2005-02-03 03:53:52 +00001743 default:
1744 break;
1745 }
sewardje7905662005-05-09 18:15:21 +00001746
1747 /* Deal with unary 64-bit SIMD ops. */
1748 switch (e->Iex.Unop.op) {
1749 case Iop_CmpNEZ32x2:
1750 fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1751 case Iop_CmpNEZ16x4:
1752 fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1753 case Iop_CmpNEZ8x8:
1754 fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1755 default:
1756 fn = (HWord)0; break;
1757 }
1758 if (fn != (HWord)0) {
1759 /* Note: the following assumes all helpers are of
1760 signature
1761 ULong fn ( ULong ), and they are
1762 not marked as regparm functions.
1763 */
1764 HReg dst = newVRegI(env);
1765 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1766 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
sewardj74142b82013-08-08 10:28:59 +00001767 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
1768 mk_RetLoc_simple(RLPri_Int) ));
sewardje7905662005-05-09 18:15:21 +00001769 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1770 return dst;
1771 }
1772
sewardjf67eadf2005-02-03 03:53:52 +00001773 break;
1774 }
sewardj8258a8c2005-02-02 03:11:24 +00001775
1776 /* --------- GET --------- */
1777 case Iex_Get: {
1778 if (ty == Ity_I64) {
1779 HReg dst = newVRegI(env);
1780 addInstr(env, AMD64Instr_Alu64R(
1781 Aalu_MOV,
1782 AMD64RMI_Mem(
1783 AMD64AMode_IR(e->Iex.Get.offset,
1784 hregAMD64_RBP())),
1785 dst));
1786 return dst;
1787 }
1788 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1789 HReg dst = newVRegI(env);
1790 addInstr(env, AMD64Instr_LoadEX(
sewardj1e499352005-03-23 03:02:50 +00001791 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
sewardj8258a8c2005-02-02 03:11:24 +00001792 False,
1793 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1794 dst));
1795 return dst;
1796 }
1797 break;
1798 }
1799
sewardj8d965312005-02-25 02:48:47 +00001800 case Iex_GetI: {
1801 AMD64AMode* am
1802 = genGuestArrayOffset(
1803 env, e->Iex.GetI.descr,
1804 e->Iex.GetI.ix, e->Iex.GetI.bias );
1805 HReg dst = newVRegI(env);
1806 if (ty == Ity_I8) {
1807 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1808 return dst;
1809 }
sewardj1e015d82005-04-23 23:41:46 +00001810 if (ty == Ity_I64) {
1811 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1812 return dst;
1813 }
sewardj8d965312005-02-25 02:48:47 +00001814 break;
1815 }
sewardj05b3b6a2005-02-04 01:44:33 +00001816
1817 /* --------- CCALL --------- */
1818 case Iex_CCall: {
1819 HReg dst = newVRegI(env);
sewardj7f039c42005-02-04 21:13:55 +00001820 vassert(ty == e->Iex.CCall.retty);
sewardj05b3b6a2005-02-04 01:44:33 +00001821
sewardjcfe046e2013-01-17 14:23:53 +00001822 /* be very restrictive for now. Only 64-bit ints allowed for
sewardj74142b82013-08-08 10:28:59 +00001823 args, and 64 or 32 bits for return type. */
sewardje8aaa872005-07-07 13:12:04 +00001824 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
sewardj05b3b6a2005-02-04 01:44:33 +00001825 goto irreducible;
1826
sewardj7f039c42005-02-04 21:13:55 +00001827 /* Marshal args, do the call. */
sewardj74142b82013-08-08 10:28:59 +00001828 UInt addToSp = 0;
1829 RetLoc rloc = mk_RetLoc_INVALID();
1830 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1831 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1832 vassert(is_sane_RetLoc(rloc));
1833 vassert(rloc.pri == RLPri_Int);
1834 vassert(addToSp == 0);
sewardj05b3b6a2005-02-04 01:44:33 +00001835
sewardje8aaa872005-07-07 13:12:04 +00001836 /* Move to dst, and zero out the top 32 bits if the result type is
1837 Ity_I32. Probably overkill, but still .. */
1838 if (e->Iex.CCall.retty == Ity_I64)
1839 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1840 else
sewardjca257bc2010-09-08 08:34:52 +00001841 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
sewardje8aaa872005-07-07 13:12:04 +00001842
sewardj05b3b6a2005-02-04 01:44:33 +00001843 return dst;
1844 }
1845
sewardj7f039c42005-02-04 21:13:55 +00001846 /* --------- LITERAL --------- */
1847 /* 64/32/16/8-bit literals */
1848 case Iex_Const:
1849 if (ty == Ity_I64) {
1850 HReg r = newVRegI(env);
1851 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1852 return r;
1853 } else {
1854 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1855 HReg r = newVRegI(env);
1856 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1857 return r;
1858 }
sewardj05b3b6a2005-02-04 01:44:33 +00001859
1860 /* --------- MULTIPLEX --------- */
florian99dd03e2013-01-29 03:56:06 +00001861 case Iex_ITE: { // VFD
sewardj009230b2013-01-26 11:47:55 +00001862 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
florian99dd03e2013-01-29 03:56:06 +00001863 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1864 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1865 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
sewardj009230b2013-01-26 11:47:55 +00001866 HReg dst = newVRegI(env);
florian99dd03e2013-01-29 03:56:06 +00001867 addInstr(env, mk_iMOVsd_RR(r1,dst));
1868 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00001869 addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
1870 return dst;
sewardj05b3b6a2005-02-04 01:44:33 +00001871 }
1872 break;
1873 }
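/* That is: load the iftrue value, then conditionally overwrite it
   with the iffalse one; in effect:
      movq %r1, %dst
      -- evaluate the condition into a condcode cc
      cmov!cc <r0>, %dst
   Inverting the condcode (cc ^ 1) is what selects the iffalse
   arm. */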
sewardj8258a8c2005-02-02 03:11:24 +00001874
sewardjf4c803b2006-09-11 11:07:34 +00001875 /* --------- TERNARY OP --------- */
1876 case Iex_Triop: {
florian420bfa92012-06-02 20:29:22 +00001877 IRTriop *triop = e->Iex.Triop.details;
sewardjf4c803b2006-09-11 11:07:34 +00001878 /* C3210 flags following FPU partial remainder (fprem), both
1879 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
florian420bfa92012-06-02 20:29:22 +00001880 if (triop->op == Iop_PRemC3210F64
1881 || triop->op == Iop_PRem1C3210F64) {
sewardjf4c803b2006-09-11 11:07:34 +00001882 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
florian420bfa92012-06-02 20:29:22 +00001883 HReg arg1 = iselDblExpr(env, triop->arg2);
1884 HReg arg2 = iselDblExpr(env, triop->arg3);
sewardjf4c803b2006-09-11 11:07:34 +00001885 HReg dst = newVRegI(env);
1886 addInstr(env, AMD64Instr_A87Free(2));
1887
1888 /* one arg -> top of x87 stack */
1889 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00001890 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardjf4c803b2006-09-11 11:07:34 +00001891
1892 /* other arg -> top of x87 stack */
1893 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00001894 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardjf4c803b2006-09-11 11:07:34 +00001895
florian420bfa92012-06-02 20:29:22 +00001896 switch (triop->op) {
sewardjf4c803b2006-09-11 11:07:34 +00001897 case Iop_PRemC3210F64:
1898 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1899 break;
sewardj4970e4e2008-10-11 10:07:55 +00001900 case Iop_PRem1C3210F64:
1901 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
1902 break;
sewardjf4c803b2006-09-11 11:07:34 +00001903 default:
1904 vassert(0);
1905 }
1906 /* Ignore the result, and instead make off with the FPU's
1907 C3210 flags (in the status word). */
1908 addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1909 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1910 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1911 return dst;
1912 }
1913 break;
1914 }
1915
sewardj8258a8c2005-02-02 03:11:24 +00001916 default:
1917 break;
1918 } /* switch (e->tag) */
1919
1920 /* We get here if no pattern matched. */
1921 irreducible:
1922 ppIRExpr(e);
1923 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1924}
sewardj614b3fb2005-02-02 02:16:03 +00001925
1926
1927/*---------------------------------------------------------*/
1928/*--- ISEL: Integer expression auxiliaries ---*/
1929/*---------------------------------------------------------*/
1930
1931/* --------------------- AMODEs --------------------- */
1932
1933/* Return an AMode which computes the value of the specified
1934 expression, possibly also adding insns to the code list as a
1935 result. The expression may only be a 64-bit one.
1936*/
1937
sewardj8258a8c2005-02-02 03:11:24 +00001938static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1939{
1940 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1941 vassert(sane_AMode(am));
1942 return am;
1943}
1944
1945/* DO NOT CALL THIS DIRECTLY ! */
1946static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1947{
sewardj05b3b6a2005-02-04 01:44:33 +00001948 MatchInfo mi;
1949 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001950 IRType ty = typeOfIRExpr(env->type_env,e);
1951 vassert(ty == Ity_I64);
1952
sewardj05b3b6a2005-02-04 01:44:33 +00001953 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1954 /* bind0 bind1 bind2 bind3 */
1955 DEFINE_PATTERN(p_complex,
1956 binop( Iop_Add64,
1957 binop( Iop_Add64,
1958 bind(0),
1959 binop(Iop_Shl64, bind(1), bind(2))
1960 ),
1961 bind(3)
1962 )
1963 );
1964 if (matchIRExpr(&mi, p_complex, e)) {
1965 IRExpr* expr1 = mi.bindee[0];
1966 IRExpr* expr2 = mi.bindee[1];
1967 IRExpr* imm8 = mi.bindee[2];
1968 IRExpr* simm32 = mi.bindee[3];
1969 if (imm8->tag == Iex_Const
1970 && imm8->Iex.Const.con->tag == Ico_U8
1971 && imm8->Iex.Const.con->Ico.U8 < 4
1972 /* imm8 is OK, now check simm32 */
1973 && simm32->tag == Iex_Const
1974 && simm32->Iex.Const.con->tag == Ico_U64
1975 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1976 UInt shift = imm8->Iex.Const.con->Ico.U8;
sewardj428fabd2005-03-21 03:11:17 +00001977 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
sewardj05b3b6a2005-02-04 01:44:33 +00001978 HReg r1 = iselIntExpr_R(env, expr1);
1979 HReg r2 = iselIntExpr_R(env, expr2);
1980 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1981 return AMD64AMode_IRRS(offset, r1, r2, shift);
1982 }
1983 }
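/* So, for example,
      Add64(Add64(t1, Shl64(t2, Imm8(3))), Imm64(40))
   matches and collapses into the single amode 40(%t1,%t2,8),
   provided the shift is 0..3 and the constant fits in a signed
   32-bit immediate. */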
1984
sewardj8258a8c2005-02-02 03:11:24 +00001985 /* Add64(expr1, Shl64(expr2, imm)) */
1986 if (e->tag == Iex_Binop
1987 && e->Iex.Binop.op == Iop_Add64
1988 && e->Iex.Binop.arg2->tag == Iex_Binop
1989 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1990 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1991 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1992 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1993 if (shift == 1 || shift == 2 || shift == 3) {
1994 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1995 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1996 return AMD64AMode_IRRS(0, r1, r2, shift);
1997 }
1998 }
1999
2000 /* Add64(expr,i) */
2001 if (e->tag == Iex_Binop
2002 && e->Iex.Binop.op == Iop_Add64
2003 && e->Iex.Binop.arg2->tag == Iex_Const
2004 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2005 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
2006 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2007 return AMD64AMode_IR(
sewardj428fabd2005-03-21 03:11:17 +00002008 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
sewardj8258a8c2005-02-02 03:11:24 +00002009 r1
2010 );
2011 }
2012
2013 /* Doesn't match anything in particular. Generate it into
2014 a register and use that. */
2015 {
2016 HReg r1 = iselIntExpr_R(env, e);
2017 return AMD64AMode_IR(0, r1);
2018 }
2019}
sewardj614b3fb2005-02-02 02:16:03 +00002020
2021
2022/* --------------------- RMIs --------------------- */
2023
2024/* Similarly, calculate an expression into an AMD64RMI operand. As with
2025 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
2026
2027static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
2028{
2029 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
2030 /* sanity checks ... */
2031 switch (rmi->tag) {
2032 case Armi_Imm:
2033 return rmi;
2034 case Armi_Reg:
2035 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
2036 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
2037 return rmi;
2038 case Armi_Mem:
2039 vassert(sane_AMode(rmi->Armi.Mem.am));
2040 return rmi;
2041 default:
2042 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
2043 }
2044}
2045
2046/* DO NOT CALL THIS DIRECTLY ! */
2047static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
2048{
2049 IRType ty = typeOfIRExpr(env->type_env,e);
2050 vassert(ty == Ity_I64 || ty == Ity_I32
2051 || ty == Ity_I16 || ty == Ity_I8);
2052
2053 /* special case: immediate 64/32/16/8 */
2054 if (e->tag == Iex_Const) {
2055 switch (e->Iex.Const.con->tag) {
2056 case Ico_U64:
2057 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002058 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardj614b3fb2005-02-02 02:16:03 +00002059 }
2060 break;
2061 case Ico_U32:
2062 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
2063 case Ico_U16:
2064 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
2065 case Ico_U8:
2066 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
2067 default:
2068 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2069 }
2070 }
2071
2072 /* special case: 64-bit GET */
2073 if (e->tag == Iex_Get && ty == Ity_I64) {
2074 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2075 hregAMD64_RBP()));
2076 }
2077
sewardj0852a132005-02-21 08:28:46 +00002078 /* special case: 64-bit load from memory */
sewardje9d8a262009-07-01 08:06:34 +00002079 if (e->tag == Iex_Load && ty == Ity_I64
sewardje768e922009-11-26 17:17:37 +00002080 && e->Iex.Load.end == Iend_LE) {
sewardjaf1ceca2005-06-30 23:31:27 +00002081 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj0852a132005-02-21 08:28:46 +00002082 return AMD64RMI_Mem(am);
2083 }
sewardj614b3fb2005-02-02 02:16:03 +00002084
2085 /* default case: calculate into a register and return that */
sewardj8258a8c2005-02-02 03:11:24 +00002086 {
2087 HReg r = iselIntExpr_R ( env, e );
2088 return AMD64RMI_Reg(r);
2089 }
sewardj614b3fb2005-02-02 02:16:03 +00002090}
2091
2092
sewardjf67eadf2005-02-03 03:53:52 +00002093/* --------------------- RIs --------------------- */
2094
2095/* Calculate an expression into an AMD64RI operand. As with
2096 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2097 bits. */
2098
2099static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
2100{
2101 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
2102 /* sanity checks ... */
2103 switch (ri->tag) {
2104 case Ari_Imm:
2105 return ri;
sewardj80d6e6d2008-05-28 09:40:29 +00002106 case Ari_Reg:
sewardjf67eadf2005-02-03 03:53:52 +00002107 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
2108 vassert(hregIsVirtual(ri->Ari.Reg.reg));
2109 return ri;
2110 default:
2111 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2112 }
2113}
2114
2115/* DO NOT CALL THIS DIRECTLY ! */
2116static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
2117{
2118 IRType ty = typeOfIRExpr(env->type_env,e);
2119 vassert(ty == Ity_I64 || ty == Ity_I32
2120 || ty == Ity_I16 || ty == Ity_I8);
2121
2122 /* special case: immediate */
2123 if (e->tag == Iex_Const) {
2124 switch (e->Iex.Const.con->tag) {
2125 case Ico_U64:
2126 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002127 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardjf67eadf2005-02-03 03:53:52 +00002128 }
2129 break;
2130 case Ico_U32:
2131 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
2132 case Ico_U16:
2133 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
2134 case Ico_U8:
2135 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
2136 default:
2137 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2138 }
2139 }
2140
2141 /* default case: calculate into a register and return that */
2142 {
2143 HReg r = iselIntExpr_R ( env, e );
2144 return AMD64RI_Reg(r);
2145 }
2146}
2147
2148
sewardj05b3b6a2005-02-04 01:44:33 +00002149/* --------------------- RMs --------------------- */
2150
2151/* Similarly, calculate an expression into an AMD64RM operand. As
2152 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2153 bits. */
2154
2155static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
2156{
2157 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
2158 /* sanity checks ... */
2159 switch (rm->tag) {
2160 case Arm_Reg:
2161 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
2162 vassert(hregIsVirtual(rm->Arm.Reg.reg));
2163 return rm;
2164 case Arm_Mem:
2165 vassert(sane_AMode(rm->Arm.Mem.am));
2166 return rm;
2167 default:
2168 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2169 }
2170}
2171
2172/* DO NOT CALL THIS DIRECTLY ! */
2173static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
2174{
2175 IRType ty = typeOfIRExpr(env->type_env,e);
2176 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2177
2178 /* special case: 64-bit GET */
2179 if (e->tag == Iex_Get && ty == Ity_I64) {
2180 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2181 hregAMD64_RBP()));
2182 }
2183
2184 /* special case: load from memory -- not handled specially; such
loads fall through to the default case below */
2185
2186 /* default case: calculate into a register and return that */
2187 {
2188 HReg r = iselIntExpr_R ( env, e );
2189 return AMD64RM_Reg(r);
2190 }
2191}
2192
2193
2194/* --------------------- CONDCODE --------------------- */
2195
2196/* Generate code to evaluate a bit-typed expression, returning the
2197 condition code which would be set if the expression had
2198 notionally evaluated to 1. */
2199
2200static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2201{
2202 /* Uh, there's nothing we can sanity check here, unfortunately. */
2203 return iselCondCode_wrk(env,e);
2204}
2205
2206/* DO NOT CALL THIS DIRECTLY ! */
2207static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
2208{
sewardjf8c37f72005-02-07 18:55:29 +00002209 MatchInfo mi;
sewardj0af46ab2005-04-26 01:52:29 +00002210
sewardj05b3b6a2005-02-04 01:44:33 +00002211 vassert(e);
2212 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2213
sewardj176ad2f2005-04-27 11:55:08 +00002214 /* var */
sewardjdd40fdf2006-12-24 02:20:24 +00002215 if (e->tag == Iex_RdTmp) {
2216 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj176ad2f2005-04-27 11:55:08 +00002217 HReg dst = newVRegI(env);
2218 addInstr(env, mk_iMOVsd_RR(r64,dst));
2219 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
2220 return Acc_NZ;
2221 }
2222
sewardj109e9352005-07-19 08:42:56 +00002223 /* Constant 1:Bit */
2224 if (e->tag == Iex_Const) {
2225 HReg r;
2226 vassert(e->Iex.Const.con->tag == Ico_U1);
2227 vassert(e->Iex.Const.con->Ico.U1 == True
2228 || e->Iex.Const.con->Ico.U1 == False);
2229 r = newVRegI(env);
2230 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2231 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
2232 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2233 }
sewardj486074e2005-02-08 20:10:04 +00002234
2235 /* Not1(...) */
2236 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2237 /* Generate code for the arg, and negate the test condition */
2238 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2239 }
2240
sewardj176ad2f2005-04-27 11:55:08 +00002241 /* --- patterns rooted at: 64to1 --- */
2242
sewardj176ad2f2005-04-27 11:55:08 +00002243 /* 64to1 */
2244 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
sewardj501a3392005-05-11 15:37:50 +00002245 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2246 addInstr(env, AMD64Instr_Test64(1,reg));
sewardjf8c37f72005-02-07 18:55:29 +00002247 return Acc_NZ;
2248 }
2249
florianc862f282012-07-19 17:22:33 +00002250 /* --- patterns rooted at: 32to1 --- */
2251
2252 /* 32to1 */
2253 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
2254 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2255 addInstr(env, AMD64Instr_Test64(1,reg));
2256 return Acc_NZ;
2257 }
2258
sewardj176ad2f2005-04-27 11:55:08 +00002259 /* --- patterns rooted at: CmpNEZ8 --- */
2260
2261 /* CmpNEZ8(x) */
2262 if (e->tag == Iex_Unop
2263 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2264 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002265 addInstr(env, AMD64Instr_Test64(0xFF,r));
sewardj176ad2f2005-04-27 11:55:08 +00002266 return Acc_NZ;
2267 }
2268
sewardj86ec28b2005-04-27 13:39:35 +00002269 /* --- patterns rooted at: CmpNEZ16 --- */
2270
2271 /* CmpNEZ16(x) */
2272 if (e->tag == Iex_Unop
2273 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2274 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002275 addInstr(env, AMD64Instr_Test64(0xFFFF,r));
sewardj86ec28b2005-04-27 13:39:35 +00002276 return Acc_NZ;
2277 }
2278
sewardj176ad2f2005-04-27 11:55:08 +00002279 /* --- patterns rooted at: CmpNEZ32 --- */
2280
2281 /* CmpNEZ32(x) */
2282 if (e->tag == Iex_Unop
2283 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2284 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj176ad2f2005-04-27 11:55:08 +00002285 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
sewardj9cc2bbf2011-06-05 17:56:03 +00002286 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
sewardj176ad2f2005-04-27 11:55:08 +00002287 return Acc_NZ;
2288 }
2289
2290 /* --- patterns rooted at: CmpNEZ64 --- */
2291
sewardj0bc78ab2005-05-11 22:47:32 +00002292 /* CmpNEZ64(Or64(x,y)) */
2293 {
2294 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2295 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2296 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2297 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2298 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2299 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2300 HReg tmp = newVRegI(env);
2301 addInstr(env, mk_iMOVsd_RR(r0, tmp));
2302 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2303 return Acc_NZ;
2304 }
2305 }
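/* i.e. fold the Or64 into the flag-setting operation itself; in
   effect:
      movq %r0, %tmp
      orq  <rmi1>, %tmp   -- ZF clear iff (arg1 | arg2) != 0
   so no explicit comparison against zero is needed. */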
2306
sewardj176ad2f2005-04-27 11:55:08 +00002307 /* CmpNEZ64(x) */
2308 if (e->tag == Iex_Unop
2309 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2310 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2311 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2312 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2313 return Acc_NZ;
2314 }
2315
2316 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2317
sewardj42322b52005-04-20 22:57:11 +00002318 /* CmpEQ8 / CmpNE8 */
2319 if (e->tag == Iex_Binop
2320 && (e->Iex.Binop.op == Iop_CmpEQ8
sewardj1fb8c922009-07-12 12:56:53 +00002321 || e->Iex.Binop.op == Iop_CmpNE8
2322 || e->Iex.Binop.op == Iop_CasCmpEQ8
2323 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
sewardj009230b2013-01-26 11:47:55 +00002324 if (isZeroU8(e->Iex.Binop.arg2)) {
2325 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2326 addInstr(env, AMD64Instr_Test64(0xFF,r1));
2327 switch (e->Iex.Binop.op) {
2328 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2329 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2330 default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
2331 }
2332 } else {
2333 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2334 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2335 HReg r = newVRegI(env);
2336 addInstr(env, mk_iMOVsd_RR(r1,r));
2337 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2338 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2339 switch (e->Iex.Binop.op) {
2340 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2341 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2342 default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
2343 }
sewardj42322b52005-04-20 22:57:11 +00002344 }
2345 }
2346
sewardj0af46ab2005-04-26 01:52:29 +00002347 /* CmpEQ16 / CmpNE16 */
2348 if (e->tag == Iex_Binop
2349 && (e->Iex.Binop.op == Iop_CmpEQ16
sewardj1fb8c922009-07-12 12:56:53 +00002350 || e->Iex.Binop.op == Iop_CmpNE16
2351 || e->Iex.Binop.op == Iop_CasCmpEQ16
2352 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
sewardj0af46ab2005-04-26 01:52:29 +00002353 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2354 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2355 HReg r = newVRegI(env);
2356 addInstr(env, mk_iMOVsd_RR(r1,r));
2357 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2358 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2359 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002360 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2361 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002362 default: vpanic("iselCondCode(amd64): CmpXX16");
2363 }
2364 }
2365
sewardj50d89bf2011-01-10 15:10:48 +00002366 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
2367 Saves a "movq %rax, %tmp" compared to the default route. */
2368 if (e->tag == Iex_Binop
2369 && e->Iex.Binop.op == Iop_CmpNE64
2370 && e->Iex.Binop.arg1->tag == Iex_CCall
2371 && e->Iex.Binop.arg2->tag == Iex_Const) {
2372 IRExpr* cal = e->Iex.Binop.arg1;
2373 IRExpr* con = e->Iex.Binop.arg2;
2374 HReg tmp = newVRegI(env);
2375 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
2376 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
2377 vassert(con->Iex.Const.con->tag == Ico_U64);
2378 /* Marshal args, do the call. */
sewardj74142b82013-08-08 10:28:59 +00002379 UInt addToSp = 0;
2380 RetLoc rloc = mk_RetLoc_INVALID();
2381 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2382 cal->Iex.CCall.cee,
2383 cal->Iex.CCall.retty, cal->Iex.CCall.args );
2384 vassert(is_sane_RetLoc(rloc));
2385 vassert(rloc.pri == RLPri_Int);
2386 vassert(addToSp == 0);
2387 /* */
sewardj50d89bf2011-01-10 15:10:48 +00002388 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
2389 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
2390 AMD64RMI_Reg(hregAMD64_RAX()), tmp));
2391 return Acc_NZ;
2392 }
2393
sewardjd0a12df2005-02-10 02:07:43 +00002394 /* Cmp*64*(x,y) */
2395 if (e->tag == Iex_Binop
2396 && (e->Iex.Binop.op == Iop_CmpEQ64
2397 || e->Iex.Binop.op == Iop_CmpNE64
sewardj0af46ab2005-04-26 01:52:29 +00002398 || e->Iex.Binop.op == Iop_CmpLT64S
2399 || e->Iex.Binop.op == Iop_CmpLT64U
2400 || e->Iex.Binop.op == Iop_CmpLE64S
sewardja9e4a802005-12-26 19:33:55 +00002401 || e->Iex.Binop.op == Iop_CmpLE64U
sewardj1fb8c922009-07-12 12:56:53 +00002402 || e->Iex.Binop.op == Iop_CasCmpEQ64
sewardje13074c2012-11-08 10:57:08 +00002403 || e->Iex.Binop.op == Iop_CasCmpNE64
2404 || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
sewardjd0a12df2005-02-10 02:07:43 +00002405 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2406 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2407 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2408 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002409 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
sewardje13074c2012-11-08 10:57:08 +00002410 case Iop_CmpNE64:
2411 case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002412 case Iop_CmpLT64S: return Acc_L;
2413 case Iop_CmpLT64U: return Acc_B;
2414 case Iop_CmpLE64S: return Acc_LE;
sewardja9e4a802005-12-26 19:33:55 +00002415 case Iop_CmpLE64U: return Acc_BE;
sewardjd0a12df2005-02-10 02:07:43 +00002416 default: vpanic("iselCondCode(amd64): CmpXX64");
2417 }
2418 }
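/* In every one of these cases the generated code is just
      cmpq <rmi2>, %r1
   and the interesting part is the condition returned: Z/NZ for
   EQ/NE, L and LE for the signed orderings, and B and BE (below /
   below-or-equal) for the unsigned ones. */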
2419
sewardj9cc2bbf2011-06-05 17:56:03 +00002420 /* Cmp*32*(x,y) */
2421 if (e->tag == Iex_Binop
2422 && (e->Iex.Binop.op == Iop_CmpEQ32
2423 || e->Iex.Binop.op == Iop_CmpNE32
2424 || e->Iex.Binop.op == Iop_CmpLT32S
2425 || e->Iex.Binop.op == Iop_CmpLT32U
2426 || e->Iex.Binop.op == Iop_CmpLE32S
2427 || e->Iex.Binop.op == Iop_CmpLE32U
2428 || e->Iex.Binop.op == Iop_CasCmpEQ32
sewardj009230b2013-01-26 11:47:55 +00002429 || e->Iex.Binop.op == Iop_CasCmpNE32
2430 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
sewardj9cc2bbf2011-06-05 17:56:03 +00002431 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2432 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2433 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
2434 switch (e->Iex.Binop.op) {
2435 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
sewardj009230b2013-01-26 11:47:55 +00002436 case Iop_CmpNE32:
2437 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
sewardj9cc2bbf2011-06-05 17:56:03 +00002438 case Iop_CmpLT32S: return Acc_L;
2439 case Iop_CmpLT32U: return Acc_B;
2440 case Iop_CmpLE32S: return Acc_LE;
2441 case Iop_CmpLE32U: return Acc_BE;
2442 default: vpanic("iselCondCode(amd64): CmpXX32");
2443 }
2444 }
2445
sewardj05b3b6a2005-02-04 01:44:33 +00002446 ppIRExpr(e);
2447 vpanic("iselCondCode(amd64)");
2448}
2449
2450
sewardj9b967672005-02-08 11:13:09 +00002451/*---------------------------------------------------------*/
2452/*--- ISEL: Integer expressions (128 bit) ---*/
2453/*---------------------------------------------------------*/
2454
2455/* Compute a 128-bit value into a register pair, which is returned as
2456 the first two parameters. As with iselIntExpr_R, these will be
2457 virtual regs, and they must not be changed by subsequent code
2458 emitted by the caller. */
2459
2460static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2461 ISelEnv* env, IRExpr* e )
2462{
2463 iselInt128Expr_wrk(rHi, rLo, env, e);
2464# if 0
2465 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2466# endif
2467 vassert(hregClass(*rHi) == HRcInt64);
2468 vassert(hregIsVirtual(*rHi));
2469 vassert(hregClass(*rLo) == HRcInt64);
2470 vassert(hregIsVirtual(*rLo));
2471}
2472
2473/* DO NOT CALL THIS DIRECTLY ! */
2474static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2475 ISelEnv* env, IRExpr* e )
2476{
sewardj9b967672005-02-08 11:13:09 +00002477 vassert(e);
2478 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2479
sewardj9b967672005-02-08 11:13:09 +00002480 /* read 128-bit IRTemp */
sewardjdd40fdf2006-12-24 02:20:24 +00002481 if (e->tag == Iex_RdTmp) {
sewardjc4530ae2012-05-21 10:18:49 +00002482 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
sewardj9b967672005-02-08 11:13:09 +00002483 return;
2484 }
2485
sewardj9b967672005-02-08 11:13:09 +00002486 /* --------- BINARY ops --------- */
2487 if (e->tag == Iex_Binop) {
2488 switch (e->Iex.Binop.op) {
sewardj7de0d3c2005-02-13 02:26:41 +00002489 /* 64 x 64 -> 128 multiply */
sewardj9b967672005-02-08 11:13:09 +00002490 case Iop_MullU64:
2491 case Iop_MullS64: {
2492 /* get one operand into %rax, and the other into a R/M.
2493 Need to make an educated guess about which operand is
2494 better placed in which. */
2495 HReg tLo = newVRegI(env);
2496 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002497 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
sewardj9b967672005-02-08 11:13:09 +00002498 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2499 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2500 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
sewardj501a3392005-05-11 15:37:50 +00002501 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
sewardj9b967672005-02-08 11:13:09 +00002502 /* Result is now in RDX:RAX. Tell the caller. */
2503 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2504 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2505 *rHi = tHi;
2506 *rLo = tLo;
2507 return;
2508 }
sewardj7de0d3c2005-02-13 02:26:41 +00002509
sewardja6b93d12005-02-17 09:28:28 +00002510 /* 128 x 64 -> (64(rem),64(div)) division */
2511 case Iop_DivModU128to64:
2512 case Iop_DivModS128to64: {
2513 /* Get the 128-bit operand into rdx:rax, and the other into
2514 any old R/M. */
2515 HReg sHi, sLo;
2516 HReg tLo = newVRegI(env);
2517 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002518 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
sewardja6b93d12005-02-17 09:28:28 +00002519 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2520 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2521 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2522 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2523 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2524 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2525 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2526 *rHi = tHi;
2527 *rLo = tLo;
2528 return;
2529 }
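/* A sketch: the 128-bit dividend has to sit in %rdx:%rax, so
      movq %sHi, %rdx ; movq %sLo, %rax
      idivq <rm>        -- or divq, for the unsigned case
      movq %rdx, %tHi   -- remainder
      movq %rax, %tLo   -- quotient
   matching DivMod*128to64's (rem, div) result pair. */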
2530
2531 /* 64HLto128(e1,e2) */
2532 case Iop_64HLto128:
2533 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2534 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2535 return;
2536
sewardj9b967672005-02-08 11:13:09 +00002537 default:
2538 break;
2539 }
2540 } /* if (e->tag == Iex_Binop) */
2541
sewardj9b967672005-02-08 11:13:09 +00002542 ppIRExpr(e);
2543 vpanic("iselInt128Expr");
2544}
2545
2546
sewardj8d965312005-02-25 02:48:47 +00002547/*---------------------------------------------------------*/
2548/*--- ISEL: Floating point expressions (32 bit) ---*/
2549/*---------------------------------------------------------*/
2550
2551/* Nothing interesting here; really just wrappers for
2552 64-bit stuff. */
2553
2554static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2555{
2556 HReg r = iselFltExpr_wrk( env, e );
2557# if 0
2558 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2559# endif
2560 vassert(hregClass(r) == HRcVec128);
2561 vassert(hregIsVirtual(r));
2562 return r;
2563}
2564
2565/* DO NOT CALL THIS DIRECTLY */
2566static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2567{
2568 IRType ty = typeOfIRExpr(env->type_env,e);
2569 vassert(ty == Ity_F32);
2570
sewardjdd40fdf2006-12-24 02:20:24 +00002571 if (e->tag == Iex_RdTmp) {
2572 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardjc49ce232005-02-25 13:03:03 +00002573 }
2574
sewardje768e922009-11-26 17:17:37 +00002575 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardjc49ce232005-02-25 13:03:03 +00002576 AMD64AMode* am;
2577 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002578 vassert(e->Iex.Load.ty == Ity_F32);
2579 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardjc49ce232005-02-25 13:03:03 +00002580 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2581 return res;
2582 }
sewardj8d965312005-02-25 02:48:47 +00002583
2584 if (e->tag == Iex_Binop
2585 && e->Iex.Binop.op == Iop_F64toF32) {
2586 /* Although the result is still held in a standard SSE register,
2587 we need to round it to reflect the loss of accuracy/range
2588 entailed in casting it to a 32-bit float. */
2589 HReg dst = newVRegV(env);
2590 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2591 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2592 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2593 set_SSE_rounding_default( env );
2594 return dst;
2595 }
2596
sewardjc49ce232005-02-25 13:03:03 +00002597 if (e->tag == Iex_Get) {
2598 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2599 hregAMD64_RBP() );
2600 HReg res = newVRegV(env);
2601 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2602 return res;
2603 }
2604
sewardj5992bd02005-05-11 02:13:42 +00002605 if (e->tag == Iex_Unop
2606 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2607 /* Given an I32, produce an IEEE754 float with the same bit
2608 pattern. */
2609 HReg dst = newVRegV(env);
2610 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2611 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2612 addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2613 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2614 return dst;
2615 }
sewardj8d965312005-02-25 02:48:47 +00002616
sewardjd15b5972010-06-27 09:06:34 +00002617 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2618 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2619 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
2620 HReg dst = newVRegV(env);
2621
2622 /* arg now holds the value to be rounded. The first thing to do
2623 is set the FPU's rounding mode accordingly. */
2624
2625 /* Set host x87 rounding mode */
2626 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2627
2628 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
2629 addInstr(env, AMD64Instr_A87Free(1));
2630 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
2631 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2632 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
2633 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
2634
2635 /* Restore default x87 rounding. */
2636 set_FPU_rounding_default( env );
2637
2638 return dst;
2639 }
2640
sewardjcc3d2192013-03-27 11:37:33 +00002641 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) {
2642 /* Sigh ... very rough code. Could do much better. */
2643 /* Get the 128-bit literal 00---0 10---0 into a register
2644 and xor it with the value to be negated. */
2645 HReg r1 = newVRegI(env);
2646 HReg dst = newVRegV(env);
2647 HReg tmp = newVRegV(env);
2648 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2649 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
2650 addInstr(env, mk_vMOVsd_RR(src,tmp));
2651 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
2652 addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 ));
2653 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
2654 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
2655 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
2656 add_to_rsp(env, 16);
2657 return dst;
2658 }
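/* The xor-with-sign-bit idea; in effect:
      pushq $0
      movabsq $0x80000000, %r1 ; pushq %r1
      movups (%rsp), %dst   -- dst = 00...0 : 0x80000000
      xorps %tmp, %dst      -- flips only bit 31, the F32 sign bit
      addq $16, %rsp
   where %tmp holds a copy of the value being negated. */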
2659
2660 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) {
2661 IRQop *qop = e->Iex.Qop.details;
2662 HReg dst = newVRegV(env);
2663 HReg argX = iselFltExpr(env, qop->arg2);
2664 HReg argY = iselFltExpr(env, qop->arg3);
2665 HReg argZ = iselFltExpr(env, qop->arg4);
2666 /* XXXROUNDINGFIXME */
2667 /* set roundingmode here */
2668 /* subq $16, %rsp -- make a space*/
2669 sub_from_rsp(env, 16);
2670 /* Prepare 4 arg regs:
2671 leaq 0(%rsp), %rdi
2672 leaq 4(%rsp), %rsi
2673 leaq 8(%rsp), %rdx
2674 leaq 12(%rsp), %rcx
2675 */
2676 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2677 hregAMD64_RDI()));
2678 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
2679 hregAMD64_RSI()));
2680 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2681 hregAMD64_RDX()));
2682 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
2683 hregAMD64_RCX()));
2684 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2685 movss %argX, 0(%rsi)
2686 movss %argY, 0(%rdx)
2687 movss %argZ, 0(%rcx)
2688 */
2689 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX,
2690 AMD64AMode_IR(0, hregAMD64_RSI())));
2691 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY,
2692 AMD64AMode_IR(0, hregAMD64_RDX())));
2693 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ,
2694 AMD64AMode_IR(0, hregAMD64_RCX())));
2695 /* call the helper */
2696 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2697 (ULong)(HWord)h_generic_calc_MAddF32,
sewardj74142b82013-08-08 10:28:59 +00002698 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002699 /* fetch the result back from 0(%rsp), where the
2700 helper has just written it. */
2701 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
2702 AMD64AMode_IR(0, hregAMD64_RSP())));
2703 /* and finally, clear the space */
2704 add_to_rsp(env, 16);
2705 return dst;
2706 }
2707
sewardj8d965312005-02-25 02:48:47 +00002708 ppIRExpr(e);
2709 vpanic("iselFltExpr_wrk");
2710}
sewardj18303862005-02-21 12:36:54 +00002711
2712
2713/*---------------------------------------------------------*/
2714/*--- ISEL: Floating point expressions (64 bit) ---*/
2715/*---------------------------------------------------------*/
2716
2717/* Compute a 64-bit floating point value into the lower half of an xmm
2718 register, the identity of which is returned. As with
2719 iselIntExpr_R, the returned reg will be virtual, and it must not be
2720 changed by subsequent code emitted by the caller.
2721*/
2722
2723/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2724
2725 Type S (1 bit) E (11 bits) F (52 bits)
2726 ---- --------- ----------- -----------
2727 signalling NaN u 2047 (max) .0uuuuu---u
2728 (with at least
2729 one 1 bit)
2730 quiet NaN u 2047 (max) .1uuuuu---u
2731
2732 negative infinity 1 2047 (max) .000000---0
2733
2734 positive infinity 0 2047 (max) .000000---0
2735
2736 negative zero 1 0 .000000---0
2737
2738 positive zero 0 0 .000000---0
2739*/
2740
2741static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2742{
2743 HReg r = iselDblExpr_wrk( env, e );
2744# if 0
2745 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2746# endif
2747 vassert(hregClass(r) == HRcVec128);
2748 vassert(hregIsVirtual(r));
2749 return r;
2750}
2751
2752/* DO NOT CALL THIS DIRECTLY */
2753static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2754{
2755 IRType ty = typeOfIRExpr(env->type_env,e);
2756 vassert(e);
2757 vassert(ty == Ity_F64);
2758
sewardjdd40fdf2006-12-24 02:20:24 +00002759 if (e->tag == Iex_RdTmp) {
2760 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj18303862005-02-21 12:36:54 +00002761 }
2762
sewardj8d965312005-02-25 02:48:47 +00002763 if (e->tag == Iex_Const) {
2764 union { ULong u64; Double f64; } u;
2765 HReg res = newVRegV(env);
2766 HReg tmp = newVRegI(env);
2767 vassert(sizeof(u) == 8);
2768 vassert(sizeof(u.u64) == 8);
2769 vassert(sizeof(u.f64) == 8);
2770
2771 if (e->Iex.Const.con->tag == Ico_F64) {
2772 u.f64 = e->Iex.Const.con->Ico.F64;
2773 }
2774 else if (e->Iex.Const.con->tag == Ico_F64i) {
2775 u.u64 = e->Iex.Const.con->Ico.F64i;
2776 }
2777 else
2778 vpanic("iselDblExpr(amd64): const");
2779
2780 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2781 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2782 addInstr(env, AMD64Instr_SseLdSt(
2783 True/*load*/, 8, res,
2784 AMD64AMode_IR(0, hregAMD64_RSP())
2785 ));
2786 add_to_rsp(env, 8);
2787 return res;
2788 }
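      /* Roughly the sequence generated for the constant case, if my
         reading of the instruction constructors is right:
            movabsq $<bits>, %tmp
            pushq   %tmp
            movsd   0(%rsp), %res
            addq    $8, %rsp
         i.e. the 64-bit pattern takes a round trip through the stack
         to cross from the integer to the vector register file. */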
sewardj9da16972005-02-21 13:58:26 +00002789
sewardje768e922009-11-26 17:17:37 +00002790 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj9da16972005-02-21 13:58:26 +00002791 AMD64AMode* am;
2792 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002793 vassert(e->Iex.Load.ty == Ity_F64);
2794 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj9da16972005-02-21 13:58:26 +00002795 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2796 return res;
2797 }
sewardj18303862005-02-21 12:36:54 +00002798
2799 if (e->tag == Iex_Get) {
2800 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2801 hregAMD64_RBP() );
2802 HReg res = newVRegV(env);
2803 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2804 return res;
2805 }
2806
sewardj8d965312005-02-25 02:48:47 +00002807 if (e->tag == Iex_GetI) {
2808 AMD64AMode* am
2809 = genGuestArrayOffset(
2810 env, e->Iex.GetI.descr,
2811 e->Iex.GetI.ix, e->Iex.GetI.bias );
2812 HReg res = newVRegV(env);
2813 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2814 return res;
2815 }
2816
sewardj4796d662006-02-05 16:06:26 +00002817 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00002818 IRTriop *triop = e->Iex.Triop.details;
sewardj137015d2005-03-27 04:01:15 +00002819 AMD64SseOp op = Asse_INVALID;
florian420bfa92012-06-02 20:29:22 +00002820 switch (triop->op) {
sewardj137015d2005-03-27 04:01:15 +00002821 case Iop_AddF64: op = Asse_ADDF; break;
2822 case Iop_SubF64: op = Asse_SUBF; break;
2823 case Iop_MulF64: op = Asse_MULF; break;
2824 case Iop_DivF64: op = Asse_DIVF; break;
2825 default: break;
2826 }
2827 if (op != Asse_INVALID) {
2828 HReg dst = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00002829 HReg argL = iselDblExpr(env, triop->arg2);
2830 HReg argR = iselDblExpr(env, triop->arg3);
sewardj137015d2005-03-27 04:01:15 +00002831 addInstr(env, mk_vMOVsd_RR(argL, dst));
sewardj4796d662006-02-05 16:06:26 +00002832 /* XXXROUNDINGFIXME */
2833 /* set roundingmode here */
sewardj137015d2005-03-27 04:01:15 +00002834 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2835 return dst;
2836 }
2837 }
2838
sewardjcc3d2192013-03-27 11:37:33 +00002839 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) {
2840 IRQop *qop = e->Iex.Qop.details;
2841 HReg dst = newVRegV(env);
2842 HReg argX = iselDblExpr(env, qop->arg2);
2843 HReg argY = iselDblExpr(env, qop->arg3);
2844 HReg argZ = iselDblExpr(env, qop->arg4);
2845 /* XXXROUNDINGFIXME */
2846 /* set roundingmode here */
2847      /* subq $32, %rsp -- make a space */
2848 sub_from_rsp(env, 32);
2849 /* Prepare 4 arg regs:
2850 leaq 0(%rsp), %rdi
2851 leaq 8(%rsp), %rsi
2852 leaq 16(%rsp), %rdx
2853 leaq 24(%rsp), %rcx
2854 */
2855 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2856 hregAMD64_RDI()));
2857 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2858 hregAMD64_RSI()));
2859 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
2860 hregAMD64_RDX()));
2861 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
2862 hregAMD64_RCX()));
2863 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2864 movsd %argX, 0(%rsi)
2865 movsd %argY, 0(%rdx)
2866 movsd %argZ, 0(%rcx)
2867 */
2868 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX,
2869 AMD64AMode_IR(0, hregAMD64_RSI())));
2870 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY,
2871 AMD64AMode_IR(0, hregAMD64_RDX())));
2872 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ,
2873 AMD64AMode_IR(0, hregAMD64_RCX())));
2874 /* call the helper */
2875 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2876 (ULong)(HWord)h_generic_calc_MAddF64,
sewardj74142b82013-08-08 10:28:59 +00002877 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002878      /* fetch the result from 0(%rsp) -- the slot that %rdi pointed
2879         at when the helper was called. */
2880 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
2881 AMD64AMode_IR(0, hregAMD64_RSP())));
2882 /* and finally, clear the space */
2883 add_to_rsp(env, 32);
2884 return dst;
2885 }
2886
sewardjb183b852006-02-03 16:08:03 +00002887 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
sewardj25a85812005-05-08 23:03:48 +00002888 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2889 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2890 HReg dst = newVRegV(env);
2891
2892      /* arg now holds the value to be rounded.  The first thing to do
2893 is set the FPU's rounding mode accordingly. */
2894
2895 /* Set host x87 rounding mode */
2896 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2897
2898 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2899 addInstr(env, AMD64Instr_A87Free(1));
sewardjd15b5972010-06-27 09:06:34 +00002900 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002901 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
sewardjd15b5972010-06-27 09:06:34 +00002902 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002903 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2904
2905 /* Restore default x87 rounding. */
2906 set_FPU_rounding_default( env );
2907
2908 return dst;
2909 }
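      /* In outline (the mnemonics are my reconstruction, not taken
         from the emitter), the value round-trips through the x87
         unit:
            movsd   %arg, -8(%rsp)
            ffree   ...                 ; A87Free(1)
            fldl    -8(%rsp)            ; A87PushPop push
            frndint                     ; Afp_ROUND
            fstpl   -8(%rsp)            ; A87PushPop pop
            movsd   -8(%rsp), %dst
         with the x87 rounding mode set and restored around it. */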
2910
florian420bfa92012-06-02 20:29:22 +0000 2911   IRTriop *triop = e->Iex.Triop.details; /* deref'd only after the tag check below */
sewardj4796d662006-02-05 16:06:26 +00002912 if (e->tag == Iex_Triop
florian420bfa92012-06-02 20:29:22 +00002913 && (triop->op == Iop_ScaleF64
2914 || triop->op == Iop_AtanF64
2915 || triop->op == Iop_Yl2xF64
2916 || triop->op == Iop_Yl2xp1F64
2917 || triop->op == Iop_PRemF64
2918 || triop->op == Iop_PRem1F64)
sewardj25a85812005-05-08 23:03:48 +00002919 ) {
2920 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
florian420bfa92012-06-02 20:29:22 +00002921 HReg arg1 = iselDblExpr(env, triop->arg2);
2922 HReg arg2 = iselDblExpr(env, triop->arg3);
sewardj25a85812005-05-08 23:03:48 +00002923 HReg dst = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00002924 Bool arg2first = toBool(triop->op == Iop_ScaleF64
2925 || triop->op == Iop_PRemF64
2926 || triop->op == Iop_PRem1F64);
sewardj25a85812005-05-08 23:03:48 +00002927 addInstr(env, AMD64Instr_A87Free(2));
2928
2929 /* one arg -> top of x87 stack */
2930 addInstr(env, AMD64Instr_SseLdSt(
2931 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00002932 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002933
2934 /* other arg -> top of x87 stack */
2935 addInstr(env, AMD64Instr_SseLdSt(
2936 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00002937 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002938
2939 /* do it */
sewardj4796d662006-02-05 16:06:26 +00002940 /* XXXROUNDINGFIXME */
2941 /* set roundingmode here */
florian420bfa92012-06-02 20:29:22 +00002942 switch (triop->op) {
sewardj25a85812005-05-08 23:03:48 +00002943 case Iop_ScaleF64:
2944 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
2945 break;
2946 case Iop_AtanF64:
2947 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
2948 break;
2949 case Iop_Yl2xF64:
2950 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
2951 break;
sewardj5e205372005-05-09 02:57:08 +00002952 case Iop_Yl2xp1F64:
2953 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
2954 break;
sewardjf4c803b2006-09-11 11:07:34 +00002955 case Iop_PRemF64:
2956 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
2957 break;
sewardj4970e4e2008-10-11 10:07:55 +00002958 case Iop_PRem1F64:
2959 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
2960 break;
sewardj25a85812005-05-08 23:03:48 +00002961 default:
2962 vassert(0);
2963 }
2964
2965 /* save result */
sewardjd15b5972010-06-27 09:06:34 +00002966 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002967 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2968 return dst;
2969 }
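      /* A worked example for the arg2first flag above: for
         Iop_ScaleF64(rm, x, y) arg2first is True, so y is pushed
         first and x ends up in %st(0) with y in %st(1); fscale
         computes st0 * 2^trunc(st1), hence Scale(1.5, 3.0) == 12.0.
         (fscale's semantics are standard x87; the operand-order
         reasoning is mine, read off the two pushes above.) */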
sewardj1a01e652005-02-23 11:39:21 +00002970
sewardj6c299f32009-12-31 18:00:12 +00002971 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00002972 HReg dst = newVRegV(env);
2973 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
2974 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2975 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
2976 set_SSE_rounding_default( env );
2977 return dst;
2978 }
2979
sewardj6c299f32009-12-31 18:00:12 +00002980 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00002981 HReg dst = newVRegV(env);
2982 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2983 set_SSE_rounding_default( env );
2984 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
2985 return dst;
2986 }
2987
sewardj137015d2005-03-27 04:01:15 +00002988 if (e->tag == Iex_Unop
2989 && (e->Iex.Unop.op == Iop_NegF64
2990 || e->Iex.Unop.op == Iop_AbsF64)) {
sewardj8d965312005-02-25 02:48:47 +00002991 /* Sigh ... very rough code. Could do much better. */
sewardj137015d2005-03-27 04:01:15 +00002992 /* Get the 128-bit literal 00---0 10---0 into a register
2993         and xor (for Neg) or and-not (for Abs) it with the value. */
sewardj8d965312005-02-25 02:48:47 +00002994 HReg r1 = newVRegI(env);
2995 HReg dst = newVRegV(env);
sewardj137015d2005-03-27 04:01:15 +00002996 HReg tmp = newVRegV(env);
sewardj8d965312005-02-25 02:48:47 +00002997 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2998 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
sewardj137015d2005-03-27 04:01:15 +00002999 addInstr(env, mk_vMOVsd_RR(src,tmp));
sewardj8d965312005-02-25 02:48:47 +00003000 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3001 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3002 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
sewardj137015d2005-03-27 04:01:15 +00003003 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
3004
3005 if (e->Iex.Unop.op == Iop_NegF64)
3006 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3007 else
3008 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
3009
sewardj8d965312005-02-25 02:48:47 +00003010 add_to_rsp(env, 16);
3011 return dst;
3012 }
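      /* What the two pushes above build, as I read them: the second
         push leaves %rsp pointing at the sign-bit qword, so the
         16-byte load yields lane0 = 0x8000000000000000, lane1 = 0.
         XOR with that flips only the sign of the low double (Neg);
         ANDN (dst = ~dst & src) with the mask in dst clears it
         (Abs).  The upper lane is untouched either way. */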
3013
sewardj4796d662006-02-05 16:06:26 +00003014 if (e->tag == Iex_Binop) {
sewardj25a85812005-05-08 23:03:48 +00003015 A87FpOp fpop = Afp_INVALID;
sewardj4796d662006-02-05 16:06:26 +00003016 switch (e->Iex.Binop.op) {
sewardj25a85812005-05-08 23:03:48 +00003017 case Iop_SqrtF64: fpop = Afp_SQRT; break;
sewardj5e205372005-05-09 02:57:08 +00003018 case Iop_SinF64: fpop = Afp_SIN; break;
3019 case Iop_CosF64: fpop = Afp_COS; break;
3020 case Iop_TanF64: fpop = Afp_TAN; break;
sewardj25a85812005-05-08 23:03:48 +00003021 case Iop_2xm1F64: fpop = Afp_2XM1; break;
3022 default: break;
3023 }
3024 if (fpop != Afp_INVALID) {
3025 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
sewardj4796d662006-02-05 16:06:26 +00003026 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
sewardj25a85812005-05-08 23:03:48 +00003027 HReg dst = newVRegV(env);
sewardj4796d662006-02-05 16:06:26 +00003028 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
sewardj25a85812005-05-08 23:03:48 +00003029 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
sewardj5e205372005-05-09 02:57:08 +00003030 addInstr(env, AMD64Instr_A87Free(nNeeded));
sewardjd15b5972010-06-27 09:06:34 +00003031 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj4796d662006-02-05 16:06:26 +00003032 /* XXXROUNDINGFIXME */
3033 /* set roundingmode here */
sewardje9c51c92014-04-30 22:50:34 +00003034 /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition
3035 codes. I don't think that matters, since this insn
3036 selector never generates such an instruction intervening
3037         between a flag-setting instruction and a flag-using
3038 instruction. */
sewardj25a85812005-05-08 23:03:48 +00003039 addInstr(env, AMD64Instr_A87FpOp(fpop));
sewardjd15b5972010-06-27 09:06:34 +00003040 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00003041 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3042 return dst;
3043 }
3044 }
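   /* The nNeeded == 2 for Iop_TanF64 above is, I believe, because
      x87 fptan pushes a 1.0 above its result, so two free stack
      slots are required rather than one. */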
sewardjc49ce232005-02-25 13:03:03 +00003045
3046 if (e->tag == Iex_Unop) {
3047 switch (e->Iex.Unop.op) {
sewardja3e98302005-02-01 15:55:05 +00003048//.. case Iop_I32toF64: {
3049//.. HReg dst = newVRegF(env);
3050//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3051//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3052//.. set_FPU_rounding_default(env);
3053//.. addInstr(env, X86Instr_FpLdStI(
3054//.. True/*load*/, 4, dst,
3055//.. X86AMode_IR(0, hregX86_ESP())));
sewardjc49ce232005-02-25 13:03:03 +00003056//.. add_to_esp(env, 4);
sewardja3e98302005-02-01 15:55:05 +00003057//.. return dst;
3058//.. }
sewardj924215b2005-03-26 21:50:31 +00003059 case Iop_ReinterpI64asF64: {
3060 /* Given an I64, produce an IEEE754 double with the same
3061 bit pattern. */
3062 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3063 HReg dst = newVRegV(env);
3064 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
3065 /* paranoia */
3066 set_SSE_rounding_default(env);
3067 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
3068 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3069 return dst;
3070 }
sewardjc49ce232005-02-25 13:03:03 +00003071 case Iop_F32toF64: {
sewardj9a036bf2005-03-14 18:19:08 +00003072 HReg f32;
sewardjc49ce232005-02-25 13:03:03 +00003073 HReg f64 = newVRegV(env);
3074 /* this shouldn't be necessary, but be paranoid ... */
3075 set_SSE_rounding_default(env);
sewardj9a036bf2005-03-14 18:19:08 +00003076 f32 = iselFltExpr(env, e->Iex.Unop.arg);
sewardjc49ce232005-02-25 13:03:03 +00003077 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
3078 return f64;
3079 }
3080 default:
3081 break;
3082 }
3083 }
sewardj8d965312005-02-25 02:48:47 +00003084
3085 /* --------- MULTIPLEX --------- */
florian99dd03e2013-01-29 03:56:06 +00003086 if (e->tag == Iex_ITE) { // VFD
3087 HReg r1, r0, dst;
sewardj8d965312005-02-25 02:48:47 +00003088 vassert(ty == Ity_F64);
florian99dd03e2013-01-29 03:56:06 +00003089 vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1);
3090 r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3091 r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
sewardj8d965312005-02-25 02:48:47 +00003092 dst = newVRegV(env);
florian99dd03e2013-01-29 03:56:06 +00003093 addInstr(env, mk_vMOVsd_RR(r1,dst));
3094 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00003095 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
sewardj8d965312005-02-25 02:48:47 +00003096 return dst;
3097 }
sewardj18303862005-02-21 12:36:54 +00003098
3099 ppIRExpr(e);
3100 vpanic("iselDblExpr_wrk");
3101}
sewardjc2bcb6f2005-02-07 00:17:12 +00003102
sewardj0852a132005-02-21 08:28:46 +00003103
3104/*---------------------------------------------------------*/
3105/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3106/*---------------------------------------------------------*/
3107
3108static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3109{
3110 HReg r = iselVecExpr_wrk( env, e );
3111# if 0
3112 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3113# endif
3114 vassert(hregClass(r) == HRcVec128);
3115 vassert(hregIsVirtual(r));
3116 return r;
3117}
3118
3119
3120/* DO NOT CALL THIS DIRECTLY */
3121static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3122{
sewardj69d98e32010-06-18 08:17:41 +00003123 HWord fn = 0; /* address of helper fn, if required */
3124 Bool arg1isEReg = False;
sewardj0852a132005-02-21 08:28:46 +00003125 AMD64SseOp op = Asse_INVALID;
3126 IRType ty = typeOfIRExpr(env->type_env,e);
3127 vassert(e);
3128 vassert(ty == Ity_V128);
3129
sewardjdd40fdf2006-12-24 02:20:24 +00003130 if (e->tag == Iex_RdTmp) {
3131 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj0852a132005-02-21 08:28:46 +00003132 }
3133
3134 if (e->tag == Iex_Get) {
3135 HReg dst = newVRegV(env);
3136 addInstr(env, AMD64Instr_SseLdSt(
3137 True/*load*/,
sewardj18303862005-02-21 12:36:54 +00003138 16,
sewardj0852a132005-02-21 08:28:46 +00003139 dst,
3140 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
3141 )
3142 );
3143 return dst;
3144 }
3145
sewardje768e922009-11-26 17:17:37 +00003146 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj1a01e652005-02-23 11:39:21 +00003147 HReg dst = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00003148 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj1a01e652005-02-23 11:39:21 +00003149 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
3150 return dst;
3151 }
3152
3153 if (e->tag == Iex_Const) {
3154 HReg dst = newVRegV(env);
3155 vassert(e->Iex.Const.con->tag == Ico_V128);
sewardj9ba870d2010-04-02 11:29:23 +00003156 switch (e->Iex.Const.con->Ico.V128) {
3157 case 0x0000:
3158 dst = generate_zeroes_V128(env);
sewardjacfbd7d2010-08-17 22:52:08 +00003159 break;
sewardj9ba870d2010-04-02 11:29:23 +00003160 case 0xFFFF:
3161 dst = generate_ones_V128(env);
sewardj9ba870d2010-04-02 11:29:23 +00003162 break;
sewardjacfbd7d2010-08-17 22:52:08 +00003163 default: {
3164 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3165 /* do push_uimm64 twice, first time for the high-order half. */
3166 push_uimm64(env, bitmask8_to_bytemask64(
3167 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
3168 ));
3169 push_uimm64(env, bitmask8_to_bytemask64(
3170 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
3171 ));
3172 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
3173 add_to_rsp(env, 16);
3174 break;
3175 }
sewardj1a01e652005-02-23 11:39:21 +00003176 }
sewardj9ba870d2010-04-02 11:29:23 +00003177 return dst;
sewardj1a01e652005-02-23 11:39:21 +00003178 }
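   /* Reminder of the Ico_V128 encoding relied on above: each of the
      16 bits describes one byte of the vector, so e.g. the constant
      0x00FF denotes a V128 whose low 8 bytes are 0xFF and whose high
      8 bytes are zero.  bitmask8_to_bytemask64 expands one 8-bit
      half into the corresponding 64-bit byte mask. */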
sewardj0852a132005-02-21 08:28:46 +00003179
3180 if (e->tag == Iex_Unop) {
3181 switch (e->Iex.Unop.op) {
3182
sewardj8d965312005-02-25 02:48:47 +00003183 case Iop_NotV128: {
3184 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3185 return do_sse_NotV128(env, arg);
3186 }
3187
sewardj09717342005-05-05 21:34:02 +00003188 case Iop_CmpNEZ64x2: {
3189 /* We can use SSE2 instructions for this. */
3190 /* Ideally, we want to do a 64Ix2 comparison against zero of
3191 the operand. Problem is no such insn exists. Solution
3192 therefore is to do a 32Ix4 comparison instead, and bitwise-
3193 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3194 let the not'd result of this initial comparison be a:b:c:d.
3195 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3196 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3197 giving the required result.
3198
3199 The required selection sequence is 2,3,0,1, which
3200 according to Intel's documentation means the pshufd
3201 literal value is 0xB1, that is,
3202 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3203 */
3204 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
sewardjac530442005-05-11 16:13:37 +00003205 HReg tmp = generate_zeroes_V128(env);
sewardj09717342005-05-05 21:34:02 +00003206 HReg dst = newVRegV(env);
sewardj09717342005-05-05 21:34:02 +00003207 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3208 tmp = do_sse_NotV128(env, tmp);
3209 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
3210 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
3211 return dst;
3212 }
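      /* Concrete check of the scheme above: take one 64-bit lane
         holding 0x0000000100000000.  After cmpeq32-against-zero and
         the NOT, its (low,high) 32-bit halves are
         (0x00000000, 0xFFFFFFFF); OR-ing with the pshufd-swapped
         copy gives all-ones in both halves, i.e. "nonzero", as
         required.  A lane of zero stays all-zeroes. */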
3213
sewardjac530442005-05-11 16:13:37 +00003214 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3215 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3216 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
3217 do_CmpNEZ_vector:
3218 {
3219 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3220 HReg tmp = newVRegV(env);
3221 HReg zero = generate_zeroes_V128(env);
3222 HReg dst;
3223 addInstr(env, mk_vMOVsd_RR(arg, tmp));
3224 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
3225 dst = do_sse_NotV128(env, tmp);
3226 return dst;
3227 }
sewardja7ba8c42005-05-10 20:08:34 +00003228
sewardj1ddee212014-08-24 14:00:19 +00003229 case Iop_RecipEst32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
3230 case Iop_RSqrtEst32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
3231 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary;
sewardja7ba8c42005-05-10 20:08:34 +00003232 do_32Fx4_unary:
3233 {
3234 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3235 HReg dst = newVRegV(env);
3236 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
3237 return dst;
3238 }
3239
sewardj97628592005-05-10 22:42:54 +00003240 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
3241 do_64Fx2_unary:
3242 {
3243 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3244 HReg dst = newVRegV(env);
3245 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
3246 return dst;
3247 }
sewardja7ba8c42005-05-10 20:08:34 +00003248
sewardj1ddee212014-08-24 14:00:19 +00003249 case Iop_RecipEst32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
3250 case Iop_RSqrtEst32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3251 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
sewardja7ba8c42005-05-10 20:08:34 +00003252 do_32F0x4_unary:
3253 {
3254 /* A bit subtle. We have to copy the arg to the result
3255 register first, because actually doing the SSE scalar insn
3256 leaves the upper 3/4 of the destination register
3257 unchanged. Whereas the required semantics of these
3258 primops is that the upper 3/4 is simply copied in from the
3259 argument. */
3260 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3261 HReg dst = newVRegV(env);
3262 addInstr(env, mk_vMOVsd_RR(arg, dst));
3263 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
3264 return dst;
3265 }
3266
sewardj0852a132005-02-21 08:28:46 +00003267 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
3268 do_64F0x2_unary:
3269 {
3270 /* A bit subtle. We have to copy the arg to the result
3271 register first, because actually doing the SSE scalar insn
3272 leaves the upper half of the destination register
3273 unchanged. Whereas the required semantics of these
3274 primops is that the upper half is simply copied in from the
3275 argument. */
3276 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3277 HReg dst = newVRegV(env);
3278 addInstr(env, mk_vMOVsd_RR(arg, dst));
3279 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3280 return dst;
3281 }
3282
sewardj8d965312005-02-25 02:48:47 +00003283 case Iop_32UtoV128: {
3284 HReg dst = newVRegV(env);
3285 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3286 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3287 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3288 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3289 return dst;
3290 }
sewardj0852a132005-02-21 08:28:46 +00003291
3292 case Iop_64UtoV128: {
3293 HReg dst = newVRegV(env);
3294 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3295 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3296 addInstr(env, AMD64Instr_Push(rmi));
3297 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3298 add_to_rsp(env, 8);
3299 return dst;
3300 }
3301
sewardj4b1cc832012-06-13 11:10:20 +00003302 case Iop_V256toV128_0:
3303 case Iop_V256toV128_1: {
3304 HReg vHi, vLo;
3305 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
3306 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
3307 }
3308
sewardj0852a132005-02-21 08:28:46 +00003309 default:
3310 break;
3311 } /* switch (e->Iex.Unop.op) */
3312 } /* if (e->tag == Iex_Unop) */
3313
3314 if (e->tag == Iex_Binop) {
3315 switch (e->Iex.Binop.op) {
3316
sewardjc4530ae2012-05-21 10:18:49 +00003317 /* FIXME: could we generate MOVQ here? */
sewardj18303862005-02-21 12:36:54 +00003318 case Iop_SetV128lo64: {
3319 HReg dst = newVRegV(env);
3320 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3321 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
sewardj478fe702005-04-23 01:15:47 +00003322 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3323 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3324 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
3325 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3326 return dst;
3327 }
3328
sewardjc4530ae2012-05-21 10:18:49 +00003329 /* FIXME: could we generate MOVD here? */
sewardj478fe702005-04-23 01:15:47 +00003330 case Iop_SetV128lo32: {
3331 HReg dst = newVRegV(env);
3332 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3333 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3334 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3335 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3336 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
3337 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
sewardj18303862005-02-21 12:36:54 +00003338 return dst;
3339 }
3340
sewardj1a01e652005-02-23 11:39:21 +00003341 case Iop_64HLtoV128: {
sewardjc4530ae2012-05-21 10:18:49 +00003342 HReg rsp = hregAMD64_RSP();
3343 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp);
3344 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
3345 AMD64RI* qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1);
3346 AMD64RI* qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2);
3347 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp));
3348 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00003349 HReg dst = newVRegV(env);
sewardjc4530ae2012-05-21 10:18:49 +00003350 /* One store-forwarding stall coming up, oh well :-( */
3351 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00003352 return dst;
3353 }
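      /* On the stall noted above: two 8-byte stores followed
         immediately by a 16-byte load spanning both generally cannot
         be satisfied by store-to-load forwarding, so the load waits
         for the stores to drain -- tolerated here for the sake of
         simple code. */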
3354
sewardj432f8b62005-05-10 02:50:05 +00003355 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3356 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3357 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
sewardjb9282632005-11-05 02:33:25 +00003358 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
sewardj432f8b62005-05-10 02:50:05 +00003359 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
3360 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
sewardj432f8b62005-05-10 02:50:05 +00003361 do_32Fx4:
3362 {
3363 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3364 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3365 HReg dst = newVRegV(env);
3366 addInstr(env, mk_vMOVsd_RR(argL, dst));
3367 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3368 return dst;
3369 }
3370
sewardj97628592005-05-10 22:42:54 +00003371 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3372 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3373 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
sewardjb9282632005-11-05 02:33:25 +00003374 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
sewardj5992bd02005-05-11 02:13:42 +00003375 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
3376 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
sewardj4c328cf2005-05-05 12:05:54 +00003377 do_64Fx2:
3378 {
3379 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3380 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3381 HReg dst = newVRegV(env);
3382 addInstr(env, mk_vMOVsd_RR(argL, dst));
3383 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3384 return dst;
3385 }
sewardj8d965312005-02-25 02:48:47 +00003386
sewardj432f8b62005-05-10 02:50:05 +00003387 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
sewardj3aba9eb2005-03-30 23:20:47 +00003388 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
sewardj4c328cf2005-05-05 12:05:54 +00003389 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
sewardjb9282632005-11-05 02:33:25 +00003390 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003391 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
sewardjc49ce232005-02-25 13:03:03 +00003392 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
sewardj37d52572005-02-25 14:22:12 +00003393 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
3394 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003395 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3396 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
3397 do_32F0x4: {
3398 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3399 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3400 HReg dst = newVRegV(env);
3401 addInstr(env, mk_vMOVsd_RR(argL, dst));
3402 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3403 return dst;
3404 }
3405
sewardj137015d2005-03-27 04:01:15 +00003406 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
sewardj8d965312005-02-25 02:48:47 +00003407 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
sewardj137015d2005-03-27 04:01:15 +00003408 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
sewardjb9282632005-11-05 02:33:25 +00003409 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003410 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3411 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
sewardj1a01e652005-02-23 11:39:21 +00003412 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
sewardjc49ce232005-02-25 13:03:03 +00003413 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003414 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3415 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
3416 do_64F0x2: {
3417 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3418 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3419 HReg dst = newVRegV(env);
3420 addInstr(env, mk_vMOVsd_RR(argL, dst));
3421 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3422 return dst;
3423 }
3424
sewardj5f438dd2011-06-16 11:36:23 +00003425 case Iop_QNarrowBin32Sto16Sx8:
sewardj97628592005-05-10 22:42:54 +00003426 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
sewardj5f438dd2011-06-16 11:36:23 +00003427 case Iop_QNarrowBin16Sto8Sx16:
sewardj97628592005-05-10 22:42:54 +00003428 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
sewardj5f438dd2011-06-16 11:36:23 +00003429 case Iop_QNarrowBin16Sto8Ux16:
sewardj97628592005-05-10 22:42:54 +00003430 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3431
3432 case Iop_InterleaveHI8x16:
3433 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3434 case Iop_InterleaveHI16x8:
3435 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3436 case Iop_InterleaveHI32x4:
3437 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3438 case Iop_InterleaveHI64x2:
3439 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3440
3441 case Iop_InterleaveLO8x16:
3442 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3443 case Iop_InterleaveLO16x8:
3444 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3445 case Iop_InterleaveLO32x4:
3446 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3447 case Iop_InterleaveLO64x2:
3448 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3449
sewardj1a01e652005-02-23 11:39:21 +00003450 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
sewardj8d965312005-02-25 02:48:47 +00003451 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003452 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003453 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003454 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003455 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003456 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003457 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
3458 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
3459 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
3460 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
3461 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
3462 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
3463 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
3464 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
3465 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
3466 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
3467 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3468 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
sewardjadffcef2005-05-11 00:03:06 +00003469 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
3470 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
3471 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
3472 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
3473 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3474 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3475 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003476 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
3477 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
3478 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003479 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003480 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
3481 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
3482 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
3483 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003484 do_SseReRg: {
3485 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3486 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3487 HReg dst = newVRegV(env);
3488 if (arg1isEReg) {
sewardj9da16972005-02-21 13:58:26 +00003489 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3490 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3491 } else {
3492 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3493 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3494 }
3495 return dst;
3496 }
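      /* Why arg1isEReg exists, as far as I can tell: the pack and
         unpack ops above are non-commutative, and the underlying
         instruction computes dst = op(dst, E-operand).  Copying arg2
         into dst and presenting arg1 as the E operand gives the
         operand order the IR semantics require; for the commutative
         ops either order would do. */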
3497
sewardjadffcef2005-05-11 00:03:06 +00003498 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3499 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3500 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3501 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3502 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3503 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3504 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
sewardj09717342005-05-05 21:34:02 +00003505 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
3506 do_SseShift: {
3507 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3508 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3509 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3510 HReg ereg = newVRegV(env);
3511 HReg dst = newVRegV(env);
3512 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3513 addInstr(env, AMD64Instr_Push(rmi));
3514 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3515 addInstr(env, mk_vMOVsd_RR(greg, dst));
3516 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
3517 add_to_rsp(env, 16);
3518 return dst;
3519 }
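      /* The push-0 / push-amount pair above builds, at 0(%rsp), a
         128-bit value whose low 64 bits are the shift count.  SSE2
         shift-by-xmm instructions take the count from those low 64
         bits, and every lane shifts by that same count. */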
sewardj0852a132005-02-21 08:28:46 +00003520
sewardj69d98e32010-06-18 08:17:41 +00003521 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4;
3522 goto do_SseAssistedBinary;
3523 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4;
3524 goto do_SseAssistedBinary;
3525 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4;
3526 goto do_SseAssistedBinary;
3527 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4;
3528 goto do_SseAssistedBinary;
3529 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4;
3530 goto do_SseAssistedBinary;
3531 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8;
3532 goto do_SseAssistedBinary;
3533 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8;
3534 goto do_SseAssistedBinary;
3535 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16;
3536 goto do_SseAssistedBinary;
3537 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
3538 goto do_SseAssistedBinary;
sewardjd8815622011-10-19 15:24:01 +00003539 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2;
3540 goto do_SseAssistedBinary;
sewardj69d98e32010-06-18 08:17:41 +00003541 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
3542 goto do_SseAssistedBinary;
sewardjd8bca7e2012-06-20 11:46:19 +00003543 case Iop_Perm32x4: fn = (HWord)h_generic_calc_Perm32x4;
3544 goto do_SseAssistedBinary;
sewardj5f438dd2011-06-16 11:36:23 +00003545 case Iop_QNarrowBin32Sto16Ux8:
3546 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
sewardj2260b992011-06-15 16:05:07 +00003547 goto do_SseAssistedBinary;
sewardjad2c9ea2011-10-22 09:32:16 +00003548 case Iop_NarrowBin16to8x16:
3549 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3550 goto do_SseAssistedBinary;
3551 case Iop_NarrowBin32to16x8:
3552 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3553 goto do_SseAssistedBinary;
sewardj69d98e32010-06-18 08:17:41 +00003554 do_SseAssistedBinary: {
3555 /* RRRufff! RRRufff code is what we're generating here. Oh
3556 well. */
3557 vassert(fn != 0);
3558 HReg dst = newVRegV(env);
3559 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3560 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3561 HReg argp = newVRegI(env);
3562         /* subq $112, %rsp -- make a space */
3563 sub_from_rsp(env, 112);
3564 /* leaq 48(%rsp), %r_argp -- point into it */
3565 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3566 argp));
3567 /* andq $-16, %r_argp -- 16-align the pointer */
3568 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3569 AMD64RMI_Imm( ~(UInt)15 ),
3570 argp));
3571 /* Prepare 3 arg regs:
3572 leaq 0(%r_argp), %rdi
3573 leaq 16(%r_argp), %rsi
3574 leaq 32(%r_argp), %rdx
3575 */
3576 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3577 hregAMD64_RDI()));
3578 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3579 hregAMD64_RSI()));
3580 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
3581 hregAMD64_RDX()));
3582 /* Store the two args, at (%rsi) and (%rdx):
3583 movupd %argL, 0(%rsi)
3584 movupd %argR, 0(%rdx)
3585 */
3586 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3587 AMD64AMode_IR(0, hregAMD64_RSI())));
3588 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
3589 AMD64AMode_IR(0, hregAMD64_RDX())));
3590 /* call the helper */
sewardjcfe046e2013-01-17 14:23:53 +00003591 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00003592 3, mk_RetLoc_simple(RLPri_None) ));
sewardj69d98e32010-06-18 08:17:41 +00003593 /* fetch the result from memory, using %r_argp, which the
3594 register allocator will keep alive across the call. */
3595 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3596 AMD64AMode_IR(0, argp)));
3597 /* and finally, clear the space */
3598 add_to_rsp(env, 112);
3599 return dst;
3600 }
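      /* Layout of the 112-byte scratch frame above, assuming the
         helpers follow the h_generic_calc_* convention of
         fn(/*OUT*/V128* res, V128* argL, V128* argR):

            argp+32   argR    <- %rdx
            argp+16   argL    <- %rsi
            argp+0    result  <- %rdi

         argp is rounded up to 16-byte alignment inside the frame, so
         the (alignment-tolerant) movupd accesses are in fact
         aligned. */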
3601
sewardj0874bee2011-01-17 10:32:18 +00003602 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
3603 goto do_SseAssistedVectorAndScalar;
3604 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
3605 goto do_SseAssistedVectorAndScalar;
3606 do_SseAssistedVectorAndScalar: {
3607 /* RRRufff! RRRufff code is what we're generating here. Oh
3608 well. */
3609 vassert(fn != 0);
3610 HReg dst = newVRegV(env);
3611 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3612 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3613 HReg argp = newVRegI(env);
3614         /* subq $112, %rsp -- make a space */
3615 sub_from_rsp(env, 112);
3616 /* leaq 48(%rsp), %r_argp -- point into it */
3617 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3618 argp));
3619 /* andq $-16, %r_argp -- 16-align the pointer */
3620 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3621 AMD64RMI_Imm( ~(UInt)15 ),
3622 argp));
3623 /* Prepare 2 vector arg regs:
3624 leaq 0(%r_argp), %rdi
3625 leaq 16(%r_argp), %rsi
3626 */
3627 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3628 hregAMD64_RDI()));
3629 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3630 hregAMD64_RSI()));
3631 /* Store the vector arg, at (%rsi):
3632 movupd %argL, 0(%rsi)
3633 */
3634 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3635 AMD64AMode_IR(0, hregAMD64_RSI())));
3636 /* And get the scalar value into rdx */
3637 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));
3638
3639 /* call the helper */
sewardjcfe046e2013-01-17 14:23:53 +00003640 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00003641 3, mk_RetLoc_simple(RLPri_None) ));
sewardj0874bee2011-01-17 10:32:18 +00003642 /* fetch the result from memory, using %r_argp, which the
3643 register allocator will keep alive across the call. */
3644 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3645 AMD64AMode_IR(0, argp)));
3646 /* and finally, clear the space */
3647 add_to_rsp(env, 112);
3648 return dst;
3649 }
3650
sewardj0852a132005-02-21 08:28:46 +00003651 default:
3652 break;
3653 } /* switch (e->Iex.Binop.op) */
3654 } /* if (e->tag == Iex_Binop) */
3655
sewardj9571dc02014-01-26 18:34:23 +00003656 if (e->tag == Iex_Triop) {
3657 IRTriop *triop = e->Iex.Triop.details;
3658 switch (triop->op) {
3659
3660 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm;
3661 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm;
3662 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm;
3663 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm;
3664 do_64Fx2_w_rm:
3665 {
3666 HReg argL = iselVecExpr(env, triop->arg2);
3667 HReg argR = iselVecExpr(env, triop->arg3);
3668 HReg dst = newVRegV(env);
3669 addInstr(env, mk_vMOVsd_RR(argL, dst));
3670 /* XXXROUNDINGFIXME */
3671 /* set roundingmode here */
3672 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3673 return dst;
3674 }
3675
3676 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm;
3677 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm;
3678 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm;
3679 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm;
3680 do_32Fx4_w_rm:
3681 {
3682 HReg argL = iselVecExpr(env, triop->arg2);
3683 HReg argR = iselVecExpr(env, triop->arg3);
3684 HReg dst = newVRegV(env);
3685 addInstr(env, mk_vMOVsd_RR(argL, dst));
3686 /* XXXROUNDINGFIXME */
3687 /* set roundingmode here */
3688 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3689 return dst;
3690 }
3691
3692 default:
3693 break;
3694 } /* switch (triop->op) */
3695 } /* if (e->tag == Iex_Triop) */
3696
florian99dd03e2013-01-29 03:56:06 +00003697 if (e->tag == Iex_ITE) { // VFD
3698 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3699 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
sewardjadffcef2005-05-11 00:03:06 +00003700 HReg dst = newVRegV(env);
florian99dd03e2013-01-29 03:56:06 +00003701 addInstr(env, mk_vMOVsd_RR(r1,dst));
floriane6be61f2013-02-01 16:11:51 +00003702 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00003703 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
sewardjadffcef2005-05-11 00:03:06 +00003704 return dst;
3705 }
3706
sewardjacfbd7d2010-08-17 22:52:08 +00003707 //vec_fail:
sewardj0852a132005-02-21 08:28:46 +00003708 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
sewardj5117ce12006-01-27 21:20:15 +00003709 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
sewardj0852a132005-02-21 08:28:46 +00003710 ppIRExpr(e);
3711 vpanic("iselVecExpr_wrk");
3712}
sewardjc33671d2005-02-01 20:30:00 +00003713
3714
3715/*---------------------------------------------------------*/
sewardjc4530ae2012-05-21 10:18:49 +00003716/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. --*/
3717/*---------------------------------------------------------*/
3718
sewardj56c30312012-06-12 08:45:39 +00003719static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
sewardjc4530ae2012-05-21 10:18:49 +00003720 ISelEnv* env, IRExpr* e )
3721{
3722 iselDVecExpr_wrk( rHi, rLo, env, e );
3723# if 0
3724 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3725# endif
3726 vassert(hregClass(*rHi) == HRcVec128);
3727 vassert(hregClass(*rLo) == HRcVec128);
3728 vassert(hregIsVirtual(*rHi));
3729 vassert(hregIsVirtual(*rLo));
3730}
3731
3732
3733/* DO NOT CALL THIS DIRECTLY */
sewardj56c30312012-06-12 08:45:39 +00003734static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
sewardjc4530ae2012-05-21 10:18:49 +00003735 ISelEnv* env, IRExpr* e )
3736{
sewardjcc3d2192013-03-27 11:37:33 +00003737 HWord fn = 0; /* address of helper fn, if required */
sewardjc4530ae2012-05-21 10:18:49 +00003738 vassert(e);
3739 IRType ty = typeOfIRExpr(env->type_env,e);
3740 vassert(ty == Ity_V256);
3741
sewardj56c30312012-06-12 08:45:39 +00003742 AMD64SseOp op = Asse_INVALID;
3743
sewardjc4530ae2012-05-21 10:18:49 +00003744 /* read 256-bit IRTemp */
3745 if (e->tag == Iex_RdTmp) {
3746 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3747 return;
3748 }
3749
3750 if (e->tag == Iex_Get) {
3751 HReg vHi = newVRegV(env);
3752 HReg vLo = newVRegV(env);
3753 HReg rbp = hregAMD64_RBP();
3754 AMD64AMode* am0 = AMD64AMode_IR(e->Iex.Get.offset + 0, rbp);
3755 AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp);
3756 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
3757 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
3758 *rHi = vHi;
3759 *rLo = vLo;
3760 return;
3761 }
3762
3763 if (e->tag == Iex_Load) {
3764 HReg vHi = newVRegV(env);
3765 HReg vLo = newVRegV(env);
3766 HReg rA = iselIntExpr_R(env, e->Iex.Load.addr);
3767 AMD64AMode* am0 = AMD64AMode_IR(0, rA);
3768 AMD64AMode* am16 = AMD64AMode_IR(16, rA);
3769 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
3770 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
3771 *rHi = vHi;
3772 *rLo = vLo;
3773 return;
3774 }
3775
sewardj37a505b2012-06-29 15:28:24 +00003776 if (e->tag == Iex_Const) {
3777 vassert(e->Iex.Const.con->tag == Ico_V256);
3778 switch (e->Iex.Const.con->Ico.V256) {
3779 case 0x00000000: {
3780 HReg vHi = generate_zeroes_V128(env);
3781 HReg vLo = newVRegV(env);
3782 addInstr(env, mk_vMOVsd_RR(vHi, vLo));
3783 *rHi = vHi;
3784 *rLo = vLo;
3785 return;
3786 }
3787 default:
3788 break; /* give up. Until such time as is necessary. */
3789 }
3790 }
3791
sewardj2a2bda92012-06-14 23:32:02 +00003792 if (e->tag == Iex_Unop) {
3793 switch (e->Iex.Unop.op) {
3794
3795 case Iop_NotV256: {
3796 HReg argHi, argLo;
3797 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3798 *rHi = do_sse_NotV128(env, argHi);
3799 *rLo = do_sse_NotV128(env, argLo);
3800 return;
3801 }
3802
sewardj1ddee212014-08-24 14:00:19 +00003803 case Iop_RecipEst32Fx8: op = Asse_RCPF; goto do_32Fx8_unary;
3804 case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary;
3805 case Iop_RSqrtEst32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
sewardj66becf32012-06-18 23:15:16 +00003806 do_32Fx8_unary:
3807 {
3808 HReg argHi, argLo;
3809 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3810 HReg dstHi = newVRegV(env);
3811 HReg dstLo = newVRegV(env);
3812 addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi));
3813 addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo));
3814 *rHi = dstHi;
3815 *rLo = dstLo;
3816 return;
3817 }
3818
3819 case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary;
3820 do_64Fx4_unary:
3821 {
3822 HReg argHi, argLo;
3823 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3824 HReg dstHi = newVRegV(env);
3825 HReg dstLo = newVRegV(env);
3826 addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi));
3827 addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo));
3828 *rHi = dstHi;
3829 *rLo = dstLo;
3830 return;
3831 }
3832
sewardj23db8a02012-06-25 07:46:18 +00003833 case Iop_CmpNEZ64x4: {
3834 /* We can use SSE2 instructions for this. */
3835 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
3836 (obviously). See comment on Iop_CmpNEZ64x2 for
3837 explanation of what's going on here. */
3838 HReg argHi, argLo;
3839 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3840 HReg tmpHi = generate_zeroes_V128(env);
3841 HReg tmpLo = newVRegV(env);
3842 addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
3843 HReg dstHi = newVRegV(env);
3844 HReg dstLo = newVRegV(env);
3845 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
3846 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
3847 tmpHi = do_sse_NotV128(env, tmpHi);
3848 tmpLo = do_sse_NotV128(env, tmpLo);
3849 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
3850 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
3851 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
3852 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
3853 *rHi = dstHi;
3854 *rLo = dstLo;
3855 return;
3856 }
3857
3858 case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
sewardjcc3d2192013-03-27 11:37:33 +00003859 case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3860 case Iop_CmpNEZ8x32: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
sewardj23db8a02012-06-25 07:46:18 +00003861 do_CmpNEZ_vector:
3862 {
3863 HReg argHi, argLo;
3864 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3865 HReg tmpHi = newVRegV(env);
3866 HReg tmpLo = newVRegV(env);
3867 HReg zero = generate_zeroes_V128(env);
3868 HReg dstHi, dstLo;
3869 addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
3870 addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
3871 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
3872 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
3873 dstHi = do_sse_NotV128(env, tmpHi);
3874 dstLo = do_sse_NotV128(env, tmpLo);
3875 *rHi = dstHi;
3876 *rLo = dstLo;
3877 return;
3878 }
3879
sewardj2a2bda92012-06-14 23:32:02 +00003880 default:
3881 break;
3882 } /* switch (e->Iex.Unop.op) */
3883 } /* if (e->tag == Iex_Unop) */
3884
sewardj56c30312012-06-12 08:45:39 +00003885 if (e->tag == Iex_Binop) {
3886 switch (e->Iex.Binop.op) {
3887
sewardj8eb7ae82012-06-24 14:00:27 +00003888 case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
3889 case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
sewardj56c30312012-06-12 08:45:39 +00003890 do_64Fx4:
3891 {
3892 HReg argLhi, argLlo, argRhi, argRlo;
3893 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
3894 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
3895 HReg dstHi = newVRegV(env);
3896 HReg dstLo = newVRegV(env);
3897 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
3898 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
3899 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
3900 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
3901 *rHi = dstHi;
3902 *rLo = dstLo;
3903 return;
3904 }
3905
sewardj8eb7ae82012-06-24 14:00:27 +00003906 case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
3907 case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
sewardj56c30312012-06-12 08:45:39 +00003908 do_32Fx8:
3909 {
3910 HReg argLhi, argLlo, argRhi, argRlo;
3911 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
3912 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
3913 HReg dstHi = newVRegV(env);
3914 HReg dstLo = newVRegV(env);
3915 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
3916 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
3917 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
3918 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
3919 *rHi = dstHi;
3920 *rLo = dstLo;
3921 return;
3922 }
3923
sewardj4b1cc832012-06-13 11:10:20 +00003924 case Iop_AndV256: op = Asse_AND; goto do_SseReRg;
sewardj2a2bda92012-06-14 23:32:02 +00003925 case Iop_OrV256: op = Asse_OR; goto do_SseReRg;
sewardj4b1cc832012-06-13 11:10:20 +00003926 case Iop_XorV256: op = Asse_XOR; goto do_SseReRg;
sewardjcc3d2192013-03-27 11:37:33 +00003927 case Iop_Add8x32: op = Asse_ADD8; goto do_SseReRg;
3928 case Iop_Add16x16: op = Asse_ADD16; goto do_SseReRg;
3929 case Iop_Add32x8: op = Asse_ADD32; goto do_SseReRg;
3930 case Iop_Add64x4: op = Asse_ADD64; goto do_SseReRg;
3931 case Iop_QAdd8Sx32: op = Asse_QADD8S; goto do_SseReRg;
3932 case Iop_QAdd16Sx16: op = Asse_QADD16S; goto do_SseReRg;
3933 case Iop_QAdd8Ux32: op = Asse_QADD8U; goto do_SseReRg;
3934 case Iop_QAdd16Ux16: op = Asse_QADD16U; goto do_SseReRg;
3935 case Iop_Avg8Ux32: op = Asse_AVG8U; goto do_SseReRg;
3936 case Iop_Avg16Ux16: op = Asse_AVG16U; goto do_SseReRg;
3937 case Iop_CmpEQ8x32: op = Asse_CMPEQ8; goto do_SseReRg;
3938 case Iop_CmpEQ16x16: op = Asse_CMPEQ16; goto do_SseReRg;
3939 case Iop_CmpEQ32x8: op = Asse_CMPEQ32; goto do_SseReRg;
3940 case Iop_CmpGT8Sx32: op = Asse_CMPGT8S; goto do_SseReRg;
3941 case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg;
3942 case Iop_CmpGT32Sx8: op = Asse_CMPGT32S; goto do_SseReRg;
3943 case Iop_Max16Sx16: op = Asse_MAX16S; goto do_SseReRg;
3944 case Iop_Max8Ux32: op = Asse_MAX8U; goto do_SseReRg;
3945 case Iop_Min16Sx16: op = Asse_MIN16S; goto do_SseReRg;
3946 case Iop_Min8Ux32: op = Asse_MIN8U; goto do_SseReRg;
3947 case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg;
3948 case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg;
3949 case Iop_Mul16x16: op = Asse_MUL16; goto do_SseReRg;
3950 case Iop_Sub8x32: op = Asse_SUB8; goto do_SseReRg;
3951 case Iop_Sub16x16: op = Asse_SUB16; goto do_SseReRg;
3952 case Iop_Sub32x8: op = Asse_SUB32; goto do_SseReRg;
3953 case Iop_Sub64x4: op = Asse_SUB64; goto do_SseReRg;
3954 case Iop_QSub8Sx32: op = Asse_QSUB8S; goto do_SseReRg;
3955 case Iop_QSub16Sx16: op = Asse_QSUB16S; goto do_SseReRg;
3956 case Iop_QSub8Ux32: op = Asse_QSUB8U; goto do_SseReRg;
3957 case Iop_QSub16Ux16: op = Asse_QSUB16U; goto do_SseReRg;
sewardj4b1cc832012-06-13 11:10:20 +00003958 do_SseReRg:
3959 {
3960 HReg argLhi, argLlo, argRhi, argRlo;
3961 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
3962 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
3963 HReg dstHi = newVRegV(env);
3964 HReg dstLo = newVRegV(env);
3965 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
3966 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
3967 addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi));
3968 addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo));
3969 *rHi = dstHi;
3970 *rLo = dstLo;
3971 return;
3972 }
3973
sewardjcc3d2192013-03-27 11:37:33 +00003974 case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
3975 case Iop_ShlN32x8: op = Asse_SHL32; goto do_SseShift;
3976 case Iop_ShlN64x4: op = Asse_SHL64; goto do_SseShift;
3977 case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
3978 case Iop_SarN32x8: op = Asse_SAR32; goto do_SseShift;
3979 case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
3980 case Iop_ShrN32x8: op = Asse_SHR32; goto do_SseShift;
3981 case Iop_ShrN64x4: op = Asse_SHR64; goto do_SseShift;
3982 do_SseShift: {
3983 HReg gregHi, gregLo;
3984 iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
3985 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3986 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3987 HReg ereg = newVRegV(env);
3988 HReg dstHi = newVRegV(env);
3989 HReg dstLo = newVRegV(env);
3990 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3991 addInstr(env, AMD64Instr_Push(rmi));
3992 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3993 addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
3994 addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
3995 addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
3996 addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
3997 add_to_rsp(env, 16);
3998 *rHi = dstHi;
3999 *rLo = dstLo;
4000 return;
4001 }
4002
sewardj4b1cc832012-06-13 11:10:20 +00004003 case Iop_V128HLtoV256: {
4004 *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
4005 *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
4006 return;
4007 }
4008
sewardjcc3d2192013-03-27 11:37:33 +00004009 case Iop_Mul32x8: fn = (HWord)h_generic_calc_Mul32x4;
4010 goto do_SseAssistedBinary;
4011 case Iop_Max32Sx8: fn = (HWord)h_generic_calc_Max32Sx4;
4012 goto do_SseAssistedBinary;
4013 case Iop_Min32Sx8: fn = (HWord)h_generic_calc_Min32Sx4;
4014 goto do_SseAssistedBinary;
4015 case Iop_Max32Ux8: fn = (HWord)h_generic_calc_Max32Ux4;
4016 goto do_SseAssistedBinary;
4017 case Iop_Min32Ux8: fn = (HWord)h_generic_calc_Min32Ux4;
4018 goto do_SseAssistedBinary;
4019 case Iop_Max16Ux16: fn = (HWord)h_generic_calc_Max16Ux8;
4020 goto do_SseAssistedBinary;
4021 case Iop_Min16Ux16: fn = (HWord)h_generic_calc_Min16Ux8;
4022 goto do_SseAssistedBinary;
4023 case Iop_Max8Sx32: fn = (HWord)h_generic_calc_Max8Sx16;
4024 goto do_SseAssistedBinary;
4025 case Iop_Min8Sx32: fn = (HWord)h_generic_calc_Min8Sx16;
4026 goto do_SseAssistedBinary;
4027 case Iop_CmpEQ64x4: fn = (HWord)h_generic_calc_CmpEQ64x2;
4028 goto do_SseAssistedBinary;
4029 case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
4030 goto do_SseAssistedBinary;
4031 do_SseAssistedBinary: {
4032 /* RRRufff! RRRufff code is what we're generating here. Oh
4033 well. */
4034 vassert(fn != 0);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $160, %rsp         -- make a space */
            sub_from_rsp(env, 160);
            /* leaq 48(%rsp), %r_argp  -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RDX()));
            /* Store the two high args, at (%rsi) and (%rdx):
               movupd %argLhi, 0(%rsi)
               movupd %argRhi, 0(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            /* Store the two low args, at 48(%rsi) and 48(%rdx):
               movupd %argLlo, 48(%rsi)
               movupd %argRlo, 48(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                             AMD64AMode_IR(48, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                             AMD64AMode_IR(48, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* Prepare 3 arg regs:
               leaq 48(%r_argp), %rdi
               leaq 64(%r_argp), %rsi
               leaq 80(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
                                           hregAMD64_RDX()));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                             AMD64AMode_IR(0, argp)));
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                             AMD64AMode_IR(48, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 160);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Perm32x8: fn = (HWord)h_generic_calc_Perm32x8;
                            goto do_SseAssistedBinary256;
         do_SseAssistedBinary256: {
            /* RRRufff! RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
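            /* Here the helper takes whole 256-bit values, (in effect)
                  void fn ( V256* dst, V256* argL, V256* argR )
               so there is a single call, with the scratch area laid
               out as:
                  0(%r_argp)   result (lo half at 0, hi half at 16)
                  32(%r_argp)  argL     64(%r_argp)  argR
            */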
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $160, %rsp         -- make a space */
            sub_from_rsp(env, 160);
            /* leaq 48(%rsp), %r_argp  -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 32(%r_argp), %rsi
               leaq 64(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                           hregAMD64_RDX()));
            /* Store the two args, at (%rsi) and (%rdx):
               movupd %argLlo, 0(%rsi)
               movupd %argLhi, 16(%rsi)
               movupd %argRlo, 0(%rdx)
               movupd %argRhi, 16(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                             AMD64AMode_IR(16, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                             AMD64AMode_IR(16, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                             AMD64AMode_IR(0, argp)));
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                             AMD64AMode_IR(16, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 160);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {

         case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm;
         case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm;
         case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm;
         case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm;
         do_64Fx4_w_rm:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
            iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm;
         case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm;
         case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm;
         case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm;
         do_32Fx8_w_rm:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
            iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         default:
            break;
      } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
      HReg        rsp     = hregAMD64_RSP();
      HReg        vHi     = newVRegV(env);
      HReg        vLo     = newVRegV(env);
      AMD64AMode* m8_rsp  = AMD64AMode_IR(-8, rsp);
      AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
      /* arg1 is the most significant (Q3), arg4 the least (Q0) */
      /* Get all the args into regs, before messing with the stack. */
      AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1);
      AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2);
      AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3);
      AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4);
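      /* Each 128-bit half is assembled just below the stack pointer,
         schematically:
            movq   q3, -8(%rsp)
            movq   q2, -16(%rsp)
            movups -16(%rsp), %vHi    -- vHi = Q3:Q2
         and likewise Q1:Q0 into vLo. */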
      /* less significant lane (Q2) at the lower address (-16(rsp)) */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp));
      /* and then the lower half .. */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_ITE) {
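      /* Compute both arms, start with the iftrue value in the
         destination, then conditionally overwrite it with the iffalse
         value.  XORing an AMD64CondCode with 1 yields its complement,
         since the condition encodings come in complementary pairs. */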
      HReg r1Hi, r1Lo, r0Hi, r0Lo;
      iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue);
      iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse);
      HReg dstHi = newVRegV(env);
      HReg dstLo = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi));
      addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi));
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   //avx_fail:
   vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselDVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         AMD64RI*    ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg        r  = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg        r  = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg        r  = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_V128) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg        r  = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      if (tyd == Ity_V256) {
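         /* Store the less significant 128-bit half at the lower
            address, consistent with little-endian lane ordering. */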
         HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         AMD64AMode* am0  = AMD64AMode_IR(0,  rA);
         AMD64AMode* am16 = AMD64AMode_IR(16, rA);
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      if (ty == Ity_V128) {
         HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_V256) {
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data);
         HReg rbp = hregAMD64_RBP();
         AMD64AMode* am0  = AMD64AMode_IR(stmt->Ist.Put.offset + 0,  rbp);
         AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      AMD64AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
              puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, puti->data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty  = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for
         memcheck-created IR) we get t = address-expression, t is
         later used twice, and so doing this naturally turns the
         address-expression back into an AMD64 amode. */
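      /* For example, t = Add64(t2, Const(16)) becomes
            leaq 16(%r_t2), %r_t
         rather than an add into a fresh register. */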
      if (ty == Ity_I64
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Aam.IR.reg;
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
         } else {
            addInstr(env, AMD64Instr_Lea64(am,dst));
         }
         return;
      }

      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V256) {
         HReg rHi, rLo, dstHi, dstLo;
         iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_vMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_vMOVsd_RR(rLo,dstLo) );
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Throw out any return types we don't know about. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128: case Ity_V256:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %rax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc.spOff
               tells us where.  Fish it off the stack and then move
               the stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg dst = lookupIRTemp(env, d->tmp);
            AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
            add_to_rsp(env, addToSp);
            return;
         }
         case Ity_V256: {
            /* See comments for Ity_V128. */
            vassert(rloc.pri == RLPri_V256SpRel);
            vassert(addToSp >= 32);
            HReg dstLo, dstHi;
            lookupIRTempPair(&dstHi, &dstLo, env, d->tmp);
            AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo ));
            AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi ));
            add_to_rsp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, AMD64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expd into %rax, and cas->data into %rbx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rData = iselIntExpr_R(env, cas->dataLo);
         HReg rExpd = iselIntExpr_R(env, cas->expdLo);
         HReg rOld  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
         addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
         switch (ty) {
            case Ity_I64: sz = 8; break;
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, AMD64Instr_ACAS(am, sz));
         addInstr(env, AMD64Instr_CMov64(
                          Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
         return;
      } else {
         /* double CAS */
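         /* As above, but using lock; cmpxchg8b (two I32s) or
            lock; cmpxchg16b (two I64s): the expected value goes in
            %rdx:%rax, the replacement in %rcx:%rbx, and on failure
            the old memory contents come back in %rdx:%rax. */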
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");

      AMD64CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      AMD64AMode*   amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
                                          hregAMD64_RBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amRIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_syscall:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
                                             amRIP, Acc_ALWAYS,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an indirect transfer,
               as that's the cheapest alternative that is
               allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
         } else {
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_Yield: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( IRSB* bb,
                            VexArch arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr64 max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   AMD64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT
                     | VEX_HWCAPS_AMD64_AVX
                     | VEX_HWCAPS_AMD64_RDTSCP
                     | VEX_HWCAPS_AMD64_BMI
                     | VEX_HWCAPS_AMD64_AVX2)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
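   /* vregmapHI is only used for I128 and V256 temps, which need a
      pair of 64-bit integer / 128-bit vector virtual registers
      respectively; see the switch below. */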

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_I128:
            hreg   = mkHReg(j++, HRcInt64, True);
            hregHI = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128:
            hreg = mkHReg(j++, HRcVec128, True);
            break;
         case Ity_V256:
            hreg   = mkHReg(j++, HRcVec128, True);
            hregHI = mkHReg(j++, HRcVec128, True);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = AMD64AMode_IR(offs_Host_EvC_Counter,  hregAMD64_RBP());
   amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, AMD64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/