
/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
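
/* As a reminder of the encodings involved (per the usual x87/SSE
   control-register layouts, not anything defined in this file):
   0x027F has bits 5..0 set (all six x87 exceptions masked), the
   precision-control field (bits 9..8) = 10b (53-bit double), and the
   rounding-control field (bits 11..10) = 00b (nearest).  0x1F80 has
   %mxcsr bits 12..7 set (all six SSE exceptions masked) and its
   rounding field (bits 14..13) = 00b (nearest).  The debug value
   0x037F above differs from 0x027F only in selecting 64-bit
   (extended) precision. */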


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 128-bit integer-typed
             IRTemps.  It holds the identity of a second
             64-bit virtual HReg, which holds the high half
             of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constant -- set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk     ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr         ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk     ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr         ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk     ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr         ( ISelEnv* env, IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
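
/* For example:
      fitsIn32Bits(0x000000007FFFFFFFULL) -> True   (top 33 bits all 0)
      fitsIn32Bits(0xFFFFFFFF80000000ULL) -> True   (top 33 bits all 1)
      fitsIn32Bits(0x0000000080000000ULL) -> False  (bit 31 set, bit 32 clear)
*/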

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push a 64-bit constant on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
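
/* To illustrate the two cases above (example values only): pushing
   0xFFFFFFFF80000000 takes the easy route, since pushq sign-extends
   its 32-bit immediate:
      pushq $0x80000000
   whereas pushing 0x0000000080000000 would be mangled by that
   sign-extension, and so needs a scratch register:
      movabsq $0x80000000, %tmp
      pushq %tmp
*/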


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      IRExprP__VECRET implies ill-formed IR. */
   vassert(e != IRExprP__VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e == IRExprP__BBPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}
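
/* For instance (a sketch, with a made-up guest-state offset): given
   Get(24,I64) the function above hands back the single instruction
   'movq 24(%rbp), dst', and a 64-bit constant failing fitsIn32Bits
   becomes a single movabsq.  Anything needing two or more
   instructions yields NULL, which pushes doHelperCall below onto its
   slow scheme. */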


/* Do a complete function call.  |guard| is an Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExprP__VECRET and IRExprP__BBPTR are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special value IRExprP__VECRET, in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExprP__BBPTR, in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExprP__VECRET is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExprP__BBPTR, but
      not IRExprP__VECRET.  Indeed, if the IR is well-formed, we can
      never see IRExprP__VECRET at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExprP__VECRET_or_BBPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      vassert(0); //ATC
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg == IRExprP__BBPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nBBPTRs++;
      }
      else if (UNLIKELY(arg == IRExprP__VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, Ptr_to_ULong(cee->addr), n_args, *retloc));
}
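
/* A worked example of the above (illustrative only): an unconditional
   2-arg call whose args are both I64 temps qualifies for the fast
   scheme and comes out as just
      movq <vreg1>, %rdi
      movq <vreg2>, %rsi
      call <helper>
   The same call guarded by a non-constant condition takes the slow
   scheme instead: both args are first computed into fresh vregs, the
   guard is evaluated into a condition code, and only then are the
   vregs moved into %rdi/%rsi ahead of the conditional call. */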


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

         movq %off, %tmp
         addq $bias, %tmp         (if bias != 0)
         andq $7, %tmp
         ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
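
/* This works because IRRoundingMode uses the same 2-bit encoding as
   the hardware (0=nearest, 1=-inf, 2=+inf, 3=zero) and %mxcsr keeps
   its rounding field in bits 14:13.  For example, mode == 3 (round
   towards zero) yields (3 << 13) | 0x1F80 = 0x7F80. */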


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
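
/* Same trick as for %mxcsr above, except the x87 control word keeps
   its rounding field in bits 11:10, hence the shift by 10.  For
   example, mode == 1 (round towards -infinity) yields
   (1 << 10) | 0x027F = 0x067F. */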


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}
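
/* In SSE terms the above is (a sketch):
      pcmpeqd %dst, %dst      -- dst := all ones
      pxor    %src, %dst      -- dst := src xor 11...1  ==  ~src
*/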


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
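
/* For example, bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL,
   since 0xA5 = 10100101b has bits 0, 2, 5 and 7 set. */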


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }
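
      /* To illustrate the widening above (a sketch only): Sar16(x, $5)
         comes out as
            movq %<x>, %dst
            shlq $48, %dst
            sarq $48, %dst     -- sign-extend the 16-bit value
            sarq $5, %dst
         The Shl cases need no widening, since any garbage in the
         upper bits just moves further up, above the 8/16/32 result
         bits that are defined. */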

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32  = newVRegI(env);
         HReg b32  = newVRegI(env);
         HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
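
      /* For example (a sketch): MullU16(a,b) zero-extends both 16-bit
         operands in place --
            shlq $48, %a32 ; shrq $48, %a32
            shlq $48, %b32 ; shrq $48, %b32
         -- after which a single 64-bit imulq of the cleaned-up
         operands leaves the full 32-bit product in the low half
         of b32. */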

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }
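
      /* The 0x45 mask keeps exactly the bits ucomisd defines in
         %rflags: ZF (0x40), PF (0x04) and CF (0x01), so
         0x40|0x04|0x01 = 0x45. */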

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

      break;
   }
1376
sewardjf67eadf2005-02-03 03:53:52 +00001377 /* --------- UNARY OP --------- */
1378 case Iex_Unop: {
sewardj7f039c42005-02-04 21:13:55 +00001379
sewardj176ad2f2005-04-27 11:55:08 +00001380 /* 1Uto8(64to1(expr64)) */
sewardjca257bc2010-09-08 08:34:52 +00001381 {
1382 DEFINE_PATTERN( p_1Uto8_64to1,
1383 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1384 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
1385 IRExpr* expr64 = mi.bindee[0];
1386 HReg dst = newVRegI(env);
1387 HReg src = iselIntExpr_R(env, expr64);
1388 addInstr(env, mk_iMOVsd_RR(src,dst) );
1389 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1390 AMD64RMI_Imm(1), dst));
sewardjf67eadf2005-02-03 03:53:52 +00001391 return dst;
1392 }
sewardjca257bc2010-09-08 08:34:52 +00001393 }
1394
1395 /* 8Uto64(LDle(expr64)) */
1396 {
1397 DEFINE_PATTERN(p_LDle8_then_8Uto64,
1398 unop(Iop_8Uto64,
1399 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1400 if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
1401 HReg dst = newVRegI(env);
1402 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1403 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
1404 return dst;
1405 }
1406 }
1407
1408 /* 16Uto64(LDle(expr64)) */
1409 {
1410 DEFINE_PATTERN(p_LDle16_then_16Uto64,
1411 unop(Iop_16Uto64,
1412 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1413 if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
1414 HReg dst = newVRegI(env);
1415 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1416 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
1417 return dst;
1418 }
1419 }
1420
sewardj9cc2bbf2011-06-05 17:56:03 +00001421 /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
1422 Use 32 bit arithmetic and let the default zero-extend rule
1423 do the 32Uto64 for free. */
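      /* (Added illustration: a 32-bit ALU op such as "addl %esi,%edi"
         implicitly zeroes bits 63:32 of the destination on amd64, so
         e.g. 32Uto64(Add32(x,y)) costs just the addl itself, with no
         separate widening instruction.) */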
      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
         AMD64AluOp aluOp = Aalu_INVALID;
         switch (opi) {
            case Iop_Add32: aluOp = Aalu_ADD; break;
            case Iop_Sub32: aluOp = Aalu_SUB; break;
            case Iop_And32: aluOp = Aalu_AND; break;
            case Iop_Or32:  aluOp = Aalu_OR;  break;
            case Iop_Xor32: aluOp = Aalu_XOR; break;
            default: break;
         }
         if (aluOp != Aalu_INVALID) {
            /* For commutative ops we assume any literal values are on
               the second operand. */
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, argL);
            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* just fall through to normal handling for Iop_32Uto64 */
      }

      /* Fallback cases */
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64:
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
                                            src, dst) );
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst   = newVRegI(env);
            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int  shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
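         /* For the 1Sto{8,16,32,64} cases below the Bit is first
            materialised as 0 or 1 with Set64, then smeared across the
            whole register by shifting left 63 and arithmetically
            shifting back right 63: e.g. 1 -> 0x8000000000000000 ->
            all ones. */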
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
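         /* (Worked example for Clz64 below: for src = 1, bsrq yields
            0, the index of the highest set bit, and 63 - 0 = 63 =
            Clz64(1).  bsfq/bsrq leave the result undefined for a zero
            input, but the IR ops carry the same restriction, so that
            appears to be fine.) */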
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

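         /* CmpwNEZ{64,32}(src) below compute (src | -src) >>s 63: the
            OR has its top bit set iff src is nonzero, so the
            arithmetic shift yields all zeroes or all ones.  E.g. for
            src = 4: 4 | -4 = 0xFFFFFFFFFFFFFFFC, >>s 63 = all ones. */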
         case Iop_CmpwNEZ64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovxLQ(False, src, src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

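         /* Left{8,16,32,64}(src) is defined as src | -src.  The code
            below computes it at 64 bits whatever the stated width;
            presumably that is fine because only the low 8/16/32 bits
            of a narrow result are observed by later uses. */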
         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg        dst     = newVRegI(env);
            HReg        vec     = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

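         /* In the next two cases the vector is parked in the 16 bytes
            just below %rsp, so (little-endian) the low half lands at
            -16(%rsp) and the high half at -8(%rsp); the wanted half
            is then reloaded as an ordinary 64-bit integer. */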
         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            HReg dst = newVRegI(env);
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
            HReg rsp = hregAMD64_RSP();
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            HReg vHi, vLo, vec;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            /* Do the first part of the selection by deciding which of
               the 128 bit registers to look at, and the second part
               using the same scheme as for V128{HI}to64 above. */
            Int off = 0;
            switch (e->Iex.Unop.op) {
               case Iop_V256to64_0: vec = vLo; off = -16; break;
               case Iop_V256to64_1: vec = vLo; off =  -8; break;
               case Iop_V256to64_2: vec = vHi; off = -16; break;
               case Iop_V256to64_3: vec = vHi; off =  -8; break;
               default: vassert(0);
            }
            HReg        dst     = newVRegI(env);
            HReg        rsp     = hregAMD64_RSP();
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         case Iop_GetMSBs8x8: {
            /* Note: the following assumes the helper is of
               signature
                  UInt fn ( ULong ), and is not a regparm fn.
            */
            HReg dst = newVRegI(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            fn = (HWord)h_generic_calc_GetMSBs8x8;
            addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           1, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 8 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 24 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         case Iop_GetMSBs8x16: {
            /* Note: the following assumes the helper is of signature
                  UInt fn ( ULong w64hi, ULong w64Lo ),
               and is not a regparm fn. */
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            HReg rsp = hregAMD64_RSP();
            fn = (HWord)h_generic_calc_GetMSBs8x16;
            AMD64AMode* m8_rsp  = AMD64AMode_IR( -8, rsp);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            /* hi 64 bits into RDI -- the first arg */
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(m8_rsp),
                                             hregAMD64_RDI() )); /* 1st arg */
            /* lo 64 bits into RSI -- the 2nd arg */
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(m16_rsp),
                                             hregAMD64_RSI() )); /* 2nd arg */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           2, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 16 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 16 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         default:
            break;
      }

      /* Deal with unary 64-bit SIMD ops. */
      switch (e->Iex.Unop.op) {
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of
            signature
               ULong fn ( ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst = newVRegI(env);
         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
         addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV,
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
              e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed for
         args, and 64 or 32 bits for return type. */
      if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
         goto irreducible;

      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);

      /* Move to dst, and zero out the top 32 bits if the result type is
         Ity_I32.  Probably overkill, but still .. */
      if (e->Iex.CCall.retty == Ity_I64)
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
      else
         addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));

      return dst;
   }

   /* --------- LITERAL --------- */
   /* 64/32/16/8-bit literals */
   case Iex_Const:
      if (ty == Ity_I64) {
         HReg r = newVRegI(env);
         addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
         return r;
      } else {
         AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg      r   = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
         return r;
      }

   /* --------- MULTIPLEX --------- */
   case Iex_ITE: { // VFD
      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         HReg     r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
         AMD64RM* r0  = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
         HReg     dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,dst));
         AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
         addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
         return dst;
      }
      break;
   }

   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      IRTriop *triop = e->Iex.Triop.details;
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (triop->op == Iop_PRemC3210F64
          || triop->op == Iop_PRem1C3210F64) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg1   = iselDblExpr(env, triop->arg2);
         HReg        arg2   = iselDblExpr(env, triop->arg3);
         HReg        dst    = newVRegI(env);
         addInstr(env, AMD64Instr_A87Free(2));

         /* one arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         /* other arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         switch (triop->op) {
            case Iop_PRemC3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
               break;
            case Iop_PRem1C3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
               break;
            default:
               vassert(0);
         }
         /* Ignore the result, and instead make off with the FPU's
            C3210 flags (in the status word). */
         addInstr(env, AMD64Instr_A87StSW(m8_rsp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_complex);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

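   /* (Added example of the pattern just below: an IR tree of the form
         Add64( Add64(t1, Shl64(t2, 3:I8)), 40:I64 )
      collapses into the single amode 40(%t1,%t2,8) -- t1/t2 standing
      in for whatever registers the subtrees land in.) */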
   /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
   /*              bind0        bind1  bind2   bind3   */
   DEFINE_PATTERN(p_complex,
      binop( Iop_Add64,
             binop( Iop_Add64,
                    bind(0),
                    binop(Iop_Shl64, bind(1), bind(2))
                  ),
             bind(3)
           )
   );
   if (matchIRExpr(&mi, p_complex, e)) {
      IRExpr* expr1  = mi.bindee[0];
      IRExpr* expr2  = mi.bindee[1];
      IRExpr* imm8   = mi.bindee[2];
      IRExpr* simm32 = mi.bindee[3];
      if (imm8->tag == Iex_Const
          && imm8->Iex.Const.con->tag == Ico_U8
          && imm8->Iex.Const.con->Ico.U8 < 4
          /* imm8 is OK, now check simm32 */
          && simm32->tag == Iex_Const
          && simm32->Iex.Const.con->tag == Ico_U64
          && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
         UInt shift  = imm8->Iex.Const.con->Ico.U8;
         UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
         HReg r1     = iselIntExpr_R(env, expr1);
         HReg r2     = iselIntExpr_R(env, expr2);
         vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
         return AMD64AMode_IRRS(offset, r1, r2, shift);
      }
   }

   /* Add64(expr1, Shl64(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return AMD64AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add64(expr,i) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
       && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return AMD64AMode_IR(
                toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
                r1
             );
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return AMD64AMode_IR(0, r1);
   }
}


/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an AMD64RMI operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Armi_Imm:
         return rmi;
      case Armi_Reg:
         vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rmi->Armi.Reg.reg));
         return rmi;
      case Armi_Mem:
         vassert(sane_AMode(rmi->Armi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate 64/32/16/8 */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
         case Ico_U16:
            return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:
            return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
      }
   }

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                        hregAMD64_RBP()));
   }

   /* special case: 64-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I64
       && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return AMD64RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RMI_Reg(r);
   }
}


/* --------------------- RIs --------------------- */

/* Calculate an expression into an AMD64RI operand.  As with
   iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Ari_Imm:
         return ri;
      case Ari_Reg:
         vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->Ari.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown amd64 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}


/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an AMD64RM operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Arm_Reg:
         vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rm->Arm.Reg.reg));
         return rm;
      case Arm_Mem:
         vassert(sane_AMode(rm->Arm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown amd64 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                       hregAMD64_RBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r64,dst));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
      return Acc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
      return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
   }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 64to1 --- */

   /* 64to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: 32to1 --- */

   /* 32to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   /* CmpNEZ16(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFFFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

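   /* (The OR-fusion just below relies on "orq" setting Z exactly when
      x | y == 0, i.e. when both operands are zero, so
      CmpNEZ64(Or64(x,y)) needs no separate comparison against 0.) */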
   /* CmpNEZ64(Or64(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg      r0   = iselIntExpr_R(env, mi.bindee[0]);
         AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg      tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
         return Acc_NZ;
      }
   }

   /* CmpNEZ64(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, AMD64Instr_Test64(0xFF,r1));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg      r    = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg      r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX16");
      }
   }

   /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
      Saves a "movq %rax, %tmp" compared to the default route. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE64
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      HReg    tmp = newVRegI(env);
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U64);
      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    cal->Iex.CCall.cee,
                    cal->Iex.CCall.retty, cal->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);
      /* */
      addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
                                      AMD64RMI_Reg(hregAMD64_RAX()), tmp));
      return Acc_NZ;
   }

   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U
           || e->Iex.Binop.op == Iop_CasCmpEQ64
           || e->Iex.Binop.op == Iop_CasCmpNE64
           || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
         case Iop_CmpNE64:
         case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
         case Iop_CmpLT64S: return Acc_L;
         case Iop_CmpLT64U: return Acc_B;
         case Iop_CmpLE64S: return Acc_LE;
         case Iop_CmpLE64U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX64");
      }
   }

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32
           || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
         case Iop_CmpNE32:
         case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
         case Iop_CmpLT32S: return Acc_L;
         case Iop_CmpLT32U: return Acc_B;
         case Iop_CmpLE32S: return Acc_LE;
         case Iop_CmpLE32U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX32");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit)               ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these will be
   virtual regs; in any case they must not be changed by subsequent
   code emitted by the caller. */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* read 128-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            /* get one operand into %rax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which. */
            HReg     tLo    = newVRegI(env);
            HReg     tHi    = newVRegI(env);
            Bool     syned  = toBool(e->Iex.Binop.op == Iop_MullS64);
            AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg     rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_MulL(syned, rmLeft));
            /* Result is now in RDX:RAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 128 x 64 -> (64(rem),64(div)) division */
         case Iop_DivModU128to64:
         case Iop_DivModS128to64: {
            /* Get the 128-bit operand into rdx:rax, and the other into
               any old R/M. */
            HReg sHi, sLo;
            HReg     tLo     = newVRegI(env);
            HReg     tHi     = newVRegI(env);
            Bool     syned   = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
            AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;

         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard SSE register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
      return res;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
      /* Given an I32, produce an IEEE754 float with the same bit
         pattern. */
      HReg        dst    = newVRegV(env);
      HReg        src    = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
      addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg    = iselFltExpr(env, e->Iex.Binop.arg2);
      HReg        dst    = newVRegV(env);

      /* arg now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host x87 rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
      addInstr(env, AMD64Instr_A87Free(1));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));

      /* Restore default x87 rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

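   /* For NegF32 below, negation just flips the sign bit: the two
      pushes build a 16-byte constant with only bit 31 set (the second
      push lands at the lower address, hence in lane 0), and the XOR
      flips bit 31 of lane 0 of the value, leaving the other lanes
      alone. */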
sewardjcc3d2192013-03-27 11:37:33 +00002643 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) {
2644 /* Sigh ... very rough code. Could do much better. */
2645 /* Get the 128-bit literal 00---0 10---0 into a register
2646 and xor it with the value to be negated. */
2647 HReg r1 = newVRegI(env);
2648 HReg dst = newVRegV(env);
2649 HReg tmp = newVRegV(env);
2650 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2651 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
2652 addInstr(env, mk_vMOVsd_RR(src,tmp));
2653 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
2654 addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 ));
2655 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
2656 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
2657 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
2658 add_to_rsp(env, 16);
2659 return dst;
2660 }
2661
2662 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) {
2663 IRQop *qop = e->Iex.Qop.details;
2664 HReg dst = newVRegV(env);
2665 HReg argX = iselFltExpr(env, qop->arg2);
2666 HReg argY = iselFltExpr(env, qop->arg3);
2667 HReg argZ = iselFltExpr(env, qop->arg4);
2668 /* XXXROUNDINGFIXME */
2669 /* set roundingmode here */
2670 /* subq $16, %rsp -- make a space*/
2671 sub_from_rsp(env, 16);
2672 /* Prepare 4 arg regs:
2673 leaq 0(%rsp), %rdi
2674 leaq 4(%rsp), %rsi
2675 leaq 8(%rsp), %rdx
2676 leaq 12(%rsp), %rcx
2677 */
2678 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2679 hregAMD64_RDI()));
2680 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
2681 hregAMD64_RSI()));
2682 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2683 hregAMD64_RDX()));
2684 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
2685 hregAMD64_RCX()));
2686 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2687 movss %argX, 0(%rsi)
2688 movss %argY, 0(%rdx)
2689 movss %argZ, 0(%rcx)
2690 */
2691 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX,
2692 AMD64AMode_IR(0, hregAMD64_RSI())));
2693 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY,
2694 AMD64AMode_IR(0, hregAMD64_RDX())));
2695 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ,
2696 AMD64AMode_IR(0, hregAMD64_RCX())));
2697 /* call the helper */
2698 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2699 (ULong)(HWord)h_generic_calc_MAddF32,
sewardj74142b82013-08-08 10:28:59 +00002700 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002701 /* fetch the result from memory, using %r_argp, which the
2702 register allocator will keep alive across the call. */
2703 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
2704 AMD64AMode_IR(0, hregAMD64_RSP())));
2705 /* and finally, clear the space */
2706 add_to_rsp(env, 16);
2707 return dst;
2708 }
2709
sewardj8d965312005-02-25 02:48:47 +00002710 ppIRExpr(e);
2711 vpanic("iselFltExpr_wrk");
2712}
sewardj18303862005-02-21 12:36:54 +00002713
2714
2715/*---------------------------------------------------------*/
2716/*--- ISEL: Floating point expressions (64 bit) ---*/
2717/*---------------------------------------------------------*/
2718
2719/* Compute a 64-bit floating point value into the lower half of an xmm
2720 register, the identity of which is returned. As with
2721 iselIntExpr_R, the returned reg will be virtual, and it must not be
2722 changed by subsequent code emitted by the caller.
2723*/
2724
2725/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2726
2727 Type S (1 bit) E (11 bits) F (52 bits)
2728 ---- --------- ----------- -----------
2729 signalling NaN u 2047 (max) .0uuuuu---u
2730 (with at least
2731 one 1 bit)
2732 quiet NaN u 2047 (max) .1uuuuu---u
2733
2734 negative infinity 1 2047 (max) .000000---0
2735
2736 positive infinity 0 2047 (max) .000000---0
2737
2738 negative zero 1 0 .000000---0
2739
2740 positive zero 0 0 .000000---0
2741*/
2742
2743static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2744{
2745 HReg r = iselDblExpr_wrk( env, e );
2746# if 0
2747 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2748# endif
2749 vassert(hregClass(r) == HRcVec128);
2750 vassert(hregIsVirtual(r));
2751 return r;
2752}
2753
2754/* DO NOT CALL THIS DIRECTLY */
2755static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2756{
2757 IRType ty = typeOfIRExpr(env->type_env,e);
2758 vassert(e);
2759 vassert(ty == Ity_F64);
2760
sewardjdd40fdf2006-12-24 02:20:24 +00002761 if (e->tag == Iex_RdTmp) {
2762 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj18303862005-02-21 12:36:54 +00002763 }
2764
sewardj8d965312005-02-25 02:48:47 +00002765 if (e->tag == Iex_Const) {
2766 union { ULong u64; Double f64; } u;
2767 HReg res = newVRegV(env);
2768 HReg tmp = newVRegI(env);
2769 vassert(sizeof(u) == 8);
2770 vassert(sizeof(u.u64) == 8);
2771 vassert(sizeof(u.f64) == 8);
2772
2773 if (e->Iex.Const.con->tag == Ico_F64) {
2774 u.f64 = e->Iex.Const.con->Ico.F64;
2775 }
2776 else if (e->Iex.Const.con->tag == Ico_F64i) {
2777 u.u64 = e->Iex.Const.con->Ico.F64i;
2778 }
2779 else
2780 vpanic("iselDblExpr(amd64): const");
2781
2782 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2783 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2784 addInstr(env, AMD64Instr_SseLdSt(
2785 True/*load*/, 8, res,
2786 AMD64AMode_IR(0, hregAMD64_RSP())
2787 ));
2788 add_to_rsp(env, 8);
2789 return res;
2790 }
sewardj9da16972005-02-21 13:58:26 +00002791
sewardje768e922009-11-26 17:17:37 +00002792 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj9da16972005-02-21 13:58:26 +00002793 AMD64AMode* am;
2794 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002795 vassert(e->Iex.Load.ty == Ity_F64);
2796 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj9da16972005-02-21 13:58:26 +00002797 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2798 return res;
2799 }
sewardj18303862005-02-21 12:36:54 +00002800
2801 if (e->tag == Iex_Get) {
2802 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2803 hregAMD64_RBP() );
2804 HReg res = newVRegV(env);
2805 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2806 return res;
2807 }
2808
sewardj8d965312005-02-25 02:48:47 +00002809 if (e->tag == Iex_GetI) {
2810 AMD64AMode* am
2811 = genGuestArrayOffset(
2812 env, e->Iex.GetI.descr,
2813 e->Iex.GetI.ix, e->Iex.GetI.bias );
2814 HReg res = newVRegV(env);
2815 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2816 return res;
2817 }
2818
sewardj4796d662006-02-05 16:06:26 +00002819 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00002820 IRTriop *triop = e->Iex.Triop.details;
sewardj137015d2005-03-27 04:01:15 +00002821 AMD64SseOp op = Asse_INVALID;
florian420bfa92012-06-02 20:29:22 +00002822 switch (triop->op) {
sewardj137015d2005-03-27 04:01:15 +00002823 case Iop_AddF64: op = Asse_ADDF; break;
2824 case Iop_SubF64: op = Asse_SUBF; break;
2825 case Iop_MulF64: op = Asse_MULF; break;
2826 case Iop_DivF64: op = Asse_DIVF; break;
2827 default: break;
2828 }
2829 if (op != Asse_INVALID) {
2830 HReg dst = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00002831 HReg argL = iselDblExpr(env, triop->arg2);
2832 HReg argR = iselDblExpr(env, triop->arg3);
sewardj137015d2005-03-27 04:01:15 +00002833 addInstr(env, mk_vMOVsd_RR(argL, dst));
sewardj4796d662006-02-05 16:06:26 +00002834 /* XXXROUNDINGFIXME */
2835 /* set roundingmode here */
sewardj137015d2005-03-27 04:01:15 +00002836 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2837 return dst;
2838 }
2839 }
2840
sewardjcc3d2192013-03-27 11:37:33 +00002841 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) {
2842 IRQop *qop = e->Iex.Qop.details;
2843 HReg dst = newVRegV(env);
2844 HReg argX = iselDblExpr(env, qop->arg2);
2845 HReg argY = iselDblExpr(env, qop->arg3);
2846 HReg argZ = iselDblExpr(env, qop->arg4);
2847 /* XXXROUNDINGFIXME */
2848 /* set roundingmode here */
2849      /* subq $32, %rsp -- make a space */
2850 sub_from_rsp(env, 32);
2851 /* Prepare 4 arg regs:
2852 leaq 0(%rsp), %rdi
2853 leaq 8(%rsp), %rsi
2854 leaq 16(%rsp), %rdx
2855 leaq 24(%rsp), %rcx
2856 */
2857 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2858 hregAMD64_RDI()));
2859 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2860 hregAMD64_RSI()));
2861 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
2862 hregAMD64_RDX()));
2863 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
2864 hregAMD64_RCX()));
2865 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2866 movsd %argX, 0(%rsi)
2867 movsd %argY, 0(%rdx)
2868 movsd %argZ, 0(%rcx)
2869 */
2870 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX,
2871 AMD64AMode_IR(0, hregAMD64_RSI())));
2872 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY,
2873 AMD64AMode_IR(0, hregAMD64_RDX())));
2874 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ,
2875 AMD64AMode_IR(0, hregAMD64_RCX())));
2876 /* call the helper */
2877 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2878 (ULong)(HWord)h_generic_calc_MAddF64,
sewardj74142b82013-08-08 10:28:59 +00002879 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002880      /* fetch the result from memory, at 0(%rsp); that scratch space
2881         is still reserved, since %rsp is only readjusted afterwards. */
2882 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
2883 AMD64AMode_IR(0, hregAMD64_RSP())));
2884 /* and finally, clear the space */
2885 add_to_rsp(env, 32);
2886 return dst;
2887 }
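   /* The helper used above presumably has a shape along the lines
      of
         void h_generic_calc_MAddF64 ( Double* res, Double* argX,
                                       Double* argY, Double* argZ );
      (host_generic_maddf.h has the real prototype).  All four
      parameters travel as pointers into the stack scratch area,
      which is why only integer argument registers need setting
      up. */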
2888
sewardjb183b852006-02-03 16:08:03 +00002889 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
sewardj25a85812005-05-08 23:03:48 +00002890 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2891 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2892 HReg dst = newVRegV(env);
2893
2894      /* 'arg' now holds the value to be rounded.  The first thing to
2895         do is set the FPU's rounding mode accordingly. */
2896
2897 /* Set host x87 rounding mode */
2898 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2899
2900 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2901 addInstr(env, AMD64Instr_A87Free(1));
sewardjd15b5972010-06-27 09:06:34 +00002902 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002903 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
sewardjd15b5972010-06-27 09:06:34 +00002904 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002905 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2906
2907 /* Restore default x87 rounding. */
2908 set_FPU_rounding_default( env );
2909
2910 return dst;
2911 }
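   /* Sketch of what the RoundF64toInt case above amounts to,
      assuming the obvious x87 encodings of the A87 pseudo-ops:
         movsd   %arg, -8(%rsp)
         fldl    -8(%rsp)
         frndint
         fstpl   -8(%rsp)
         movsd   -8(%rsp), %dst
      bracketed by instructions which set and then restore the x87
      rounding mode. */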
2912
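   /* Note: 'triop' below is read out of the expression union
      unconditionally, but it is only dereferenced once the
      Iex_Triop tag check has succeeded, so the early read is
      harmless. */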
florian420bfa92012-06-02 20:29:22 +00002913 IRTriop *triop = e->Iex.Triop.details;
sewardj4796d662006-02-05 16:06:26 +00002914 if (e->tag == Iex_Triop
florian420bfa92012-06-02 20:29:22 +00002915 && (triop->op == Iop_ScaleF64
2916 || triop->op == Iop_AtanF64
2917 || triop->op == Iop_Yl2xF64
2918 || triop->op == Iop_Yl2xp1F64
2919 || triop->op == Iop_PRemF64
2920 || triop->op == Iop_PRem1F64)
sewardj25a85812005-05-08 23:03:48 +00002921 ) {
2922 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
florian420bfa92012-06-02 20:29:22 +00002923 HReg arg1 = iselDblExpr(env, triop->arg2);
2924 HReg arg2 = iselDblExpr(env, triop->arg3);
sewardj25a85812005-05-08 23:03:48 +00002925 HReg dst = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00002926 Bool arg2first = toBool(triop->op == Iop_ScaleF64
2927 || triop->op == Iop_PRemF64
2928 || triop->op == Iop_PRem1F64);
sewardj25a85812005-05-08 23:03:48 +00002929 addInstr(env, AMD64Instr_A87Free(2));
2930
2931 /* one arg -> top of x87 stack */
2932 addInstr(env, AMD64Instr_SseLdSt(
2933 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00002934 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002935
2936 /* other arg -> top of x87 stack */
2937 addInstr(env, AMD64Instr_SseLdSt(
2938 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00002939 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002940
2941 /* do it */
sewardj4796d662006-02-05 16:06:26 +00002942 /* XXXROUNDINGFIXME */
2943 /* set roundingmode here */
florian420bfa92012-06-02 20:29:22 +00002944 switch (triop->op) {
sewardj25a85812005-05-08 23:03:48 +00002945 case Iop_ScaleF64:
2946 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
2947 break;
2948 case Iop_AtanF64:
2949 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
2950 break;
2951 case Iop_Yl2xF64:
2952 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
2953 break;
sewardj5e205372005-05-09 02:57:08 +00002954 case Iop_Yl2xp1F64:
2955 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
2956 break;
sewardjf4c803b2006-09-11 11:07:34 +00002957 case Iop_PRemF64:
2958 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
2959 break;
sewardj4970e4e2008-10-11 10:07:55 +00002960 case Iop_PRem1F64:
2961 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
2962 break;
sewardj25a85812005-05-08 23:03:48 +00002963 default:
2964 vassert(0);
2965 }
2966
2967 /* save result */
sewardjd15b5972010-06-27 09:06:34 +00002968 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002969 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2970 return dst;
2971 }
sewardj1a01e652005-02-23 11:39:21 +00002972
sewardj6c299f32009-12-31 18:00:12 +00002973 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00002974 HReg dst = newVRegV(env);
2975 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
2976 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2977 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
2978 set_SSE_rounding_default( env );
2979 return dst;
2980 }
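   /* SseSI2SF(8, 8, ..) is the 64-bit-int to F64 conversion --
      cvtsi2sdq, one assumes -- executed with %mxcsr temporarily
      set to the rounding mode requested by arg1. */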
2981
sewardj6c299f32009-12-31 18:00:12 +00002982 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00002983 HReg dst = newVRegV(env);
2984 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2985 set_SSE_rounding_default( env );
2986 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
2987 return dst;
2988 }
2989
sewardj137015d2005-03-27 04:01:15 +00002990 if (e->tag == Iex_Unop
2991 && (e->Iex.Unop.op == Iop_NegF64
2992 || e->Iex.Unop.op == Iop_AbsF64)) {
sewardj8d965312005-02-25 02:48:47 +00002993 /* Sigh ... very rough code. Could do much better. */
sewardj137015d2005-03-27 04:01:15 +00002994      /* Get the 128-bit literal 00---0 10---0 (low-lane sign bit only)
2995         into a register and xor/and-not it with the value to be negated. */
sewardj8d965312005-02-25 02:48:47 +00002996 HReg r1 = newVRegI(env);
2997 HReg dst = newVRegV(env);
sewardj137015d2005-03-27 04:01:15 +00002998 HReg tmp = newVRegV(env);
sewardj8d965312005-02-25 02:48:47 +00002999 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3000 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
sewardj137015d2005-03-27 04:01:15 +00003001 addInstr(env, mk_vMOVsd_RR(src,tmp));
sewardj8d965312005-02-25 02:48:47 +00003002 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3003 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3004 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
sewardj137015d2005-03-27 04:01:15 +00003005 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
3006
3007 if (e->Iex.Unop.op == Iop_NegF64)
3008 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3009 else
3010 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
3011
sewardj8d965312005-02-25 02:48:47 +00003012 add_to_rsp(env, 16);
3013 return dst;
3014 }
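   /* The 128-bit constant assembled above is zero in the upper
      lane and 0x8000000000000000 in the lower one.  XORing with it
      flips the low lane's sign bit (NegF64); Asse_ANDN computes
      dst = ~dst & src with the mask sitting in dst, so it clears
      that bit instead (AbsF64). */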
3015
sewardj4796d662006-02-05 16:06:26 +00003016 if (e->tag == Iex_Binop) {
sewardj25a85812005-05-08 23:03:48 +00003017 A87FpOp fpop = Afp_INVALID;
sewardj4796d662006-02-05 16:06:26 +00003018 switch (e->Iex.Binop.op) {
sewardj25a85812005-05-08 23:03:48 +00003019 case Iop_SqrtF64: fpop = Afp_SQRT; break;
sewardj5e205372005-05-09 02:57:08 +00003020 case Iop_SinF64: fpop = Afp_SIN; break;
3021 case Iop_CosF64: fpop = Afp_COS; break;
3022 case Iop_TanF64: fpop = Afp_TAN; break;
sewardj25a85812005-05-08 23:03:48 +00003023 case Iop_2xm1F64: fpop = Afp_2XM1; break;
3024 default: break;
3025 }
3026 if (fpop != Afp_INVALID) {
3027 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
sewardj4796d662006-02-05 16:06:26 +00003028 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
sewardj25a85812005-05-08 23:03:48 +00003029 HReg dst = newVRegV(env);
sewardj4796d662006-02-05 16:06:26 +00003030 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
sewardj25a85812005-05-08 23:03:48 +00003031 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
sewardj5e205372005-05-09 02:57:08 +00003032 addInstr(env, AMD64Instr_A87Free(nNeeded));
sewardjd15b5972010-06-27 09:06:34 +00003033 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj4796d662006-02-05 16:06:26 +00003034 /* XXXROUNDINGFIXME */
3035 /* set roundingmode here */
sewardj25a85812005-05-08 23:03:48 +00003036 addInstr(env, AMD64Instr_A87FpOp(fpop));
sewardj4796d662006-02-05 16:06:26 +00003037 if (e->Iex.Binop.op==Iop_TanF64) {
sewardj5e205372005-05-09 02:57:08 +00003038 /* get rid of the extra 1.0 that fptan pushes */
sewardjd15b5972010-06-27 09:06:34 +00003039 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj5e205372005-05-09 02:57:08 +00003040 }
sewardjd15b5972010-06-27 09:06:34 +00003041 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00003042 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3043 return dst;
3044 }
3045 }
sewardjc49ce232005-02-25 13:03:03 +00003046
3047 if (e->tag == Iex_Unop) {
3048 switch (e->Iex.Unop.op) {
sewardja3e98302005-02-01 15:55:05 +00003049//.. case Iop_I32toF64: {
3050//.. HReg dst = newVRegF(env);
3051//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3052//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3053//.. set_FPU_rounding_default(env);
3054//.. addInstr(env, X86Instr_FpLdStI(
3055//.. True/*load*/, 4, dst,
3056//.. X86AMode_IR(0, hregX86_ESP())));
sewardjc49ce232005-02-25 13:03:03 +00003057//.. add_to_esp(env, 4);
sewardja3e98302005-02-01 15:55:05 +00003058//.. return dst;
3059//.. }
sewardj924215b2005-03-26 21:50:31 +00003060 case Iop_ReinterpI64asF64: {
3061 /* Given an I64, produce an IEEE754 double with the same
3062 bit pattern. */
3063 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3064 HReg dst = newVRegV(env);
3065 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
3066 /* paranoia */
3067 set_SSE_rounding_default(env);
3068 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
3069 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3070 return dst;
3071 }
sewardjc49ce232005-02-25 13:03:03 +00003072 case Iop_F32toF64: {
sewardj9a036bf2005-03-14 18:19:08 +00003073 HReg f32;
sewardjc49ce232005-02-25 13:03:03 +00003074 HReg f64 = newVRegV(env);
3075 /* this shouldn't be necessary, but be paranoid ... */
3076 set_SSE_rounding_default(env);
sewardj9a036bf2005-03-14 18:19:08 +00003077 f32 = iselFltExpr(env, e->Iex.Unop.arg);
sewardjc49ce232005-02-25 13:03:03 +00003078 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
3079 return f64;
3080 }
3081 default:
3082 break;
3083 }
3084 }
sewardj8d965312005-02-25 02:48:47 +00003085
3086 /* --------- MULTIPLEX --------- */
florian99dd03e2013-01-29 03:56:06 +00003087 if (e->tag == Iex_ITE) { // VFD
3088 HReg r1, r0, dst;
sewardj8d965312005-02-25 02:48:47 +00003089 vassert(ty == Ity_F64);
florian99dd03e2013-01-29 03:56:06 +00003090 vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1);
3091 r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3092 r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
sewardj8d965312005-02-25 02:48:47 +00003093 dst = newVRegV(env);
florian99dd03e2013-01-29 03:56:06 +00003094 addInstr(env, mk_vMOVsd_RR(r1,dst));
3095 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00003096 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
sewardj8d965312005-02-25 02:48:47 +00003097 return dst;
3098 }
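   /* The 'cc ^ 1' above relies on the x86 condition-code encoding,
      in which flipping the bottom bit negates a condition: dst is
      set unconditionally to the iftrue value, and the cmov then
      overwrites it with the iffalse value exactly when the
      condition fails. */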
sewardj18303862005-02-21 12:36:54 +00003099
3100 ppIRExpr(e);
3101 vpanic("iselDblExpr_wrk");
3102}
sewardjc2bcb6f2005-02-07 00:17:12 +00003103
sewardj0852a132005-02-21 08:28:46 +00003104
3105/*---------------------------------------------------------*/
3106/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3107/*---------------------------------------------------------*/
3108
3109static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3110{
3111 HReg r = iselVecExpr_wrk( env, e );
3112# if 0
3113 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3114# endif
3115 vassert(hregClass(r) == HRcVec128);
3116 vassert(hregIsVirtual(r));
3117 return r;
3118}
3119
3120
3121/* DO NOT CALL THIS DIRECTLY */
3122static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3123{
sewardj69d98e32010-06-18 08:17:41 +00003124 HWord fn = 0; /* address of helper fn, if required */
3125 Bool arg1isEReg = False;
sewardj0852a132005-02-21 08:28:46 +00003126 AMD64SseOp op = Asse_INVALID;
3127 IRType ty = typeOfIRExpr(env->type_env,e);
3128 vassert(e);
3129 vassert(ty == Ity_V128);
3130
sewardjdd40fdf2006-12-24 02:20:24 +00003131 if (e->tag == Iex_RdTmp) {
3132 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj0852a132005-02-21 08:28:46 +00003133 }
3134
3135 if (e->tag == Iex_Get) {
3136 HReg dst = newVRegV(env);
3137 addInstr(env, AMD64Instr_SseLdSt(
3138 True/*load*/,
sewardj18303862005-02-21 12:36:54 +00003139 16,
sewardj0852a132005-02-21 08:28:46 +00003140 dst,
3141 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
3142 )
3143 );
3144 return dst;
3145 }
3146
sewardje768e922009-11-26 17:17:37 +00003147 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj1a01e652005-02-23 11:39:21 +00003148 HReg dst = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00003149 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj1a01e652005-02-23 11:39:21 +00003150 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
3151 return dst;
3152 }
3153
3154 if (e->tag == Iex_Const) {
3155 HReg dst = newVRegV(env);
3156 vassert(e->Iex.Const.con->tag == Ico_V128);
sewardj9ba870d2010-04-02 11:29:23 +00003157 switch (e->Iex.Const.con->Ico.V128) {
3158 case 0x0000:
3159 dst = generate_zeroes_V128(env);
sewardjacfbd7d2010-08-17 22:52:08 +00003160 break;
sewardj9ba870d2010-04-02 11:29:23 +00003161 case 0xFFFF:
3162 dst = generate_ones_V128(env);
sewardj9ba870d2010-04-02 11:29:23 +00003163 break;
sewardjacfbd7d2010-08-17 22:52:08 +00003164 default: {
3165 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3166 /* do push_uimm64 twice, first time for the high-order half. */
3167 push_uimm64(env, bitmask8_to_bytemask64(
3168 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
3169 ));
3170 push_uimm64(env, bitmask8_to_bytemask64(
3171 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
3172 ));
3173 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
3174 add_to_rsp(env, 16);
3175 break;
3176 }
sewardj1a01e652005-02-23 11:39:21 +00003177 }
sewardj9ba870d2010-04-02 11:29:23 +00003178 return dst;
sewardj1a01e652005-02-23 11:39:21 +00003179 }
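   /* An Ico_V128 constant is a 16-bit mask, one bit per byte of
      the vector; bitmask8_to_bytemask64 (presumably defined
      earlier in this file) expands 8 such bits into a 64-bit value
      whose bytes are each 0x00 or 0xFF.  So, e.g., 0x000F denotes
      the V128 whose low 4 bytes are all-ones and whose other 12
      are zero. */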
sewardj0852a132005-02-21 08:28:46 +00003180
3181 if (e->tag == Iex_Unop) {
3182 switch (e->Iex.Unop.op) {
3183
sewardj8d965312005-02-25 02:48:47 +00003184 case Iop_NotV128: {
3185 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3186 return do_sse_NotV128(env, arg);
3187 }
3188
sewardj09717342005-05-05 21:34:02 +00003189 case Iop_CmpNEZ64x2: {
3190 /* We can use SSE2 instructions for this. */
3191 /* Ideally, we want to do a 64Ix2 comparison against zero of
3192 the operand. Problem is no such insn exists. Solution
3193 therefore is to do a 32Ix4 comparison instead, and bitwise-
3194 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3195 let the not'd result of this initial comparison be a:b:c:d.
3196 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3197 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3198 giving the required result.
3199
3200 The required selection sequence is 2,3,0,1, which
3201 according to Intel's documentation means the pshufd
3202 literal value is 0xB1, that is,
3203 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3204 */
3205 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
sewardjac530442005-05-11 16:13:37 +00003206 HReg tmp = generate_zeroes_V128(env);
sewardj09717342005-05-05 21:34:02 +00003207 HReg dst = newVRegV(env);
sewardj09717342005-05-05 21:34:02 +00003208 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3209 tmp = do_sse_NotV128(env, tmp);
3210 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
3211 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
3212 return dst;
3213 }
3214
sewardjac530442005-05-11 16:13:37 +00003215 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3216 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3217 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
3218 do_CmpNEZ_vector:
3219 {
3220 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3221 HReg tmp = newVRegV(env);
3222 HReg zero = generate_zeroes_V128(env);
3223 HReg dst;
3224 addInstr(env, mk_vMOVsd_RR(arg, tmp));
3225 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
3226 dst = do_sse_NotV128(env, tmp);
3227 return dst;
3228 }
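   /* In other words, CmpNEZ on 8/16/32-bit lanes is just
      not(cmpeq(arg, 0)): lanes equal to zero compare to all-ones,
      all others to all-zeroes, and the final NOT yields the
      required lanewise nonzero-test mask. */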
sewardja7ba8c42005-05-10 20:08:34 +00003229
3230 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
3231 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
3232 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary;
3233 do_32Fx4_unary:
3234 {
3235 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3236 HReg dst = newVRegV(env);
3237 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
3238 return dst;
3239 }
3240
sewardj97628592005-05-10 22:42:54 +00003241 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
3242 do_64Fx2_unary:
3243 {
3244 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3245 HReg dst = newVRegV(env);
3246 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
3247 return dst;
3248 }
sewardja7ba8c42005-05-10 20:08:34 +00003249
3250 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
3251 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3252 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
3253 do_32F0x4_unary:
3254 {
3255 /* A bit subtle. We have to copy the arg to the result
3256 register first, because actually doing the SSE scalar insn
3257 leaves the upper 3/4 of the destination register
3258 unchanged. Whereas the required semantics of these
3259 primops is that the upper 3/4 is simply copied in from the
3260 argument. */
3261 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3262 HReg dst = newVRegV(env);
3263 addInstr(env, mk_vMOVsd_RR(arg, dst));
3264 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
3265 return dst;
3266 }
3267
sewardj0852a132005-02-21 08:28:46 +00003268 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
3269 do_64F0x2_unary:
3270 {
3271 /* A bit subtle. We have to copy the arg to the result
3272 register first, because actually doing the SSE scalar insn
3273 leaves the upper half of the destination register
3274 unchanged. Whereas the required semantics of these
3275 primops is that the upper half is simply copied in from the
3276 argument. */
3277 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3278 HReg dst = newVRegV(env);
3279 addInstr(env, mk_vMOVsd_RR(arg, dst));
3280 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3281 return dst;
3282 }
3283
sewardj8d965312005-02-25 02:48:47 +00003284 case Iop_32UtoV128: {
3285 HReg dst = newVRegV(env);
3286 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3287 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3288 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3289 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3290 return dst;
3291 }
sewardj0852a132005-02-21 08:28:46 +00003292
3293 case Iop_64UtoV128: {
3294 HReg dst = newVRegV(env);
3295 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3296 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3297 addInstr(env, AMD64Instr_Push(rmi));
3298 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3299 add_to_rsp(env, 8);
3300 return dst;
3301 }
3302
sewardj4b1cc832012-06-13 11:10:20 +00003303 case Iop_V256toV128_0:
3304 case Iop_V256toV128_1: {
3305 HReg vHi, vLo;
3306 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
3307 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
3308 }
3309
sewardj0852a132005-02-21 08:28:46 +00003310 default:
3311 break;
3312 } /* switch (e->Iex.Unop.op) */
3313 } /* if (e->tag == Iex_Unop) */
3314
3315 if (e->tag == Iex_Binop) {
3316 switch (e->Iex.Binop.op) {
3317
sewardjc4530ae2012-05-21 10:18:49 +00003318 /* FIXME: could we generate MOVQ here? */
sewardj18303862005-02-21 12:36:54 +00003319 case Iop_SetV128lo64: {
3320 HReg dst = newVRegV(env);
3321 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3322 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
sewardj478fe702005-04-23 01:15:47 +00003323 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3324 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3325 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
3326 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3327 return dst;
3328 }
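   /* Like several cases here, this goes via memory: spill the
      vector into a 16-byte slot below %rsp, overwrite its low 8
      bytes with the integer, and reload all 16 bytes.  The FIXME
      above notes that a movq-based register-to-register sequence
      could avoid the round trip. */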
3329
sewardjc4530ae2012-05-21 10:18:49 +00003330 /* FIXME: could we generate MOVD here? */
sewardj478fe702005-04-23 01:15:47 +00003331 case Iop_SetV128lo32: {
3332 HReg dst = newVRegV(env);
3333 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3334 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3335 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3336 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3337 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
3338 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
sewardj18303862005-02-21 12:36:54 +00003339 return dst;
3340 }
3341
sewardj1a01e652005-02-23 11:39:21 +00003342 case Iop_64HLtoV128: {
sewardjc4530ae2012-05-21 10:18:49 +00003343 HReg rsp = hregAMD64_RSP();
3344 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp);
3345 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
3346 AMD64RI* qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1);
3347 AMD64RI* qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2);
3348 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp));
3349 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00003350 HReg dst = newVRegV(env);
sewardjc4530ae2012-05-21 10:18:49 +00003351 /* One store-forwarding stall coming up, oh well :-( */
3352 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00003353 return dst;
3354 }
3355
sewardj432f8b62005-05-10 02:50:05 +00003356 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3357 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3358 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
sewardjb9282632005-11-05 02:33:25 +00003359 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
sewardj432f8b62005-05-10 02:50:05 +00003360 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4;
3361 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4;
3362 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
3363 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
3364 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4;
3365 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4;
3366 do_32Fx4:
3367 {
3368 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3369 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3370 HReg dst = newVRegV(env);
3371 addInstr(env, mk_vMOVsd_RR(argL, dst));
3372 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3373 return dst;
3374 }
3375
sewardj97628592005-05-10 22:42:54 +00003376 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3377 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3378 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
sewardjb9282632005-11-05 02:33:25 +00003379 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
sewardj4c328cf2005-05-05 12:05:54 +00003380 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
sewardj5992bd02005-05-11 02:13:42 +00003381 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2;
3382 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
3383 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
sewardj4c328cf2005-05-05 12:05:54 +00003384 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2;
3385 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2;
3386 do_64Fx2:
3387 {
3388 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3389 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3390 HReg dst = newVRegV(env);
3391 addInstr(env, mk_vMOVsd_RR(argL, dst));
3392 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3393 return dst;
3394 }
sewardj8d965312005-02-25 02:48:47 +00003395
sewardj432f8b62005-05-10 02:50:05 +00003396 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
sewardj3aba9eb2005-03-30 23:20:47 +00003397 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
sewardj4c328cf2005-05-05 12:05:54 +00003398 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
sewardjb9282632005-11-05 02:33:25 +00003399 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003400 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
sewardjc49ce232005-02-25 13:03:03 +00003401 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
sewardj37d52572005-02-25 14:22:12 +00003402 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
3403 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003404 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3405 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
3406 do_32F0x4: {
3407 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3408 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3409 HReg dst = newVRegV(env);
3410 addInstr(env, mk_vMOVsd_RR(argL, dst));
3411 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3412 return dst;
3413 }
3414
sewardj137015d2005-03-27 04:01:15 +00003415 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
sewardj8d965312005-02-25 02:48:47 +00003416 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
sewardj137015d2005-03-27 04:01:15 +00003417 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
sewardjb9282632005-11-05 02:33:25 +00003418 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003419 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3420 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
sewardj1a01e652005-02-23 11:39:21 +00003421 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
sewardjc49ce232005-02-25 13:03:03 +00003422 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003423 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3424 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
3425 do_64F0x2: {
3426 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3427 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3428 HReg dst = newVRegV(env);
3429 addInstr(env, mk_vMOVsd_RR(argL, dst));
3430 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3431 return dst;
3432 }
3433
sewardj5f438dd2011-06-16 11:36:23 +00003434 case Iop_QNarrowBin32Sto16Sx8:
sewardj97628592005-05-10 22:42:54 +00003435 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
sewardj5f438dd2011-06-16 11:36:23 +00003436 case Iop_QNarrowBin16Sto8Sx16:
sewardj97628592005-05-10 22:42:54 +00003437 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
sewardj5f438dd2011-06-16 11:36:23 +00003438 case Iop_QNarrowBin16Sto8Ux16:
sewardj97628592005-05-10 22:42:54 +00003439 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3440
3441 case Iop_InterleaveHI8x16:
3442 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3443 case Iop_InterleaveHI16x8:
3444 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3445 case Iop_InterleaveHI32x4:
3446 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3447 case Iop_InterleaveHI64x2:
3448 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3449
3450 case Iop_InterleaveLO8x16:
3451 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3452 case Iop_InterleaveLO16x8:
3453 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3454 case Iop_InterleaveLO32x4:
3455 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3456 case Iop_InterleaveLO64x2:
3457 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3458
sewardj1a01e652005-02-23 11:39:21 +00003459 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
sewardj8d965312005-02-25 02:48:47 +00003460 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003461 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003462 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003463 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003464 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003465 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003466 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
3467 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
3468 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
3469 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
3470 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
3471 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
3472 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
3473 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
3474 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
3475 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
3476 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3477 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
sewardjadffcef2005-05-11 00:03:06 +00003478 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
3479 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
3480 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
3481 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
3482 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3483 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3484 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003485 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
3486 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
3487 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003488 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003489 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
3490 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
3491 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
3492 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003493 do_SseReRg: {
3494 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3495 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3496 HReg dst = newVRegV(env);
3497 if (arg1isEReg) {
sewardj9da16972005-02-21 13:58:26 +00003498 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3499 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3500 } else {
3501 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3502 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3503 }
3504 return dst;
3505 }
3506
sewardjadffcef2005-05-11 00:03:06 +00003507 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3508 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3509 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3510 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3511 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3512 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3513 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
sewardj09717342005-05-05 21:34:02 +00003514 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
3515 do_SseShift: {
3516 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3517 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3518 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3519 HReg ereg = newVRegV(env);
3520 HReg dst = newVRegV(env);
3521 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3522 addInstr(env, AMD64Instr_Push(rmi));
3523 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3524 addInstr(env, mk_vMOVsd_RR(greg, dst));
3525 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
3526 add_to_rsp(env, 16);
3527 return dst;
3528 }
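   /* The SSE shift-by-scalar forms take the shift count from the
      low 64 bits of an xmm register, hence the dance above: push a
      zero qword, push the count, load the resulting 16 bytes into
      'ereg' (count in the low half, zero above it), then shift
      'dst' by 'ereg'. */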
sewardj0852a132005-02-21 08:28:46 +00003529
sewardj69d98e32010-06-18 08:17:41 +00003530 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4;
3531 goto do_SseAssistedBinary;
3532 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4;
3533 goto do_SseAssistedBinary;
3534 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4;
3535 goto do_SseAssistedBinary;
3536 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4;
3537 goto do_SseAssistedBinary;
3538 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4;
3539 goto do_SseAssistedBinary;
3540 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8;
3541 goto do_SseAssistedBinary;
3542 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8;
3543 goto do_SseAssistedBinary;
3544 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16;
3545 goto do_SseAssistedBinary;
3546 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
3547 goto do_SseAssistedBinary;
sewardjd8815622011-10-19 15:24:01 +00003548 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2;
3549 goto do_SseAssistedBinary;
sewardj69d98e32010-06-18 08:17:41 +00003550 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
3551 goto do_SseAssistedBinary;
sewardjd8bca7e2012-06-20 11:46:19 +00003552 case Iop_Perm32x4: fn = (HWord)h_generic_calc_Perm32x4;
3553 goto do_SseAssistedBinary;
sewardj5f438dd2011-06-16 11:36:23 +00003554 case Iop_QNarrowBin32Sto16Ux8:
3555 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
sewardj2260b992011-06-15 16:05:07 +00003556 goto do_SseAssistedBinary;
sewardjad2c9ea2011-10-22 09:32:16 +00003557 case Iop_NarrowBin16to8x16:
3558 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3559 goto do_SseAssistedBinary;
3560 case Iop_NarrowBin32to16x8:
3561 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3562 goto do_SseAssistedBinary;
sewardj69d98e32010-06-18 08:17:41 +00003563 do_SseAssistedBinary: {
3564 /* RRRufff! RRRufff code is what we're generating here. Oh
3565 well. */
3566 vassert(fn != 0);
3567 HReg dst = newVRegV(env);
3568 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3569 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3570 HReg argp = newVRegI(env);
3571         /* subq $112, %rsp -- make a space */
3572 sub_from_rsp(env, 112);
3573 /* leaq 48(%rsp), %r_argp -- point into it */
3574 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3575 argp));
3576 /* andq $-16, %r_argp -- 16-align the pointer */
3577 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3578 AMD64RMI_Imm( ~(UInt)15 ),
3579 argp));
3580 /* Prepare 3 arg regs:
3581 leaq 0(%r_argp), %rdi
3582 leaq 16(%r_argp), %rsi
3583 leaq 32(%r_argp), %rdx
3584 */
3585 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3586 hregAMD64_RDI()));
3587 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3588 hregAMD64_RSI()));
3589 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
3590 hregAMD64_RDX()));
3591 /* Store the two args, at (%rsi) and (%rdx):
3592 movupd %argL, 0(%rsi)
3593 movupd %argR, 0(%rdx)
3594 */
3595 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3596 AMD64AMode_IR(0, hregAMD64_RSI())));
3597 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
3598 AMD64AMode_IR(0, hregAMD64_RDX())));
3599 /* call the helper */
sewardjcfe046e2013-01-17 14:23:53 +00003600 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00003601 3, mk_RetLoc_simple(RLPri_None) ));
sewardj69d98e32010-06-18 08:17:41 +00003602 /* fetch the result from memory, using %r_argp, which the
3603 register allocator will keep alive across the call. */
3604 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3605 AMD64AMode_IR(0, argp)));
3606 /* and finally, clear the space */
3607 add_to_rsp(env, 112);
3608 return dst;
3609 }
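   /* Judging from the call sequence, the helpers reached via
      do_SseAssistedBinary all share a shape along the lines of
         void h_generic_calc_XXX ( V128* res, V128* argL,
                                   V128* argR );
      (host_generic_simd128.h has the real prototypes): %rdi, %rsi
      and %rdx carry the three pointers, and the explicit
      16-alignment of the scratch area keeps each V128 naturally
      aligned. */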
3610
sewardj0874bee2011-01-17 10:32:18 +00003611 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
3612 goto do_SseAssistedVectorAndScalar;
3613 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
3614 goto do_SseAssistedVectorAndScalar;
3615 do_SseAssistedVectorAndScalar: {
3616 /* RRRufff! RRRufff code is what we're generating here. Oh
3617 well. */
3618 vassert(fn != 0);
3619 HReg dst = newVRegV(env);
3620 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3621 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3622 HReg argp = newVRegI(env);
3623         /* subq $112, %rsp -- make a space */
3624 sub_from_rsp(env, 112);
3625 /* leaq 48(%rsp), %r_argp -- point into it */
3626 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3627 argp));
3628 /* andq $-16, %r_argp -- 16-align the pointer */
3629 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3630 AMD64RMI_Imm( ~(UInt)15 ),
3631 argp));
3632         /* Prepare 2 pointer args (the result and the vector arg):
3633 leaq 0(%r_argp), %rdi
3634 leaq 16(%r_argp), %rsi
3635 */
3636 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3637 hregAMD64_RDI()));
3638 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3639 hregAMD64_RSI()));
3640 /* Store the vector arg, at (%rsi):
3641 movupd %argL, 0(%rsi)
3642 */
3643 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3644 AMD64AMode_IR(0, hregAMD64_RSI())));
3645 /* And get the scalar value into rdx */
3646 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));
3647
3648 /* call the helper */
sewardjcfe046e2013-01-17 14:23:53 +00003649 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00003650 3, mk_RetLoc_simple(RLPri_None) ));
sewardj0874bee2011-01-17 10:32:18 +00003651 /* fetch the result from memory, using %r_argp, which the
3652 register allocator will keep alive across the call. */
3653 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3654 AMD64AMode_IR(0, argp)));
3655 /* and finally, clear the space */
3656 add_to_rsp(env, 112);
3657 return dst;
3658 }
3659
sewardj0852a132005-02-21 08:28:46 +00003660 default:
3661 break;
3662 } /* switch (e->Iex.Binop.op) */
3663 } /* if (e->tag == Iex_Binop) */
3664
florian99dd03e2013-01-29 03:56:06 +00003665 if (e->tag == Iex_ITE) { // VFD
3666 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3667 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
sewardjadffcef2005-05-11 00:03:06 +00003668 HReg dst = newVRegV(env);
florian99dd03e2013-01-29 03:56:06 +00003669 addInstr(env, mk_vMOVsd_RR(r1,dst));
floriane6be61f2013-02-01 16:11:51 +00003670 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00003671 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
sewardjadffcef2005-05-11 00:03:06 +00003672 return dst;
3673 }
3674
sewardjacfbd7d2010-08-17 22:52:08 +00003675 //vec_fail:
sewardj0852a132005-02-21 08:28:46 +00003676 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
sewardj5117ce12006-01-27 21:20:15 +00003677 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
sewardj0852a132005-02-21 08:28:46 +00003678 ppIRExpr(e);
3679 vpanic("iselVecExpr_wrk");
3680}
sewardjc33671d2005-02-01 20:30:00 +00003681
3682
3683/*---------------------------------------------------------*/
sewardjc4530ae2012-05-21 10:18:49 +00003684/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. --*/
3685/*---------------------------------------------------------*/
3686
sewardj56c30312012-06-12 08:45:39 +00003687static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
sewardjc4530ae2012-05-21 10:18:49 +00003688 ISelEnv* env, IRExpr* e )
3689{
3690 iselDVecExpr_wrk( rHi, rLo, env, e );
3691# if 0
3692 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3693# endif
3694 vassert(hregClass(*rHi) == HRcVec128);
3695 vassert(hregClass(*rLo) == HRcVec128);
3696 vassert(hregIsVirtual(*rHi));
3697 vassert(hregIsVirtual(*rLo));
3698}
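/* Convention: a V256 value is represented as a pair of V128
   virtual registers, with *rLo holding lanes 0..127 and *rHi lanes
   128..255.  In memory the low half lives at the lower address,
   with the high half 16 bytes above it, as the Iex_Get and
   Iex_Load cases below spell out. */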
3699
3700
3701/* DO NOT CALL THIS DIRECTLY */
sewardj56c30312012-06-12 08:45:39 +00003702static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
sewardjc4530ae2012-05-21 10:18:49 +00003703 ISelEnv* env, IRExpr* e )
3704{
sewardjcc3d2192013-03-27 11:37:33 +00003705 HWord fn = 0; /* address of helper fn, if required */
sewardjc4530ae2012-05-21 10:18:49 +00003706 vassert(e);
3707 IRType ty = typeOfIRExpr(env->type_env,e);
3708 vassert(ty == Ity_V256);
3709
sewardj56c30312012-06-12 08:45:39 +00003710 AMD64SseOp op = Asse_INVALID;
3711
sewardjc4530ae2012-05-21 10:18:49 +00003712 /* read 256-bit IRTemp */
3713 if (e->tag == Iex_RdTmp) {
3714 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3715 return;
3716 }
3717
3718 if (e->tag == Iex_Get) {
3719 HReg vHi = newVRegV(env);
3720 HReg vLo = newVRegV(env);
3721 HReg rbp = hregAMD64_RBP();
3722 AMD64AMode* am0 = AMD64AMode_IR(e->Iex.Get.offset + 0, rbp);
3723 AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp);
3724 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
3725 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
3726 *rHi = vHi;
3727 *rLo = vLo;
3728 return;
3729 }
3730
3731 if (e->tag == Iex_Load) {
3732 HReg vHi = newVRegV(env);
3733 HReg vLo = newVRegV(env);
3734 HReg rA = iselIntExpr_R(env, e->Iex.Load.addr);
3735 AMD64AMode* am0 = AMD64AMode_IR(0, rA);
3736 AMD64AMode* am16 = AMD64AMode_IR(16, rA);
3737 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
3738 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
3739 *rHi = vHi;
3740 *rLo = vLo;
3741 return;
3742 }
3743
sewardj37a505b2012-06-29 15:28:24 +00003744 if (e->tag == Iex_Const) {
3745 vassert(e->Iex.Const.con->tag == Ico_V256);
3746 switch (e->Iex.Const.con->Ico.V256) {
3747 case 0x00000000: {
3748 HReg vHi = generate_zeroes_V128(env);
3749 HReg vLo = newVRegV(env);
3750 addInstr(env, mk_vMOVsd_RR(vHi, vLo));
3751 *rHi = vHi;
3752 *rLo = vLo;
3753 return;
3754 }
3755 default:
3756 break; /* give up. Until such time as is necessary. */
3757 }
3758 }
3759
sewardj2a2bda92012-06-14 23:32:02 +00003760 if (e->tag == Iex_Unop) {
3761 switch (e->Iex.Unop.op) {
3762
3763 case Iop_NotV256: {
3764 HReg argHi, argLo;
3765 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3766 *rHi = do_sse_NotV128(env, argHi);
3767 *rLo = do_sse_NotV128(env, argLo);
3768 return;
3769 }
3770
sewardj82096922012-06-24 14:57:59 +00003771 case Iop_Recip32Fx8: op = Asse_RCPF; goto do_32Fx8_unary;
sewardj66becf32012-06-18 23:15:16 +00003772 case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary;
3773 case Iop_RSqrt32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
3774 do_32Fx8_unary:
3775 {
3776 HReg argHi, argLo;
3777 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3778 HReg dstHi = newVRegV(env);
3779 HReg dstLo = newVRegV(env);
3780 addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi));
3781 addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo));
3782 *rHi = dstHi;
3783 *rLo = dstLo;
3784 return;
3785 }
3786
3787 case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary;
3788 do_64Fx4_unary:
3789 {
3790 HReg argHi, argLo;
3791 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3792 HReg dstHi = newVRegV(env);
3793 HReg dstLo = newVRegV(env);
3794 addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi));
3795 addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo));
3796 *rHi = dstHi;
3797 *rLo = dstLo;
3798 return;
3799 }
3800
sewardj23db8a02012-06-25 07:46:18 +00003801 case Iop_CmpNEZ64x4: {
3802 /* We can use SSE2 instructions for this. */
3803 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
3804 (obviously). See comment on Iop_CmpNEZ64x2 for
3805 explanation of what's going on here. */
3806 HReg argHi, argLo;
3807 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3808 HReg tmpHi = generate_zeroes_V128(env);
3809 HReg tmpLo = newVRegV(env);
3810 addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
3811 HReg dstHi = newVRegV(env);
3812 HReg dstLo = newVRegV(env);
3813 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
3814 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
3815 tmpHi = do_sse_NotV128(env, tmpHi);
3816 tmpLo = do_sse_NotV128(env, tmpLo);
3817 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
3818 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
3819 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
3820 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
3821 *rHi = dstHi;
3822 *rLo = dstLo;
3823 return;
3824 }
3825
3826 case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
sewardjcc3d2192013-03-27 11:37:33 +00003827 case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3828 case Iop_CmpNEZ8x32: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
sewardj23db8a02012-06-25 07:46:18 +00003829 do_CmpNEZ_vector:
3830 {
3831 HReg argHi, argLo;
3832 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3833 HReg tmpHi = newVRegV(env);
3834 HReg tmpLo = newVRegV(env);
3835 HReg zero = generate_zeroes_V128(env);
3836 HReg dstHi, dstLo;
3837 addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
3838 addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
3839 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
3840 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
3841 dstHi = do_sse_NotV128(env, tmpHi);
3842 dstLo = do_sse_NotV128(env, tmpLo);
3843 *rHi = dstHi;
3844 *rLo = dstLo;
3845 return;
3846 }
3847
sewardj2a2bda92012-06-14 23:32:02 +00003848 default:
3849 break;
3850 } /* switch (e->Iex.Unop.op) */
3851 } /* if (e->tag == Iex_Unop) */
3852
sewardj56c30312012-06-12 08:45:39 +00003853 if (e->tag == Iex_Binop) {
3854 switch (e->Iex.Binop.op) {
3855
3856 case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4;
3857 case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4;
3858 case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4;
3859 case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4;
sewardj8eb7ae82012-06-24 14:00:27 +00003860 case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
3861 case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
sewardj56c30312012-06-12 08:45:39 +00003862 do_64Fx4:
3863 {
3864 HReg argLhi, argLlo, argRhi, argRlo;
3865 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
3866 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
3867 HReg dstHi = newVRegV(env);
3868 HReg dstLo = newVRegV(env);
3869 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
3870 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
3871 addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
3872 addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
3873 *rHi = dstHi;
3874 *rLo = dstLo;
3875 return;
3876 }
3877
3878 case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8;
3879 case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8;
3880 case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8;
3881 case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8;
sewardj8eb7ae82012-06-24 14:00:27 +00003882 case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
3883 case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
sewardj56c30312012-06-12 08:45:39 +00003884 do_32Fx8:
3885 {
3886 HReg argLhi, argLlo, argRhi, argRlo;
3887 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
3888 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
3889 HReg dstHi = newVRegV(env);
3890 HReg dstLo = newVRegV(env);
3891 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
3892 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
3893 addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
3894 addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
3895 *rHi = dstHi;
3896 *rLo = dstLo;
3897 return;
3898 }
3899
sewardj4b1cc832012-06-13 11:10:20 +00003900 case Iop_AndV256: op = Asse_AND; goto do_SseReRg;
sewardj2a2bda92012-06-14 23:32:02 +00003901 case Iop_OrV256: op = Asse_OR; goto do_SseReRg;
sewardj4b1cc832012-06-13 11:10:20 +00003902 case Iop_XorV256: op = Asse_XOR; goto do_SseReRg;
sewardjcc3d2192013-03-27 11:37:33 +00003903 case Iop_Add8x32: op = Asse_ADD8; goto do_SseReRg;
3904 case Iop_Add16x16: op = Asse_ADD16; goto do_SseReRg;
3905 case Iop_Add32x8: op = Asse_ADD32; goto do_SseReRg;
3906 case Iop_Add64x4: op = Asse_ADD64; goto do_SseReRg;
3907 case Iop_QAdd8Sx32: op = Asse_QADD8S; goto do_SseReRg;
3908 case Iop_QAdd16Sx16: op = Asse_QADD16S; goto do_SseReRg;
3909 case Iop_QAdd8Ux32: op = Asse_QADD8U; goto do_SseReRg;
3910 case Iop_QAdd16Ux16: op = Asse_QADD16U; goto do_SseReRg;
3911 case Iop_Avg8Ux32: op = Asse_AVG8U; goto do_SseReRg;
3912 case Iop_Avg16Ux16: op = Asse_AVG16U; goto do_SseReRg;
3913 case Iop_CmpEQ8x32: op = Asse_CMPEQ8; goto do_SseReRg;
3914 case Iop_CmpEQ16x16: op = Asse_CMPEQ16; goto do_SseReRg;
3915 case Iop_CmpEQ32x8: op = Asse_CMPEQ32; goto do_SseReRg;
3916 case Iop_CmpGT8Sx32: op = Asse_CMPGT8S; goto do_SseReRg;
3917 case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg;
3918 case Iop_CmpGT32Sx8: op = Asse_CMPGT32S; goto do_SseReRg;
3919 case Iop_Max16Sx16: op = Asse_MAX16S; goto do_SseReRg;
3920 case Iop_Max8Ux32: op = Asse_MAX8U; goto do_SseReRg;
3921 case Iop_Min16Sx16: op = Asse_MIN16S; goto do_SseReRg;
3922 case Iop_Min8Ux32: op = Asse_MIN8U; goto do_SseReRg;
3923 case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg;
3924 case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg;
3925 case Iop_Mul16x16: op = Asse_MUL16; goto do_SseReRg;
3926 case Iop_Sub8x32: op = Asse_SUB8; goto do_SseReRg;
3927 case Iop_Sub16x16: op = Asse_SUB16; goto do_SseReRg;
3928 case Iop_Sub32x8: op = Asse_SUB32; goto do_SseReRg;
3929 case Iop_Sub64x4: op = Asse_SUB64; goto do_SseReRg;
3930 case Iop_QSub8Sx32: op = Asse_QSUB8S; goto do_SseReRg;
3931 case Iop_QSub16Sx16: op = Asse_QSUB16S; goto do_SseReRg;
3932 case Iop_QSub8Ux32: op = Asse_QSUB8U; goto do_SseReRg;
3933 case Iop_QSub16Ux16: op = Asse_QSUB16U; goto do_SseReRg;
sewardj4b1cc832012-06-13 11:10:20 +00003934 do_SseReRg:
3935 {
3936 HReg argLhi, argLlo, argRhi, argRlo;
3937 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
3938 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
3939 HReg dstHi = newVRegV(env);
3940 HReg dstLo = newVRegV(env);
3941 addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
3942 addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
3943 addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi));
3944 addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo));
3945 *rHi = dstHi;
3946 *rLo = dstLo;
3947 return;
3948 }
3949
sewardjcc3d2192013-03-27 11:37:33 +00003950 case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
3951 case Iop_ShlN32x8: op = Asse_SHL32; goto do_SseShift;
3952 case Iop_ShlN64x4: op = Asse_SHL64; goto do_SseShift;
3953 case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
3954 case Iop_SarN32x8: op = Asse_SAR32; goto do_SseShift;
3955 case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
3956 case Iop_ShrN32x8: op = Asse_SHR32; goto do_SseShift;
3957 case Iop_ShrN64x4: op = Asse_SHR64; goto do_SseShift;
3958 do_SseShift: {
3959 HReg gregHi, gregLo;
3960 iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
3961 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3962 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3963 HReg ereg = newVRegV(env);
3964 HReg dstHi = newVRegV(env);
3965 HReg dstLo = newVRegV(env);
3966 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3967 addInstr(env, AMD64Instr_Push(rmi));
3968 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3969 addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
3970 addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
3971 addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
3972 addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
3973 add_to_rsp(env, 16);
3974 *rHi = dstHi;
3975 *rLo = dstLo;
3976 return;
3977 }
3978
sewardj4b1cc832012-06-13 11:10:20 +00003979 case Iop_V128HLtoV256: {
3980 *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
3981 *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
3982 return;
3983 }
3984
sewardjcc3d2192013-03-27 11:37:33 +00003985 case Iop_Mul32x8: fn = (HWord)h_generic_calc_Mul32x4;
3986 goto do_SseAssistedBinary;
3987 case Iop_Max32Sx8: fn = (HWord)h_generic_calc_Max32Sx4;
3988 goto do_SseAssistedBinary;
3989 case Iop_Min32Sx8: fn = (HWord)h_generic_calc_Min32Sx4;
3990 goto do_SseAssistedBinary;
3991 case Iop_Max32Ux8: fn = (HWord)h_generic_calc_Max32Ux4;
3992 goto do_SseAssistedBinary;
3993 case Iop_Min32Ux8: fn = (HWord)h_generic_calc_Min32Ux4;
3994 goto do_SseAssistedBinary;
3995 case Iop_Max16Ux16: fn = (HWord)h_generic_calc_Max16Ux8;
3996 goto do_SseAssistedBinary;
3997 case Iop_Min16Ux16: fn = (HWord)h_generic_calc_Min16Ux8;
3998 goto do_SseAssistedBinary;
3999 case Iop_Max8Sx32: fn = (HWord)h_generic_calc_Max8Sx16;
4000 goto do_SseAssistedBinary;
4001 case Iop_Min8Sx32: fn = (HWord)h_generic_calc_Min8Sx16;
4002 goto do_SseAssistedBinary;
4003 case Iop_CmpEQ64x4: fn = (HWord)h_generic_calc_CmpEQ64x2;
4004 goto do_SseAssistedBinary;
4005 case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
4006 goto do_SseAssistedBinary;
4007 do_SseAssistedBinary: {
4008 /* RRRufff! RRRufff code is what we're generating here. Oh
4009 well. */
4010 vassert(fn != 0);
4011 HReg dstHi = newVRegV(env);
4012 HReg dstLo = newVRegV(env);
4013 HReg argLhi, argLlo, argRhi, argRlo;
4014 iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
4015 iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
4016 HReg argp = newVRegI(env);
4017         /* subq $160, %rsp -- make a space */
4018 sub_from_rsp(env, 160);
4019 /* leaq 48(%rsp), %r_argp -- point into it */
4020 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
4021 argp));
4022 /* andq $-16, %r_argp -- 16-align the pointer */
4023 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
4024 AMD64RMI_Imm( ~(UInt)15 ),
4025 argp));
4026 /* Prepare 3 arg regs:
4027 leaq 0(%r_argp), %rdi
4028 leaq 16(%r_argp), %rsi
4029 leaq 32(%r_argp), %rdx
4030 */
4031 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
4032 hregAMD64_RDI()));
4033 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
4034 hregAMD64_RSI()));
4035 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
4036 hregAMD64_RDX()));
4037 /* Store the two high args, at (%rsi) and (%rdx):
4038 movupd %argLhi, 0(%rsi)
4039 movupd %argRhi, 0(%rdx)
4040 */
4041 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
4042 AMD64AMode_IR(0, hregAMD64_RSI())));
4043 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
4044 AMD64AMode_IR(0, hregAMD64_RDX())));
4045 /* Store the two low args, at 48(%rsi) and 48(%rdx):
4046 movupd %argLlo, 48(%rsi)
4047 movupd %argRlo, 48(%rdx)
4048 */
4049 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
4050 AMD64AMode_IR(48, hregAMD64_RSI())));
4051 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
4052 AMD64AMode_IR(48, hregAMD64_RDX())));
4053 /* call the helper */
sewardj74142b82013-08-08 10:28:59 +00004054 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
4055 mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00004056 /* Prepare 3 arg regs:
4057 leaq 48(%r_argp), %rdi
4058 leaq 64(%r_argp), %rsi
4059 leaq 80(%r_argp), %rdx
4060 */
4061 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp),
4062 hregAMD64_RDI()));
4063 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
4064 hregAMD64_RSI()));
4065 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
4066 hregAMD64_RDX()));
4067 /* call the helper */
sewardj74142b82013-08-08 10:28:59 +00004068 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
4069 mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00004070 /* fetch the result from memory, using %r_argp, which the
4071 register allocator will keep alive across the call. */
4072 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
4073 AMD64AMode_IR(0, argp)));
4074 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
4075 AMD64AMode_IR(48, argp)));
4076 /* and finally, clear the space */
4077 add_to_rsp(env, 160);
4078 *rHi = dstHi;
4079 *rLo = dstLo;
4080 return;
4081 }
4082
4083 case Iop_Perm32x8: fn = (HWord)h_generic_calc_Perm32x8;
4084 goto do_SseAssistedBinary256;
4085 do_SseAssistedBinary256: {
4086 /* RRRufff! RRRufff code is what we're generating here. Oh
4087 well. */
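         /* Scheme: as for do_SseAssistedBinary above, except that the
            helper works on whole 256-bit values, so the aligned area
            holds a single (dst, argL, argR) triple of 32-byte slots
            at argp+0, argp+32 and argp+64, and one call suffices. */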
         vassert(fn != 0);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $160, %rsp -- make a space */
         sub_from_rsp(env, 160);
         /* leaq 48(%rsp), %r_argp -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
            leaq 0(%r_argp), %rdi
            leaq 32(%r_argp), %rsi
            leaq 64(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                        hregAMD64_RDX()));
         /* Store the two args, at (%rsi) and (%rdx):
            movupd %argLlo, 0(%rsi)
            movupd %argLhi, 16(%rsi)
            movupd %argRlo, 0(%rdx)
            movupd %argRhi, 16(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                          AMD64AMode_IR(16, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                          AMD64AMode_IR(16, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                          AMD64AMode_IR(0, argp)));
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                          AMD64AMode_IR(16, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 160);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
      HReg        rsp     = hregAMD64_RSP();
      HReg        vHi     = newVRegV(env);
      HReg        vLo     = newVRegV(env);
      AMD64AMode* m8_rsp  = AMD64AMode_IR(-8,  rsp);
      AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
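      /* Scheme: build each 128-bit half in memory, by storing its two
         64-bit quarters at -16(%rsp) (the less significant one) and
         -8(%rsp) (the more significant one), then doing one 16-byte
         SSE load from -16(%rsp). */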
      /* arg1 is the most significant (Q3), arg4 the least (Q0) */
      /* Get all the args into regs, before messing with the stack. */
      AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1);
      AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2);
      AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3);
      AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4);
      /* less significant lane (Q2) at the lower address (-16(rsp)) */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp));
      /* and then the lower half .. */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_ITE) {
      HReg r1Hi, r1Lo, r0Hi, r0Lo;
      iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue);
      iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse);
      HReg dstHi = newVRegV(env);
      HReg dstLo = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi));
      addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
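      /* Flipping the bottom bit of an AMD64CondCode gives its
         complement, so "cc ^ 1" means "the condition is false".  The
         iftrue halves were copied in unconditionally above; overwrite
         them with the iffalse halves if the condition fails. */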
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi));
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   //avx_fail:
   vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselDVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_V128) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      if (tyd == Ity_V256) {
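         /* Store the V256 value little-endianly: the low half at the
            lower address, the high half 16 bytes above it. */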
         HReg        rA   = iselIntExpr_R(env, stmt->Ist.Store.addr);
         AMD64AMode* am0  = AMD64AMode_IR(0, rA);
         AMD64AMode* am16 = AMD64AMode_IR(16, rA);
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      if (ty == Ity_V128) {
         HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_V256) {
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data);
         HReg rbp = hregAMD64_RBP();
         AMD64AMode* am0  = AMD64AMode_IR(stmt->Ist.Put.offset + 0,  rbp);
         AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      AMD64AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, puti->data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for memcheck
         created IR) we get t = address-expression, (t is later used
         twice) and so doing this naturally turns address-expression
         back into an AMD64 amode. */
      if (ty == Ity_I64
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Aam.IR.reg;
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
         } else {
            addInstr(env, AMD64Instr_Lea64(am,dst));
         }
         return;
      }

      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V256) {
         HReg rHi, rLo, dstHi, dstLo;
         iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_vMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_vMOVsd_RR(rLo,dstLo) );
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Throw out any return types we don't know about. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %rax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc tells us
               where.  Fish it off the stack and then move the stack
               pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg dst = lookupIRTemp(env, d->tmp);
            AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
            add_to_rsp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, AMD64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expd into %rax, and cas->data into %rbx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rData = iselIntExpr_R(env, cas->dataLo);
         HReg rExpd = iselIntExpr_R(env, cas->expdLo);
         HReg rOld  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
         addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
         switch (ty) {
            case Ity_I64: sz = 8; break;
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, AMD64Instr_ACAS(am, sz));
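         /* cmpxchg sets ZF if the exchange happened.  rOld was
            preloaded with the expected value, which equals the old
            value on success; on failure (ZF clear) %rax holds the
            value actually found in memory, so conditionally copy
            that into rOld instead. */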
         addInstr(env, AMD64Instr_CMov64(
                          Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
         return;
      } else {
         /* double CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");

      AMD64CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      AMD64AMode*   amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
                                          hregAMD64_RBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amRIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_syscall:
         case Ijk_TInval:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
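   /* Three flavours of transfer can be generated here: XDirect, a
      chainable jump to a known address; XIndir, a chainable jump to
      a run-time computed address; and XAssisted, a jump routed
      through the dispatcher together with a reason code.  Chaining
      requires env->chainingAllowed; when that is false, every
      transfer is done as XAssisted. */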
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
                                             amRIP, Acc_ALWAYS,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the cheapest alternative that is
               allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
         } else {
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_syscall:
      case Ijk_TInval:
      case Ijk_Yield: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( IRSB* bb,
                            VexArch      arch_host,
                            VexArchInfo* archinfo_host,
                            VexAbiInfo*  vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr64 max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   AMD64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT
                     | VEX_HWCAPS_AMD64_AVX
                     | VEX_HWCAPS_AMD64_RDTSCP
                     | VEX_HWCAPS_AMD64_BMI
                     | VEX_HWCAPS_AMD64_AVX2)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
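   /* vregmapHI supplies the second ("high") vreg for IRTemps whose
      values need a register pair: Ity_I128 (two Int64 vregs) and
      Ity_V256 (two Vec128 vregs).  For all other types it is left
      unused. */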

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_I128:
            hreg   = mkHReg(j++, HRcInt64, True);
            hregHI = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128:
            hreg = mkHReg(j++, HRcVec128, True);
            break;
         case Ity_V256:
            hreg   = mkHReg(j++, HRcVec128, True);
            hregHI = mkHReg(j++, HRcVec128, True);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = AMD64AMode_IR(offs_Host_EvC_Counter,  hregAMD64_RBP());
   amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, AMD64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/