
/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

61 %fpucw and %mxcsr should have these values on entry to
62 Vex-generated code, and should those values should be
63 unchanged at exit.
64*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80
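
/* For reference: in 0x027F, bits 5..0 mask all x87 exceptions, bits
   9..8 (precision control) are binary 10 for 53-bit precision, and
   bits 11..10 (rounding control) are binary 00 for round-to-nearest.
   In 0x1F80, bits 12..7 mask all SSE exceptions and bits 14..13
   (rounding control) are binary 00, again round-to-nearest. */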

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed
          IRTemps.  It holds the identity of a second
          64-bit virtual HReg, which holds the high half
          of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constant -- set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
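
/* For example, fitsIn32Bits(0x7FFFFFFFULL) and
   fitsIn32Bits(0xFFFFFFFF80000000ULL) hold, since sign-extending the
   low 32 bits recreates the full value; fitsIn32Bits(0x80000000ULL)
   does not, since it sign-extends to 0xFFFFFFFF80000000. */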

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

309/* Make a int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
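
/* So, for instance, push_uimm64(env, 0xFFFFFFFF80000000ULL) emits a
   single pushq of a 32-bit immediate (which the hardware sign-extends
   to 64 bits), whereas 0x00000000FFFFFFFFULL fails the test above and
   has to be materialised into a temporary register first. */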


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_BBPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
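
   /* Worked example (illustrative only; the temporary and offset here
      are made up): for an unconditional call h(t17, 0x2A, GET:I64(16)),
      each argument is computable with one instruction, so the fast
      scheme below loads %rdi, %rsi and %rdx directly.  If any argument
      were, say, an Add64, or if the call were guarded, we would fall
      back to the slow scheme. */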

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_BBPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nBBPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, Ptr_to_ULong(cee->addr), n_args, *retloc));
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp  (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                                    elemSz==8 ? 3 : 0);
}


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
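
/* The shift by 13 above works because the IRRoundingMode encoding
   (0 = nearest, 1 = -inf, 2 = +inf, 3 = zero) coincides with the
   hardware's rounding-control encoding, and %mxcsr keeps that field
   in bits 14..13. */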


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq %rrm, %rrm2
      andq $3, %rrm2   -- shouldn't be needed; paranoia
      shlq $10, %rrm2
      orq $DEFAULT_FPUCW, %rrm2
      movq %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
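
/* Likewise, the shift by 10 places the mode in the x87 control word's
   rounding-control field, bits 11..10; again the IRRoundingMode
   encoding matches the hardware encoding directly. */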


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
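
/* For example, bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL,
   since 0xA5 has bits 0, 2, 5 and 7 set. */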


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32   = newVRegI(env);
         HReg b32   = newVRegI(env);
         HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

      break;
   }
1374
sewardjf67eadf2005-02-03 03:53:52 +00001375 /* --------- UNARY OP --------- */
1376 case Iex_Unop: {
sewardj7f039c42005-02-04 21:13:55 +00001377
sewardj176ad2f2005-04-27 11:55:08 +00001378 /* 1Uto8(64to1(expr64)) */
sewardjca257bc2010-09-08 08:34:52 +00001379 {
1380 DEFINE_PATTERN( p_1Uto8_64to1,
1381 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1382 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
1383 IRExpr* expr64 = mi.bindee[0];
1384 HReg dst = newVRegI(env);
1385 HReg src = iselIntExpr_R(env, expr64);
1386 addInstr(env, mk_iMOVsd_RR(src,dst) );
1387 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1388 AMD64RMI_Imm(1), dst));
sewardjf67eadf2005-02-03 03:53:52 +00001389 return dst;
1390 }
sewardjca257bc2010-09-08 08:34:52 +00001391 }
1392
1393 /* 8Uto64(LDle(expr64)) */
1394 {
1395 DEFINE_PATTERN(p_LDle8_then_8Uto64,
1396 unop(Iop_8Uto64,
1397 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1398 if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
1399 HReg dst = newVRegI(env);
1400 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1401 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
1402 return dst;
1403 }
1404 }
1405
1406 /* 16Uto64(LDle(expr64)) */
1407 {
1408 DEFINE_PATTERN(p_LDle16_then_16Uto64,
1409 unop(Iop_16Uto64,
1410 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1411 if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
1412 HReg dst = newVRegI(env);
1413 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1414 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
1415 return dst;
1416 }
1417 }
1418
sewardj9cc2bbf2011-06-05 17:56:03 +00001419 /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
1420 Use 32 bit arithmetic and let the default zero-extend rule
1421 do the 32Uto64 for free. */
1422 if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
1423 IROp opi = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
1424 IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
1425 IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
1426 AMD64AluOp aluOp = Aalu_INVALID;
1427 switch (opi) {
1428 case Iop_Add32: aluOp = Aalu_ADD; break;
1429 case Iop_Sub32: aluOp = Aalu_SUB; break;
1430 case Iop_And32: aluOp = Aalu_AND; break;
1431 case Iop_Or32: aluOp = Aalu_OR; break;
1432 case Iop_Xor32: aluOp = Aalu_XOR; break;
1433 default: break;
1434 }
1435 if (aluOp != Aalu_INVALID) {
1436 /* For commutative ops we assume that any literal value
1437 appears as the second operand. */
1438 HReg dst = newVRegI(env);
1439 HReg reg = iselIntExpr_R(env, argL);
1440 AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
1441 addInstr(env, mk_iMOVsd_RR(reg,dst));
1442 addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
1443 return dst;
1444 }
1445 /* just fall through to normal handling for Iop_32Uto64 */
1446 }
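      /* The "zero-extend rule" relied on above is the amd64 rule that
         a 32-bit ALU op writes all 64 bits of its destination,
         zeroing the top half.  So, roughly,
            movq %rL, %dst
            addl <rmi>, %dst32    // 32-bit add; bits 63..32 become 0
         implements 32Uto64(Add32(argL,argR)) with no extra insn. */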
1447
1448 /* Fallback cases */
sewardjca257bc2010-09-08 08:34:52 +00001449 switch (e->Iex.Unop.op) {
1450 case Iop_32Uto64:
sewardj05b3b6a2005-02-04 01:44:33 +00001451 case Iop_32Sto64: {
1452 HReg dst = newVRegI(env);
1453 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjca257bc2010-09-08 08:34:52 +00001454 addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
1455 src, dst) );
sewardj05b3b6a2005-02-04 01:44:33 +00001456 return dst;
1457 }
sewardj9b967672005-02-08 11:13:09 +00001458 case Iop_128HIto64: {
1459 HReg rHi, rLo;
1460 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1461 return rHi; /* and abandon rLo */
1462 }
1463 case Iop_128to64: {
1464 HReg rHi, rLo;
1465 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1466 return rLo; /* and abandon rHi */
1467 }
sewardj85520e42005-02-19 15:22:38 +00001468 case Iop_8Uto16:
sewardjec93f982005-06-21 13:51:18 +00001469 case Iop_8Uto32:
sewardj176ad2f2005-04-27 11:55:08 +00001470 case Iop_8Uto64:
1471 case Iop_16Uto64:
sewardj85520e42005-02-19 15:22:38 +00001472 case Iop_16Uto32: {
sewardj176ad2f2005-04-27 11:55:08 +00001473 HReg dst = newVRegI(env);
1474 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001475 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
1476 || e->Iex.Unop.op==Iop_16Uto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001477 UInt mask = srcIs16 ? 0xFFFF : 0xFF;
sewardj7de0d3c2005-02-13 02:26:41 +00001478 addInstr(env, mk_iMOVsd_RR(src,dst) );
1479 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1480 AMD64RMI_Imm(mask), dst));
1481 return dst;
1482 }
sewardj85520e42005-02-19 15:22:38 +00001483 case Iop_8Sto16:
sewardj176ad2f2005-04-27 11:55:08 +00001484 case Iop_8Sto64:
sewardj7de0d3c2005-02-13 02:26:41 +00001485 case Iop_8Sto32:
sewardj176ad2f2005-04-27 11:55:08 +00001486 case Iop_16Sto32:
1487 case Iop_16Sto64: {
1488 HReg dst = newVRegI(env);
1489 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001490 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
1491 || e->Iex.Unop.op==Iop_16Sto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001492 UInt amt = srcIs16 ? 48 : 56;
sewardj486074e2005-02-08 20:10:04 +00001493 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001494 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1495 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
sewardj486074e2005-02-08 20:10:04 +00001496 return dst;
1497 }
sewardj85520e42005-02-19 15:22:38 +00001498 case Iop_Not8:
1499 case Iop_Not16:
sewardj7de0d3c2005-02-13 02:26:41 +00001500 case Iop_Not32:
sewardjd0a12df2005-02-10 02:07:43 +00001501 case Iop_Not64: {
1502 HReg dst = newVRegI(env);
1503 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1504 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001505 addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
sewardjd0a12df2005-02-10 02:07:43 +00001506 return dst;
1507 }
de5a70f5c2010-04-01 23:08:59 +00001508 case Iop_16HIto8:
sewardj85520e42005-02-19 15:22:38 +00001509 case Iop_32HIto16:
sewardj7de0d3c2005-02-13 02:26:41 +00001510 case Iop_64HIto32: {
1511 HReg dst = newVRegI(env);
1512 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1513 Int shift = 0;
1514 switch (e->Iex.Unop.op) {
sewardj9ba870d2010-04-02 11:29:23 +00001515 case Iop_16HIto8: shift = 8; break;
sewardj85520e42005-02-19 15:22:38 +00001516 case Iop_32HIto16: shift = 16; break;
sewardj7de0d3c2005-02-13 02:26:41 +00001517 case Iop_64HIto32: shift = 32; break;
1518 default: vassert(0);
1519 }
1520 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001521 addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
sewardj7de0d3c2005-02-13 02:26:41 +00001522 return dst;
1523 }
sewardj176ad2f2005-04-27 11:55:08 +00001524 case Iop_1Uto64:
sewardj0af46ab2005-04-26 01:52:29 +00001525 case Iop_1Uto32:
sewardjf53b7352005-04-06 20:01:56 +00001526 case Iop_1Uto8: {
1527 HReg dst = newVRegI(env);
1528 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1529 addInstr(env, AMD64Instr_Set64(cond,dst));
1530 return dst;
1531 }
sewardja64f8ad2005-04-24 00:26:37 +00001532 case Iop_1Sto8:
sewardj478fe702005-04-23 01:15:47 +00001533 case Iop_1Sto16:
1534 case Iop_1Sto32:
sewardj42322b52005-04-20 22:57:11 +00001535 case Iop_1Sto64: {
1536 /* could do better than this, but for now ... */
1537 HReg dst = newVRegI(env);
1538 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1539 addInstr(env, AMD64Instr_Set64(cond,dst));
sewardj501a3392005-05-11 15:37:50 +00001540 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
1541 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
sewardj42322b52005-04-20 22:57:11 +00001542 return dst;
1543 }
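         /* Sketch: Set64 leaves 0 or 1 in dst; shifting left and then
            arithmetically right by 63 smears that bit across the
            whole register:
               shlq $63, %dst   // 0 or 0x8000000000000000
               sarq $63, %dst   // 0 or 0xFFFFFFFFFFFFFFFF      */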
sewardjf53b7352005-04-06 20:01:56 +00001544 case Iop_Ctz64: {
1545 /* Count trailing zeroes, implemented by amd64 'bsfq' */
1546 HReg dst = newVRegI(env);
1547 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1548 addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
1549 return dst;
1550 }
sewardj537cab02005-04-07 02:03:52 +00001551 case Iop_Clz64: {
1552 /* Count leading zeroes. Do 'bsrq' to establish the index
1553 of the highest set bit, and subtract that value from
1554 63. */
1555 HReg tmp = newVRegI(env);
1556 HReg dst = newVRegI(env);
1557 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1558 addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
1559 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
1560 AMD64RMI_Imm(63), dst));
1561 addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
1562 AMD64RMI_Reg(tmp), dst));
1563 return dst;
1564 }
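         /* For example (illustrative): src = 0x1 gives bsrq == 0 and
            hence dst = 63 - 0 = 63; src with only bit 63 set gives
            bsrq == 63 and dst = 0.  As with Iop_Clz64 itself, there
            is no defined result for src == 0, since bsrq leaves its
            destination undefined in that case. */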
sewardjeb17e492007-08-25 23:07:44 +00001565
1566 case Iop_CmpwNEZ64: {
sewardj176ad2f2005-04-27 11:55:08 +00001567 HReg dst = newVRegI(env);
sewardjeb17e492007-08-25 23:07:44 +00001568 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1569 addInstr(env, mk_iMOVsd_RR(src,dst));
sewardj501a3392005-05-11 15:37:50 +00001570 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
sewardjeb17e492007-08-25 23:07:44 +00001571 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1572 AMD64RMI_Reg(src), dst));
1573 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1574 return dst;
1575 }
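         /* This computes (src | -src) >>signed 63.  For src != 0 the
            top bit of src | -src is always set, so the arithmetic
            shift yields all-ones; for src == 0 it yields zero:
               negq %dst            // dst = -src
               orq  %src, %dst      // dst = src | -src
               sarq $63, %dst       // 0 or 0xFF..FF            */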
1576
1577 case Iop_CmpwNEZ32: {
1578 HReg src = newVRegI(env);
1579 HReg dst = newVRegI(env);
1580 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1581 addInstr(env, mk_iMOVsd_RR(pre,src));
sewardjca257bc2010-09-08 08:34:52 +00001582 addInstr(env, AMD64Instr_MovxLQ(False, src, src));
sewardjeb17e492007-08-25 23:07:44 +00001583 addInstr(env, mk_iMOVsd_RR(src,dst));
1584 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1585 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1586 AMD64RMI_Reg(src), dst));
1587 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1588 return dst;
1589 }
1590
1591 case Iop_Left8:
1592 case Iop_Left16:
1593 case Iop_Left32:
1594 case Iop_Left64: {
1595 HReg dst = newVRegI(env);
1596 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1597 addInstr(env, mk_iMOVsd_RR(src, dst));
1598 addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
1599 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
sewardj176ad2f2005-04-27 11:55:08 +00001600 return dst;
1601 }
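         /* Iop_LeftN(x) is x | -x: every bit from the lowest set bit
            of x upwards becomes 1.  E.g. (illustrative) x = 0b01100
            gives -x = ...10100 and x | -x = ...11100. */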
sewardj537cab02005-04-07 02:03:52 +00001602
sewardj478fe702005-04-23 01:15:47 +00001603 case Iop_V128to32: {
1604 HReg dst = newVRegI(env);
1605 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1606 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1607 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1608 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1609 return dst;
1610 }
sewardj1a01e652005-02-23 11:39:21 +00001611
1612 /* V128{HI}to64 */
1613 case Iop_V128HIto64:
1614 case Iop_V128to64: {
sewardj1a01e652005-02-23 11:39:21 +00001615 HReg dst = newVRegI(env);
sewardjc4530ae2012-05-21 10:18:49 +00001616 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
1617 HReg rsp = hregAMD64_RSP();
sewardj1a01e652005-02-23 11:39:21 +00001618 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
sewardjc4530ae2012-05-21 10:18:49 +00001619 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1620 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
1621 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1622 16, vec, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00001623 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
sewardjc4530ae2012-05-21 10:18:49 +00001624 AMD64RMI_Mem(off_rsp), dst ));
1625 return dst;
1626 }
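         /* Roughly (register names illustrative):
               movups %vec, -16(%rsp)
               movq   -16(%rsp), %dst   // Iop_V128to64
               movq    -8(%rsp), %dst   // Iop_V128HIto64
            writing below %rsp without adjusting it, which presumably
            relies on the 128-byte red zone of the amd64 ELF ABI. */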
1627
1628 case Iop_V256to64_0: case Iop_V256to64_1:
1629 case Iop_V256to64_2: case Iop_V256to64_3: {
1630 HReg vHi, vLo, vec;
1631 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
 1632 /* Do the first part of the selection by deciding which of
 1633 the 128-bit registers to look at, then do the second part
 1634 using the same scheme as for V128{HI}to64 above. */
1635 Int off = 0;
1636 switch (e->Iex.Unop.op) {
1637 case Iop_V256to64_0: vec = vLo; off = -16; break;
1638 case Iop_V256to64_1: vec = vLo; off = -8; break;
1639 case Iop_V256to64_2: vec = vHi; off = -16; break;
1640 case Iop_V256to64_3: vec = vHi; off = -8; break;
1641 default: vassert(0);
1642 }
1643 HReg dst = newVRegI(env);
1644 HReg rsp = hregAMD64_RSP();
1645 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1646 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
1647 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1648 16, vec, m16_rsp));
1649 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1650 AMD64RMI_Mem(off_rsp), dst ));
sewardj1a01e652005-02-23 11:39:21 +00001651 return dst;
1652 }
1653
sewardj924215b2005-03-26 21:50:31 +00001654 /* ReinterpF64asI64(e) */
1655 /* Given an IEEE754 double, produce an I64 with the same bit
1656 pattern. */
1657 case Iop_ReinterpF64asI64: {
1658 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1659 HReg dst = newVRegI(env);
1660 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1661 /* paranoia */
1662 set_SSE_rounding_default(env);
1663 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1664 addInstr(env, AMD64Instr_Alu64R(
1665 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1666 return dst;
1667 }
1668
sewardj79501112008-07-29 09:48:26 +00001669 /* ReinterpF32asI32(e) */
1670 /* Given an IEEE754 single, produce an I64 with the same bit
1671 pattern in the lower half. */
1672 case Iop_ReinterpF32asI32: {
1673 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1674 HReg dst = newVRegI(env);
1675 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1676 /* paranoia */
1677 set_SSE_rounding_default(env);
1678 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
1679 addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
1680 return dst;
1681 }
1682
sewardj85520e42005-02-19 15:22:38 +00001683 case Iop_16to8:
sewardja6b93d12005-02-17 09:28:28 +00001684 case Iop_32to8:
sewardj176ad2f2005-04-27 11:55:08 +00001685 case Iop_64to8:
sewardj7de0d3c2005-02-13 02:26:41 +00001686 case Iop_32to16:
sewardj176ad2f2005-04-27 11:55:08 +00001687 case Iop_64to16:
sewardj486074e2005-02-08 20:10:04 +00001688 case Iop_64to32:
1689 /* These are no-ops. */
1690 return iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjf67eadf2005-02-03 03:53:52 +00001691
sewardje13074c2012-11-08 10:57:08 +00001692 case Iop_GetMSBs8x8: {
1693 /* Note: the following assumes the helper is of
1694 signature
1695 UInt fn ( ULong ), and is not a regparm fn.
1696 */
1697 HReg dst = newVRegI(env);
1698 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1699 fn = (HWord)h_generic_calc_GetMSBs8x8;
1700 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
sewardjcfe046e2013-01-17 14:23:53 +00001701 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00001702 1, mk_RetLoc_simple(RLPri_Int) ));
sewardje13074c2012-11-08 10:57:08 +00001703 /* MovxLQ is not exactly the right thing here. We just
1704 need to get the bottom 8 bits of RAX into dst, and zero
1705 out everything else. Assuming that the helper returns
1706 a UInt with the top 24 bits zeroed out, it'll do,
1707 though. */
1708 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1709 return dst;
1710 }
1711
sewardj78a20592012-12-13 18:29:56 +00001712 case Iop_GetMSBs8x16: {
1713 /* Note: the following assumes the helper is of signature
1714 UInt fn ( ULong w64hi, ULong w64Lo ),
1715 and is not a regparm fn. */
1716 HReg dst = newVRegI(env);
1717 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1718 HReg rsp = hregAMD64_RSP();
1719 fn = (HWord)h_generic_calc_GetMSBs8x16;
1720 AMD64AMode* m8_rsp = AMD64AMode_IR( -8, rsp);
1721 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1722 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1723 16, vec, m16_rsp));
1724 /* hi 64 bits into RDI -- the first arg */
1725 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1726 AMD64RMI_Mem(m8_rsp),
1727 hregAMD64_RDI() )); /* 1st arg */
1728 /* lo 64 bits into RSI -- the 2nd arg */
1729 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1730 AMD64RMI_Mem(m16_rsp),
1731 hregAMD64_RSI() )); /* 2nd arg */
sewardjcfe046e2013-01-17 14:23:53 +00001732 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00001733 2, mk_RetLoc_simple(RLPri_Int) ));
sewardj78a20592012-12-13 18:29:56 +00001734 /* MovxLQ is not exactly the right thing here. We just
sewardj9213c612012-12-19 08:39:11 +00001735 need to get the bottom 16 bits of RAX into dst, and zero
sewardj78a20592012-12-13 18:29:56 +00001736 out everything else. Assuming that the helper returns
sewardj9213c612012-12-19 08:39:11 +00001737 a UInt with the top 16 bits zeroed out, it'll do,
sewardj78a20592012-12-13 18:29:56 +00001738 though. */
1739 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1740 return dst;
1741 }
1742
sewardjf67eadf2005-02-03 03:53:52 +00001743 default:
1744 break;
1745 }
sewardje7905662005-05-09 18:15:21 +00001746
1747 /* Deal with unary 64-bit SIMD ops. */
1748 switch (e->Iex.Unop.op) {
1749 case Iop_CmpNEZ32x2:
1750 fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1751 case Iop_CmpNEZ16x4:
1752 fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1753 case Iop_CmpNEZ8x8:
1754 fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1755 default:
1756 fn = (HWord)0; break;
1757 }
1758 if (fn != (HWord)0) {
1759 /* Note: the following assumes all helpers are of
1760 signature
1761 ULong fn ( ULong ), and they are
1762 not marked as regparm functions.
1763 */
1764 HReg dst = newVRegI(env);
1765 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1766 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
sewardj74142b82013-08-08 10:28:59 +00001767 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
1768 mk_RetLoc_simple(RLPri_Int) ));
sewardje7905662005-05-09 18:15:21 +00001769 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1770 return dst;
1771 }
1772
sewardjf67eadf2005-02-03 03:53:52 +00001773 break;
1774 }
sewardj8258a8c2005-02-02 03:11:24 +00001775
1776 /* --------- GET --------- */
1777 case Iex_Get: {
1778 if (ty == Ity_I64) {
1779 HReg dst = newVRegI(env);
1780 addInstr(env, AMD64Instr_Alu64R(
1781 Aalu_MOV,
1782 AMD64RMI_Mem(
1783 AMD64AMode_IR(e->Iex.Get.offset,
1784 hregAMD64_RBP())),
1785 dst));
1786 return dst;
1787 }
1788 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1789 HReg dst = newVRegI(env);
1790 addInstr(env, AMD64Instr_LoadEX(
sewardj1e499352005-03-23 03:02:50 +00001791 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
sewardj8258a8c2005-02-02 03:11:24 +00001792 False,
1793 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1794 dst));
1795 return dst;
1796 }
1797 break;
1798 }
1799
sewardj8d965312005-02-25 02:48:47 +00001800 case Iex_GetI: {
1801 AMD64AMode* am
1802 = genGuestArrayOffset(
1803 env, e->Iex.GetI.descr,
1804 e->Iex.GetI.ix, e->Iex.GetI.bias );
1805 HReg dst = newVRegI(env);
1806 if (ty == Ity_I8) {
1807 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1808 return dst;
1809 }
sewardj1e015d82005-04-23 23:41:46 +00001810 if (ty == Ity_I64) {
1811 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1812 return dst;
1813 }
sewardj8d965312005-02-25 02:48:47 +00001814 break;
1815 }
sewardj05b3b6a2005-02-04 01:44:33 +00001816
1817 /* --------- CCALL --------- */
1818 case Iex_CCall: {
1819 HReg dst = newVRegI(env);
sewardj7f039c42005-02-04 21:13:55 +00001820 vassert(ty == e->Iex.CCall.retty);
sewardj05b3b6a2005-02-04 01:44:33 +00001821
sewardjcfe046e2013-01-17 14:23:53 +00001822 /* be very restrictive for now. Only 64-bit ints allowed for
sewardj74142b82013-08-08 10:28:59 +00001823 args, and 64 or 32 bits for return type. */
sewardje8aaa872005-07-07 13:12:04 +00001824 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
sewardj05b3b6a2005-02-04 01:44:33 +00001825 goto irreducible;
1826
sewardj7f039c42005-02-04 21:13:55 +00001827 /* Marshal args, do the call. */
sewardj74142b82013-08-08 10:28:59 +00001828 UInt addToSp = 0;
1829 RetLoc rloc = mk_RetLoc_INVALID();
1830 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1831 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1832 vassert(is_sane_RetLoc(rloc));
1833 vassert(rloc.pri == RLPri_Int);
1834 vassert(addToSp == 0);
sewardj05b3b6a2005-02-04 01:44:33 +00001835
sewardje8aaa872005-07-07 13:12:04 +00001836 /* Move to dst, and zero out the top 32 bits if the result type is
1837 Ity_I32. Probably overkill, but still .. */
1838 if (e->Iex.CCall.retty == Ity_I64)
1839 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1840 else
sewardjca257bc2010-09-08 08:34:52 +00001841 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
sewardje8aaa872005-07-07 13:12:04 +00001842
sewardj05b3b6a2005-02-04 01:44:33 +00001843 return dst;
1844 }
1845
sewardj7f039c42005-02-04 21:13:55 +00001846 /* --------- LITERAL --------- */
1847 /* 64/32/16/8-bit literals */
1848 case Iex_Const:
1849 if (ty == Ity_I64) {
1850 HReg r = newVRegI(env);
1851 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1852 return r;
1853 } else {
1854 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1855 HReg r = newVRegI(env);
1856 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1857 return r;
1858 }
sewardj05b3b6a2005-02-04 01:44:33 +00001859
1860 /* --------- MULTIPLEX --------- */
florian99dd03e2013-01-29 03:56:06 +00001861 case Iex_ITE: { // VFD
sewardj009230b2013-01-26 11:47:55 +00001862 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
florian99dd03e2013-01-29 03:56:06 +00001863 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1864 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1865 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.ITE.iffalse);
sewardj009230b2013-01-26 11:47:55 +00001866 HReg dst = newVRegI(env);
florian99dd03e2013-01-29 03:56:06 +00001867 addInstr(env, mk_iMOVsd_RR(r1,dst));
1868 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00001869 addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
1870 return dst;
sewardj05b3b6a2005-02-04 01:44:33 +00001871 }
1872 break;
1873 }
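      /* The cc ^ 1 above exploits the x86/amd64 condition encoding:
         flipping the bottom bit of a condition code negates it
         (Z <-> NZ, L <-> NL, etc).  So the generated code is, roughly,
            movq %r1, %dst        // assume the iftrue value
            cmov<!cc> %r0, %dst   // overwrite with iffalse if cond==0
      */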
sewardj8258a8c2005-02-02 03:11:24 +00001874
sewardjf4c803b2006-09-11 11:07:34 +00001875 /* --------- TERNARY OP --------- */
1876 case Iex_Triop: {
florian420bfa92012-06-02 20:29:22 +00001877 IRTriop *triop = e->Iex.Triop.details;
sewardjf4c803b2006-09-11 11:07:34 +00001878 /* C3210 flags following FPU partial remainder (fprem), both
1879 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
florian420bfa92012-06-02 20:29:22 +00001880 if (triop->op == Iop_PRemC3210F64
1881 || triop->op == Iop_PRem1C3210F64) {
sewardjf4c803b2006-09-11 11:07:34 +00001882 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
florian420bfa92012-06-02 20:29:22 +00001883 HReg arg1 = iselDblExpr(env, triop->arg2);
1884 HReg arg2 = iselDblExpr(env, triop->arg3);
sewardjf4c803b2006-09-11 11:07:34 +00001885 HReg dst = newVRegI(env);
1886 addInstr(env, AMD64Instr_A87Free(2));
1887
1888 /* one arg -> top of x87 stack */
1889 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00001890 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardjf4c803b2006-09-11 11:07:34 +00001891
1892 /* other arg -> top of x87 stack */
1893 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00001894 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardjf4c803b2006-09-11 11:07:34 +00001895
florian420bfa92012-06-02 20:29:22 +00001896 switch (triop->op) {
sewardjf4c803b2006-09-11 11:07:34 +00001897 case Iop_PRemC3210F64:
1898 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1899 break;
sewardj4970e4e2008-10-11 10:07:55 +00001900 case Iop_PRem1C3210F64:
1901 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
1902 break;
sewardjf4c803b2006-09-11 11:07:34 +00001903 default:
1904 vassert(0);
1905 }
1906 /* Ignore the result, and instead make off with the FPU's
1907 C3210 flags (in the status word). */
1908 addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1909 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1910 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1911 return dst;
1912 }
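      /* The 0x4700 mask keeps exactly the x87 condition bits of the
         status word -- C0 at bit 8, C1 at bit 9, C2 at bit 10 and C3
         at bit 14 -- which is where fprem/fprem1 deposit their
         results. */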
1913 break;
1914 }
1915
sewardj8258a8c2005-02-02 03:11:24 +00001916 default:
1917 break;
1918 } /* switch (e->tag) */
1919
1920 /* We get here if no pattern matched. */
1921 irreducible:
1922 ppIRExpr(e);
1923 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1924}
sewardj614b3fb2005-02-02 02:16:03 +00001925
1926
1927/*---------------------------------------------------------*/
1928/*--- ISEL: Integer expression auxiliaries ---*/
1929/*---------------------------------------------------------*/
1930
1931/* --------------------- AMODEs --------------------- */
1932
1933/* Return an AMode which computes the value of the specified
1934 expression, possibly also adding insns to the code list as a
 1935 result. The expression may only be a 64-bit one.
1936*/
1937
sewardj8258a8c2005-02-02 03:11:24 +00001938static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1939{
1940 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1941 vassert(sane_AMode(am));
1942 return am;
1943}
1944
1945/* DO NOT CALL THIS DIRECTLY ! */
1946static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1947{
sewardj05b3b6a2005-02-04 01:44:33 +00001948 MatchInfo mi;
1949 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001950 IRType ty = typeOfIRExpr(env->type_env,e);
1951 vassert(ty == Ity_I64);
1952
sewardj05b3b6a2005-02-04 01:44:33 +00001953 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1954 /* bind0 bind1 bind2 bind3 */
1955 DEFINE_PATTERN(p_complex,
1956 binop( Iop_Add64,
1957 binop( Iop_Add64,
1958 bind(0),
1959 binop(Iop_Shl64, bind(1), bind(2))
1960 ),
1961 bind(3)
1962 )
1963 );
1964 if (matchIRExpr(&mi, p_complex, e)) {
1965 IRExpr* expr1 = mi.bindee[0];
1966 IRExpr* expr2 = mi.bindee[1];
1967 IRExpr* imm8 = mi.bindee[2];
1968 IRExpr* simm32 = mi.bindee[3];
1969 if (imm8->tag == Iex_Const
1970 && imm8->Iex.Const.con->tag == Ico_U8
1971 && imm8->Iex.Const.con->Ico.U8 < 4
1972 /* imm8 is OK, now check simm32 */
1973 && simm32->tag == Iex_Const
1974 && simm32->Iex.Const.con->tag == Ico_U64
1975 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1976 UInt shift = imm8->Iex.Const.con->Ico.U8;
sewardj428fabd2005-03-21 03:11:17 +00001977 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
sewardj05b3b6a2005-02-04 01:44:33 +00001978 HReg r1 = iselIntExpr_R(env, expr1);
1979 HReg r2 = iselIntExpr_R(env, expr2);
1980 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1981 return AMD64AMode_IRRS(offset, r1, r2, shift);
1982 }
1983 }
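   /* For example (illustrative): an expression of the shape
      Add64(Add64(base, Shl64(index, 3:I8)), 0x20:I64) matches with
      shift == 3 and offset == 0x20, and collapses to the single
      addressing mode 0x20(%base,%index,8). */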
1984
sewardj8258a8c2005-02-02 03:11:24 +00001985 /* Add64(expr1, Shl64(expr2, imm)) */
1986 if (e->tag == Iex_Binop
1987 && e->Iex.Binop.op == Iop_Add64
1988 && e->Iex.Binop.arg2->tag == Iex_Binop
1989 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1990 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1991 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1992 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1993 if (shift == 1 || shift == 2 || shift == 3) {
1994 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1995 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1996 return AMD64AMode_IRRS(0, r1, r2, shift);
1997 }
1998 }
1999
2000 /* Add64(expr,i) */
2001 if (e->tag == Iex_Binop
2002 && e->Iex.Binop.op == Iop_Add64
2003 && e->Iex.Binop.arg2->tag == Iex_Const
2004 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2005 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
2006 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2007 return AMD64AMode_IR(
sewardj428fabd2005-03-21 03:11:17 +00002008 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
sewardj8258a8c2005-02-02 03:11:24 +00002009 r1
2010 );
2011 }
2012
2013 /* Doesn't match anything in particular. Generate it into
2014 a register and use that. */
2015 {
2016 HReg r1 = iselIntExpr_R(env, e);
2017 return AMD64AMode_IR(0, r1);
2018 }
2019}
sewardj614b3fb2005-02-02 02:16:03 +00002020
2021
2022/* --------------------- RMIs --------------------- */
2023
2024/* Similarly, calculate an expression into an AMD64RMI operand. As with
 2025 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
2026
2027static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
2028{
2029 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
2030 /* sanity checks ... */
2031 switch (rmi->tag) {
2032 case Armi_Imm:
2033 return rmi;
2034 case Armi_Reg:
2035 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
2036 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
2037 return rmi;
2038 case Armi_Mem:
2039 vassert(sane_AMode(rmi->Armi.Mem.am));
2040 return rmi;
2041 default:
2042 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
2043 }
2044}
2045
2046/* DO NOT CALL THIS DIRECTLY ! */
2047static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
2048{
2049 IRType ty = typeOfIRExpr(env->type_env,e);
2050 vassert(ty == Ity_I64 || ty == Ity_I32
2051 || ty == Ity_I16 || ty == Ity_I8);
2052
2053 /* special case: immediate 64/32/16/8 */
2054 if (e->tag == Iex_Const) {
2055 switch (e->Iex.Const.con->tag) {
2056 case Ico_U64:
2057 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002058 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardj614b3fb2005-02-02 02:16:03 +00002059 }
2060 break;
2061 case Ico_U32:
2062 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
2063 case Ico_U16:
2064 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
2065 case Ico_U8:
2066 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
2067 default:
2068 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2069 }
2070 }
2071
2072 /* special case: 64-bit GET */
2073 if (e->tag == Iex_Get && ty == Ity_I64) {
2074 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2075 hregAMD64_RBP()));
2076 }
2077
sewardj0852a132005-02-21 08:28:46 +00002078 /* special case: 64-bit load from memory */
sewardje9d8a262009-07-01 08:06:34 +00002079 if (e->tag == Iex_Load && ty == Ity_I64
sewardje768e922009-11-26 17:17:37 +00002080 && e->Iex.Load.end == Iend_LE) {
sewardjaf1ceca2005-06-30 23:31:27 +00002081 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj0852a132005-02-21 08:28:46 +00002082 return AMD64RMI_Mem(am);
2083 }
sewardj614b3fb2005-02-02 02:16:03 +00002084
2085 /* default case: calculate into a register and return that */
sewardj8258a8c2005-02-02 03:11:24 +00002086 {
2087 HReg r = iselIntExpr_R ( env, e );
2088 return AMD64RMI_Reg(r);
2089 }
sewardj614b3fb2005-02-02 02:16:03 +00002090}
2091
2092
sewardjf67eadf2005-02-03 03:53:52 +00002093/* --------------------- RIs --------------------- */
2094
2095/* Calculate an expression into an AMD64RI operand. As with
2096 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2097 bits. */
2098
2099static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
2100{
2101 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
2102 /* sanity checks ... */
2103 switch (ri->tag) {
2104 case Ari_Imm:
2105 return ri;
sewardj80d6e6d2008-05-28 09:40:29 +00002106 case Ari_Reg:
sewardjf67eadf2005-02-03 03:53:52 +00002107 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
2108 vassert(hregIsVirtual(ri->Ari.Reg.reg));
2109 return ri;
2110 default:
2111 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2112 }
2113}
2114
2115/* DO NOT CALL THIS DIRECTLY ! */
2116static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
2117{
2118 IRType ty = typeOfIRExpr(env->type_env,e);
2119 vassert(ty == Ity_I64 || ty == Ity_I32
2120 || ty == Ity_I16 || ty == Ity_I8);
2121
2122 /* special case: immediate */
2123 if (e->tag == Iex_Const) {
2124 switch (e->Iex.Const.con->tag) {
2125 case Ico_U64:
2126 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002127 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardjf67eadf2005-02-03 03:53:52 +00002128 }
2129 break;
2130 case Ico_U32:
2131 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
2132 case Ico_U16:
2133 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
2134 case Ico_U8:
2135 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
2136 default:
2137 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2138 }
2139 }
2140
2141 /* default case: calculate into a register and return that */
2142 {
2143 HReg r = iselIntExpr_R ( env, e );
2144 return AMD64RI_Reg(r);
2145 }
2146}
2147
2148
sewardj05b3b6a2005-02-04 01:44:33 +00002149/* --------------------- RMs --------------------- */
2150
2151/* Similarly, calculate an expression into an AMD64RM operand. As
2152 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2153 bits. */
2154
2155static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
2156{
2157 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
2158 /* sanity checks ... */
2159 switch (rm->tag) {
2160 case Arm_Reg:
2161 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
2162 vassert(hregIsVirtual(rm->Arm.Reg.reg));
2163 return rm;
2164 case Arm_Mem:
2165 vassert(sane_AMode(rm->Arm.Mem.am));
2166 return rm;
2167 default:
2168 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2169 }
2170}
2171
2172/* DO NOT CALL THIS DIRECTLY ! */
2173static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
2174{
2175 IRType ty = typeOfIRExpr(env->type_env,e);
2176 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2177
2178 /* special case: 64-bit GET */
2179 if (e->tag == Iex_Get && ty == Ity_I64) {
2180 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2181 hregAMD64_RBP()));
2182 }
2183
 2184 /* special case: load from memory -- not handled here; such loads fall through to the default case below */
2185
2186 /* default case: calculate into a register and return that */
2187 {
2188 HReg r = iselIntExpr_R ( env, e );
2189 return AMD64RM_Reg(r);
2190 }
2191}
2192
2193
2194/* --------------------- CONDCODE --------------------- */
2195
2196/* Generate code to evaluate a bit-typed expression, returning the
 2197 condition code which would be set if the expression had
 2198 notionally returned 1. */
2199
2200static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2201{
2202 /* Uh, there's nothing we can sanity check here, unfortunately. */
2203 return iselCondCode_wrk(env,e);
2204}
2205
2206/* DO NOT CALL THIS DIRECTLY ! */
2207static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
2208{
sewardjf8c37f72005-02-07 18:55:29 +00002209 MatchInfo mi;
sewardj0af46ab2005-04-26 01:52:29 +00002210
sewardj05b3b6a2005-02-04 01:44:33 +00002211 vassert(e);
2212 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2213
sewardj176ad2f2005-04-27 11:55:08 +00002214 /* var */
sewardjdd40fdf2006-12-24 02:20:24 +00002215 if (e->tag == Iex_RdTmp) {
2216 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj176ad2f2005-04-27 11:55:08 +00002217 HReg dst = newVRegI(env);
2218 addInstr(env, mk_iMOVsd_RR(r64,dst));
2219 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
2220 return Acc_NZ;
2221 }
2222
sewardj109e9352005-07-19 08:42:56 +00002223 /* Constant 1:Bit */
2224 if (e->tag == Iex_Const) {
2225 HReg r;
2226 vassert(e->Iex.Const.con->tag == Ico_U1);
2227 vassert(e->Iex.Const.con->Ico.U1 == True
2228 || e->Iex.Const.con->Ico.U1 == False);
2229 r = newVRegI(env);
2230 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2231 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
2232 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2233 }
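   /* The XOR is the instruction that matters here: xorq %r,%r sets
      ZF, so Acc_Z is a condition that always holds and Acc_NZ one
      that never does -- hence Acc_Z for a literal True and Acc_NZ
      for a literal False. */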
sewardj486074e2005-02-08 20:10:04 +00002234
2235 /* Not1(...) */
2236 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2237 /* Generate code for the arg, and negate the test condition */
2238 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2239 }
2240
sewardj176ad2f2005-04-27 11:55:08 +00002241 /* --- patterns rooted at: 64to1 --- */
2242
sewardj176ad2f2005-04-27 11:55:08 +00002243 /* 64to1 */
2244 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
sewardj501a3392005-05-11 15:37:50 +00002245 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2246 addInstr(env, AMD64Instr_Test64(1,reg));
sewardjf8c37f72005-02-07 18:55:29 +00002247 return Acc_NZ;
2248 }
2249
florianc862f282012-07-19 17:22:33 +00002250 /* --- patterns rooted at: 32to1 --- */
2251
2252 /* 32to1 */
2253 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
2254 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2255 addInstr(env, AMD64Instr_Test64(1,reg));
2256 return Acc_NZ;
2257 }
2258
sewardj176ad2f2005-04-27 11:55:08 +00002259 /* --- patterns rooted at: CmpNEZ8 --- */
2260
2261 /* CmpNEZ8(x) */
2262 if (e->tag == Iex_Unop
2263 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2264 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002265 addInstr(env, AMD64Instr_Test64(0xFF,r));
sewardj176ad2f2005-04-27 11:55:08 +00002266 return Acc_NZ;
2267 }
2268
sewardj86ec28b2005-04-27 13:39:35 +00002269 /* --- patterns rooted at: CmpNEZ16 --- */
2270
2271 /* CmpNEZ16(x) */
2272 if (e->tag == Iex_Unop
2273 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2274 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002275 addInstr(env, AMD64Instr_Test64(0xFFFF,r));
sewardj86ec28b2005-04-27 13:39:35 +00002276 return Acc_NZ;
2277 }
2278
sewardj176ad2f2005-04-27 11:55:08 +00002279 /* --- patterns rooted at: CmpNEZ32 --- */
2280
2281 /* CmpNEZ32(x) */
2282 if (e->tag == Iex_Unop
2283 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2284 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj176ad2f2005-04-27 11:55:08 +00002285 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
sewardj9cc2bbf2011-06-05 17:56:03 +00002286 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
sewardj176ad2f2005-04-27 11:55:08 +00002287 return Acc_NZ;
2288 }
2289
2290 /* --- patterns rooted at: CmpNEZ64 --- */
2291
sewardj0bc78ab2005-05-11 22:47:32 +00002292 /* CmpNEZ64(Or64(x,y)) */
2293 {
2294 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2295 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2296 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2297 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2298 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2299 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2300 HReg tmp = newVRegI(env);
2301 addInstr(env, mk_iMOVsd_RR(r0, tmp));
2302 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2303 return Acc_NZ;
2304 }
2305 }
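   /* This pays because a single
         movq %x, %tmp
         orq  <y>, %tmp     // ZF clear iff (x|y) != 0
      tests both halves at once -- e.g. for 128-bit is-nonzero checks
      -- rather than materialising the Or64 and then separately
      comparing it against zero. */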
2306
sewardj176ad2f2005-04-27 11:55:08 +00002307 /* CmpNEZ64(x) */
2308 if (e->tag == Iex_Unop
2309 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2310 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2311 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2312 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2313 return Acc_NZ;
2314 }
2315
2316 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2317
sewardj42322b52005-04-20 22:57:11 +00002318 /* CmpEQ8 / CmpNE8 */
2319 if (e->tag == Iex_Binop
2320 && (e->Iex.Binop.op == Iop_CmpEQ8
sewardj1fb8c922009-07-12 12:56:53 +00002321 || e->Iex.Binop.op == Iop_CmpNE8
2322 || e->Iex.Binop.op == Iop_CasCmpEQ8
2323 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
sewardj009230b2013-01-26 11:47:55 +00002324 if (isZeroU8(e->Iex.Binop.arg2)) {
2325 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2326 addInstr(env, AMD64Instr_Test64(0xFF,r1));
2327 switch (e->Iex.Binop.op) {
2328 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2329 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2330 default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
2331 }
2332 } else {
2333 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2334 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2335 HReg r = newVRegI(env);
2336 addInstr(env, mk_iMOVsd_RR(r1,r));
2337 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2338 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2339 switch (e->Iex.Binop.op) {
2340 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2341 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2342 default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
2343 }
sewardj42322b52005-04-20 22:57:11 +00002344 }
2345 }
2346
sewardj0af46ab2005-04-26 01:52:29 +00002347 /* CmpEQ16 / CmpNE16 */
2348 if (e->tag == Iex_Binop
2349 && (e->Iex.Binop.op == Iop_CmpEQ16
sewardj1fb8c922009-07-12 12:56:53 +00002350 || e->Iex.Binop.op == Iop_CmpNE16
2351 || e->Iex.Binop.op == Iop_CasCmpEQ16
2352 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
sewardj0af46ab2005-04-26 01:52:29 +00002353 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2354 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2355 HReg r = newVRegI(env);
2356 addInstr(env, mk_iMOVsd_RR(r1,r));
2357 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2358 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2359 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002360 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2361 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002362 default: vpanic("iselCondCode(amd64): CmpXX16");
2363 }
2364 }
2365
sewardj50d89bf2011-01-10 15:10:48 +00002366 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
2367 Saves a "movq %rax, %tmp" compared to the default route. */
2368 if (e->tag == Iex_Binop
2369 && e->Iex.Binop.op == Iop_CmpNE64
2370 && e->Iex.Binop.arg1->tag == Iex_CCall
2371 && e->Iex.Binop.arg2->tag == Iex_Const) {
2372 IRExpr* cal = e->Iex.Binop.arg1;
2373 IRExpr* con = e->Iex.Binop.arg2;
2374 HReg tmp = newVRegI(env);
2375 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
2376 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
2377 vassert(con->Iex.Const.con->tag == Ico_U64);
2378 /* Marshal args, do the call. */
sewardj74142b82013-08-08 10:28:59 +00002379 UInt addToSp = 0;
2380 RetLoc rloc = mk_RetLoc_INVALID();
2381 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2382 cal->Iex.CCall.cee,
2383 cal->Iex.CCall.retty, cal->Iex.CCall.args );
2384 vassert(is_sane_RetLoc(rloc));
2385 vassert(rloc.pri == RLPri_Int);
2386 vassert(addToSp == 0);
2387 /* */
sewardj50d89bf2011-01-10 15:10:48 +00002388 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
2389 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
2390 AMD64RMI_Reg(hregAMD64_RAX()), tmp));
2391 return Acc_NZ;
2392 }
2393
sewardjd0a12df2005-02-10 02:07:43 +00002394 /* Cmp*64*(x,y) */
2395 if (e->tag == Iex_Binop
2396 && (e->Iex.Binop.op == Iop_CmpEQ64
2397 || e->Iex.Binop.op == Iop_CmpNE64
sewardj0af46ab2005-04-26 01:52:29 +00002398 || e->Iex.Binop.op == Iop_CmpLT64S
2399 || e->Iex.Binop.op == Iop_CmpLT64U
2400 || e->Iex.Binop.op == Iop_CmpLE64S
sewardja9e4a802005-12-26 19:33:55 +00002401 || e->Iex.Binop.op == Iop_CmpLE64U
sewardj1fb8c922009-07-12 12:56:53 +00002402 || e->Iex.Binop.op == Iop_CasCmpEQ64
sewardje13074c2012-11-08 10:57:08 +00002403 || e->Iex.Binop.op == Iop_CasCmpNE64
2404 || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
sewardjd0a12df2005-02-10 02:07:43 +00002405 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2406 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2407 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2408 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002409 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
sewardje13074c2012-11-08 10:57:08 +00002410 case Iop_CmpNE64:
2411 case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002412 case Iop_CmpLT64S: return Acc_L;
2413 case Iop_CmpLT64U: return Acc_B;
2414 case Iop_CmpLE64S: return Acc_LE;
sewardja9e4a802005-12-26 19:33:55 +00002415 case Iop_CmpLE64U: return Acc_BE;
sewardjd0a12df2005-02-10 02:07:43 +00002416 default: vpanic("iselCondCode(amd64): CmpXX64");
2417 }
2418 }
2419
sewardj9cc2bbf2011-06-05 17:56:03 +00002420 /* Cmp*32*(x,y) */
2421 if (e->tag == Iex_Binop
2422 && (e->Iex.Binop.op == Iop_CmpEQ32
2423 || e->Iex.Binop.op == Iop_CmpNE32
2424 || e->Iex.Binop.op == Iop_CmpLT32S
2425 || e->Iex.Binop.op == Iop_CmpLT32U
2426 || e->Iex.Binop.op == Iop_CmpLE32S
2427 || e->Iex.Binop.op == Iop_CmpLE32U
2428 || e->Iex.Binop.op == Iop_CasCmpEQ32
sewardj009230b2013-01-26 11:47:55 +00002429 || e->Iex.Binop.op == Iop_CasCmpNE32
2430 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
sewardj9cc2bbf2011-06-05 17:56:03 +00002431 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2432 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2433 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
2434 switch (e->Iex.Binop.op) {
2435 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
sewardj009230b2013-01-26 11:47:55 +00002436 case Iop_CmpNE32:
2437 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
sewardj9cc2bbf2011-06-05 17:56:03 +00002438 case Iop_CmpLT32S: return Acc_L;
2439 case Iop_CmpLT32U: return Acc_B;
2440 case Iop_CmpLE32S: return Acc_LE;
2441 case Iop_CmpLE32U: return Acc_BE;
2442 default: vpanic("iselCondCode(amd64): CmpXX32");
2443 }
2444 }
2445
sewardj05b3b6a2005-02-04 01:44:33 +00002446 ppIRExpr(e);
2447 vpanic("iselCondCode(amd64)");
2448}
2449
2450
sewardj9b967672005-02-08 11:13:09 +00002451/*---------------------------------------------------------*/
2452/*--- ISEL: Integer expressions (128 bit) ---*/
2453/*---------------------------------------------------------*/
2454
2455/* Compute a 128-bit value into a register pair, which is returned as
2456 the first two parameters. As with iselIntExpr_R, these may be
2457 either real or virtual regs; in any case they must not be changed
2458 by subsequent code emitted by the caller. */
2459
2460static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2461 ISelEnv* env, IRExpr* e )
2462{
2463 iselInt128Expr_wrk(rHi, rLo, env, e);
2464# if 0
2465 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2466# endif
2467 vassert(hregClass(*rHi) == HRcInt64);
2468 vassert(hregIsVirtual(*rHi));
2469 vassert(hregClass(*rLo) == HRcInt64);
2470 vassert(hregIsVirtual(*rLo));
2471}
2472
2473/* DO NOT CALL THIS DIRECTLY ! */
2474static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2475 ISelEnv* env, IRExpr* e )
2476{
sewardj9b967672005-02-08 11:13:09 +00002477 vassert(e);
2478 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2479
sewardj9b967672005-02-08 11:13:09 +00002480 /* read 128-bit IRTemp */
sewardjdd40fdf2006-12-24 02:20:24 +00002481 if (e->tag == Iex_RdTmp) {
sewardjc4530ae2012-05-21 10:18:49 +00002482 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
sewardj9b967672005-02-08 11:13:09 +00002483 return;
2484 }
2485
sewardj9b967672005-02-08 11:13:09 +00002486 /* --------- BINARY ops --------- */
2487 if (e->tag == Iex_Binop) {
2488 switch (e->Iex.Binop.op) {
sewardj7de0d3c2005-02-13 02:26:41 +00002489 /* 64 x 64 -> 128 multiply */
sewardj9b967672005-02-08 11:13:09 +00002490 case Iop_MullU64:
2491 case Iop_MullS64: {
2492 /* get one operand into %rax, and the other into a R/M.
2493 Need to make an educated guess about which is better in
2494 which. */
2495 HReg tLo = newVRegI(env);
2496 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002497 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
sewardj9b967672005-02-08 11:13:09 +00002498 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2499 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2500 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
sewardj501a3392005-05-11 15:37:50 +00002501 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
sewardj9b967672005-02-08 11:13:09 +00002502 /* Result is now in RDX:RAX. Tell the caller. */
2503 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2504 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2505 *rHi = tHi;
2506 *rLo = tLo;
2507 return;
2508 }
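         /* Roughly:
               movq  rRight, %rax
               mulq  <rmLeft>          // imulq for Iop_MullS64
               // full 128-bit product now in %rdx:%rax
            One-operand mulq/imulq is the classic amd64 way to obtain
            the high 64 bits of a product. */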
sewardj7de0d3c2005-02-13 02:26:41 +00002509
sewardja6b93d12005-02-17 09:28:28 +00002510 /* 128 x 64 -> (64(rem),64(div)) division */
2511 case Iop_DivModU128to64:
2512 case Iop_DivModS128to64: {
2513 /* Get the 128-bit operand into rdx:rax, and the other into
2514 any old R/M. */
2515 HReg sHi, sLo;
2516 HReg tLo = newVRegI(env);
2517 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002518 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
sewardja6b93d12005-02-17 09:28:28 +00002519 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2520 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2521 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2522 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2523 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2524 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2525 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2526 *rHi = tHi;
2527 *rLo = tLo;
2528 return;
2529 }
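         /* Roughly:
               movq sHi, %rdx
               movq sLo, %rax
               divq <rmRight>          // idivq for the signed case
               // quotient -> %rax, remainder -> %rdx
            so the I128 result carries the remainder in its high half
            and the quotient in its low half, as DivMod*128to64
            requires. */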
2530
2531 /* 64HLto128(e1,e2) */
2532 case Iop_64HLto128:
2533 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2534 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2535 return;
2536
sewardj9b967672005-02-08 11:13:09 +00002537 default:
2538 break;
2539 }
2540 } /* if (e->tag == Iex_Binop) */
2541
sewardj9b967672005-02-08 11:13:09 +00002542 ppIRExpr(e);
2543 vpanic("iselInt128Expr");
2544}
2545
2546
sewardj8d965312005-02-25 02:48:47 +00002547/*---------------------------------------------------------*/
2548/*--- ISEL: Floating point expressions (32 bit) ---*/
2549/*---------------------------------------------------------*/
2550
2551/* Nothing interesting here; really just wrappers for
2552 64-bit stuff. */
2553
2554static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2555{
2556 HReg r = iselFltExpr_wrk( env, e );
2557# if 0
2558 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2559# endif
2560 vassert(hregClass(r) == HRcVec128);
2561 vassert(hregIsVirtual(r));
2562 return r;
2563}
2564
2565/* DO NOT CALL THIS DIRECTLY */
2566static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2567{
2568 IRType ty = typeOfIRExpr(env->type_env,e);
2569 vassert(ty == Ity_F32);
2570
sewardjdd40fdf2006-12-24 02:20:24 +00002571 if (e->tag == Iex_RdTmp) {
2572 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardjc49ce232005-02-25 13:03:03 +00002573 }
2574
sewardje768e922009-11-26 17:17:37 +00002575 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardjc49ce232005-02-25 13:03:03 +00002576 AMD64AMode* am;
2577 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002578 vassert(e->Iex.Load.ty == Ity_F32);
2579 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardjc49ce232005-02-25 13:03:03 +00002580 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2581 return res;
2582 }
sewardj8d965312005-02-25 02:48:47 +00002583
2584 if (e->tag == Iex_Binop
2585 && e->Iex.Binop.op == Iop_F64toF32) {
2586 /* Although the result is still held in a standard SSE register,
2587 we need to round it to reflect the loss of accuracy/range
2588 entailed in casting it to a 32-bit float. */
2589 HReg dst = newVRegV(env);
2590 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2591 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2592 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2593 set_SSE_rounding_default( env );
2594 return dst;
2595 }
2596
sewardjc49ce232005-02-25 13:03:03 +00002597 if (e->tag == Iex_Get) {
2598 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2599 hregAMD64_RBP() );
2600 HReg res = newVRegV(env);
2601 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2602 return res;
2603 }
2604
sewardj5992bd02005-05-11 02:13:42 +00002605 if (e->tag == Iex_Unop
2606 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2607 /* Given an I32, produce an IEEE754 float with the same bit
2608 pattern. */
2609 HReg dst = newVRegV(env);
2610 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2611 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2612 addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2613 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2614 return dst;
2615 }
sewardj8d965312005-02-25 02:48:47 +00002616
sewardjd15b5972010-06-27 09:06:34 +00002617 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2618 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2619 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
2620 HReg dst = newVRegV(env);
2621
 2622 /* arg now holds the value to be rounded. The first thing to do
2623 is set the FPU's rounding mode accordingly. */
2624
2625 /* Set host x87 rounding mode */
2626 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2627
2628 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
2629 addInstr(env, AMD64Instr_A87Free(1));
2630 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
2631 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2632 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
2633 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
2634
2635 /* Restore default x87 rounding. */
2636 set_FPU_rounding_default( env );
2637
2638 return dst;
2639 }
2640
sewardjcc3d2192013-03-27 11:37:33 +00002641 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) {
2642 /* Sigh ... very rough code. Could do much better. */
2643 /* Get the 128-bit literal 00---0 10---0 into a register
2644 and xor it with the value to be negated. */
2645 HReg r1 = newVRegI(env);
2646 HReg dst = newVRegV(env);
2647 HReg tmp = newVRegV(env);
2648 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2649 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
2650 addInstr(env, mk_vMOVsd_RR(src,tmp));
2651 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
2652 addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 ));
2653 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
2654 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
2655 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
2656 add_to_rsp(env, 16);
2657 return dst;
2658 }
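   /* The two pushes build the 16-byte literal 00..00_80000000 at
      (%rsp): the second push supplies the low qword, so only bit 31
      of the vector is set.  XORing that into the value flips the
      IEEE754 sign bit of the F32 in lane 0 and leaves all other
      lanes unchanged. */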
2659
2660 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) {
2661 IRQop *qop = e->Iex.Qop.details;
2662 HReg dst = newVRegV(env);
2663 HReg argX = iselFltExpr(env, qop->arg2);
2664 HReg argY = iselFltExpr(env, qop->arg3);
2665 HReg argZ = iselFltExpr(env, qop->arg4);
2666 /* XXXROUNDINGFIXME */
2667 /* set roundingmode here */
2668 /* subq $16, %rsp -- make a space*/
2669 sub_from_rsp(env, 16);
2670 /* Prepare 4 arg regs:
2671 leaq 0(%rsp), %rdi
2672 leaq 4(%rsp), %rsi
2673 leaq 8(%rsp), %rdx
2674 leaq 12(%rsp), %rcx
2675 */
2676 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2677 hregAMD64_RDI()));
2678 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
2679 hregAMD64_RSI()));
2680 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2681 hregAMD64_RDX()));
2682 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
2683 hregAMD64_RCX()));
2684 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2685 movss %argX, 0(%rsi)
2686 movss %argY, 0(%rdx)
2687 movss %argZ, 0(%rcx)
2688 */
2689 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX,
2690 AMD64AMode_IR(0, hregAMD64_RSI())));
2691 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY,
2692 AMD64AMode_IR(0, hregAMD64_RDX())));
2693 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ,
2694 AMD64AMode_IR(0, hregAMD64_RCX())));
2695 /* call the helper */
2696 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2697 (ULong)(HWord)h_generic_calc_MAddF32,
sewardj74142b82013-08-08 10:28:59 +00002698 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002699 /* fetch the result back from memory, at 0(%rsp), where the
 2700 helper will have written it. */
2701 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
2702 AMD64AMode_IR(0, hregAMD64_RSP())));
2703 /* and finally, clear the space */
2704 add_to_rsp(env, 16);
2705 return dst;
2706 }
2707
sewardj8d965312005-02-25 02:48:47 +00002708 ppIRExpr(e);
2709 vpanic("iselFltExpr_wrk");
2710}
sewardj18303862005-02-21 12:36:54 +00002711
2712
2713/*---------------------------------------------------------*/
2714/*--- ISEL: Floating point expressions (64 bit) ---*/
2715/*---------------------------------------------------------*/
2716
2717/* Compute a 64-bit floating point value into the lower half of an xmm
2718 register, the identity of which is returned. As with
2719 iselIntExpr_R, the returned reg will be virtual, and it must not be
2720 changed by subsequent code emitted by the caller.
2721*/
2722
2723/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2724
2725 Type S (1 bit) E (11 bits) F (52 bits)
2726 ---- --------- ----------- -----------
2727 signalling NaN u 2047 (max) .0uuuuu---u
2728 (with at least
2729 one 1 bit)
2730 quiet NaN u 2047 (max) .1uuuuu---u
2731
2732 negative infinity 1 2047 (max) .000000---0
2733
2734 positive infinity 0 2047 (max) .000000---0
2735
2736 negative zero 1 0 .000000---0
2737
2738 positive zero 0 0 .000000---0
2739*/

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      union { ULong u64; Double f64; } u;
      HReg res = newVRegV(env);
      HReg tmp = newVRegI(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(amd64): const");

      addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/, 8, res,
                       AMD64AMode_IR(0, hregAMD64_RSP())
              ));
      add_to_rsp(env, 8);
      return res;
   }
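
   /* There is no direct imm64-to-xmm move, hence the detour just
      emitted, which amounts to roughly:
         movabsq $bits, %tmp      -- 64-bit literal into an int reg
         pushq   %tmp             -- park it on the stack
         movsd   0(%rsp), %res    -- reload into the low xmm lane
         addq    $8, %rsp         -- drop the temporary slot
   */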

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
              e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      AMD64SseOp op = Asse_INVALID;
      switch (triop->op) {
         case Iop_AddF64: op = Asse_ADDF; break;
         case Iop_SubF64: op = Asse_SUBF; break;
         case Iop_MulF64: op = Asse_MULF; break;
         case Iop_DivF64: op = Asse_DIVF; break;
         default: break;
      }
      if (op != Asse_INVALID) {
         HReg dst  = newVRegV(env);
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
         return dst;
      }
   }

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) {
      IRQop *qop = e->Iex.Qop.details;
      HReg dst  = newVRegV(env);
      HReg argX = iselDblExpr(env, qop->arg2);
      HReg argY = iselDblExpr(env, qop->arg3);
      HReg argZ = iselDblExpr(env, qop->arg4);
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      /* subq $32, %rsp -- make a space */
      sub_from_rsp(env, 32);
      /* Prepare 4 arg regs:
         leaq 0(%rsp), %rdi
         leaq 8(%rsp), %rsi
         leaq 16(%rsp), %rdx
         leaq 24(%rsp), %rcx
      */
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
                                     hregAMD64_RDI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
                                     hregAMD64_RSI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
                                     hregAMD64_RDX()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
                                     hregAMD64_RCX()));
      /* Store the three args, at (%rsi), (%rdx) and (%rcx):
         movsd %argX, 0(%rsi)
         movsd %argY, 0(%rdx)
         movsd %argZ, 0(%rcx)
      */
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX,
                                       AMD64AMode_IR(0, hregAMD64_RSI())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY,
                                       AMD64AMode_IR(0, hregAMD64_RDX())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ,
                                       AMD64AMode_IR(0, hregAMD64_RCX())));
      /* call the helper */
      addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
                                     (ULong)(HWord)h_generic_calc_MAddF64,
                                     4, mk_RetLoc_simple(RLPri_None) ));
      /* fetch the result from memory, at 0(%rsp), which is where the
         helper will have written it */
      addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
                                       AMD64AMode_IR(0, hregAMD64_RSP())));
      /* and finally, clear the space */
      add_to_rsp(env, 32);
      return dst;
   }
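
   /* Layout of the 32-byte scratch area used by the MAddF64 call
      above:
         0(%rsp)   result, written back by the helper
         8(%rsp)   argX
        16(%rsp)   argY
        24(%rsp)   argZ */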

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg        dst    = newVRegV(env);

      /* arg now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host x87 rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
      addInstr(env, AMD64Instr_A87Free(1));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));

      /* Restore default x87 rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }
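
   /* Plain SSE2 has no round-to-integer that respects the IR rounding
      mode (roundsd only arrives with SSE4.1), so the value takes a
      round trip through the x87 stack: store, fld, Afp_ROUND
      (presumably an frndint), fstp, and reload into an xmm reg. */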

   IRTriop *triop = e->Iex.Triop.details;
   if (e->tag == Iex_Triop
       && (triop->op == Iop_ScaleF64
           || triop->op == Iop_AtanF64
           || triop->op == Iop_Yl2xF64
           || triop->op == Iop_Yl2xp1F64
           || triop->op == Iop_PRemF64
           || triop->op == Iop_PRem1F64)
      ) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg1   = iselDblExpr(env, triop->arg2);
      HReg        arg2   = iselDblExpr(env, triop->arg3);
      HReg        dst    = newVRegV(env);
      Bool     arg2first = toBool(triop->op == Iop_ScaleF64
                                  || triop->op == Iop_PRemF64
                                  || triop->op == Iop_PRem1F64);
      addInstr(env, AMD64Instr_A87Free(2));

      /* one arg -> top of x87 stack */
      addInstr(env, AMD64Instr_SseLdSt(
                       False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

      /* other arg -> top of x87 stack */
      addInstr(env, AMD64Instr_SseLdSt(
                       False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

      /* do it */
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      switch (triop->op) {
         case Iop_ScaleF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
            break;
         case Iop_AtanF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
            break;
         case Iop_Yl2xF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
            break;
         case Iop_Yl2xp1F64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
            break;
         case Iop_PRemF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
            break;
         case Iop_PRem1F64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
            break;
         default:
            vassert(0);
      }

      /* save result */
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
      return dst;
   }
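
   /* The arg2first flag above controls the x87 push order: for fscale,
      fprem and fprem1 the second IR operand (the scale factor resp.
      divisor) must end up in st(1) and the first operand in st(0),
      since those instructions have fixed operand roles. */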

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
      set_SSE_rounding_default( env );
      addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
      return dst;
   }

   if (e->tag == Iex_Unop
       && (e->Iex.Unop.op == Iop_NegF64
           || e->Iex.Unop.op == Iop_AbsF64)) {
      /* Sigh ... very rough code.  Could do much better. */
      /* Get the 128-bit literal 00---0 10---0 into a register
         and xor/nand it with the value to be negated. */
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));

      if (e->Iex.Unop.op == Iop_NegF64)
         addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      else
         addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));

      add_to_rsp(env, 16);
      return dst;
   }
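
   /* The two pushes above leave 16 bytes at (%rsp) in which only bit
      63 is set, so the mask loaded into dst covers exactly the F64
      sign bit.  xorpd with the mask flips the sign (Neg); andnpd,
      which computes dst = ~dst & src, clears it (Abs). */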

   if (e->tag == Iex_Binop) {
      A87FpOp fpop = Afp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: fpop = Afp_SQRT; break;
         case Iop_SinF64:  fpop = Afp_SIN;  break;
         case Iop_CosF64:  fpop = Afp_COS;  break;
         case Iop_TanF64:  fpop = Afp_TAN;  break;
         case Iop_2xm1F64: fpop = Afp_2XM1; break;
         default: break;
      }
      if (fpop != Afp_INVALID) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg        dst    = newVRegV(env);
         Int     nNeeded    = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
         addInstr(env, AMD64Instr_A87Free(nNeeded));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_A87FpOp(fpop));
         if (e->Iex.Binop.op==Iop_TanF64) {
            /* get rid of the extra 1.0 that fptan pushes */
            addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
         }
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
         return dst;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//..       case Iop_I32toF64: {
//..          HReg dst = newVRegF(env);
//..          HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
//..          set_FPU_rounding_default(env);
//..          addInstr(env, X86Instr_FpLdStI(
//..                           True/*load*/, 4, dst,
//..                           X86AMode_IR(0, hregX86_ESP())));
//..          add_to_esp(env, 4);
//..          return dst;
//..       }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegV(env);
            AMD64RI*    src    = iselIntExpr_RI(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
            return dst;
         }
         case Iop_F32toF64: {
            HReg f32;
            HReg f64 = newVRegV(env);
            /* this shouldn't be necessary, but be paranoid ... */
            set_SSE_rounding_default(env);
            f32 = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
            return f64;
         }
         default:
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      HReg r1, r0, dst;
      vassert(ty == Ity_F64);
      vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1);
      r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
      r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
      dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}


/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   HWord      fn = 0; /* address of helper fn, if required */
   Bool       arg1isEReg = False;
   AMD64SseOp op = Asse_INVALID;
   IRType     ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/,
                       16,
                       dst,
                       AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
                    )
             );
      return dst;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg        dst = newVRegV(env);
      AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
      switch (e->Iex.Const.con->Ico.V128) {
         case 0x0000:
            dst = generate_zeroes_V128(env);
            break;
         case 0xFFFF:
            dst = generate_ones_V128(env);
            break;
         default: {
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            /* do push_uimm64 twice, first time for the high-order half. */
            push_uimm64(env, bitmask8_to_bytemask64(
                                (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
                       ));
            push_uimm64(env, bitmask8_to_bytemask64(
                                (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
                       ));
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
            add_to_rsp(env, 16);
            break;
         }
      }
      return dst;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         case Iop_NotV128: {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            return do_sse_NotV128(env, arg);
         }

         case Iop_CmpNEZ64x2: {
            /* We can use SSE2 instructions for this. */
            /* Ideally, we want to do a 64Ix2 comparison against zero of
               the operand.  Problem is no such insn exists.  Solution
               therefore is to do a 32Ix4 comparison instead, and bitwise-
               negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
               let the not'd result of this initial comparison be a:b:c:d.
               What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
               pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
               giving the required result.

               The required selection sequence is 2,3,0,1, which
               according to Intel's documentation means the pshufd
               literal value is 0xB1, that is,
               (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
            */
            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp  = generate_zeroes_V128(env);
            HReg dst  = newVRegV(env);
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
            tmp = do_sse_NotV128(env, tmp);
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
            return dst;
         }

         case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ8x16: op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
         do_CmpNEZ_vector:
         {
            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp  = newVRegV(env);
            HReg zero = generate_zeroes_V128(env);
            HReg dst;
            addInstr(env, mk_vMOVsd_RR(arg, tmp));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
            dst = do_sse_NotV128(env, tmp);
            return dst;
         }

         case Iop_Recip32Fx4: op = Asse_RCPF;   goto do_32Fx4_unary;
         case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
         case Iop_Sqrt32Fx4:  op = Asse_SQRTF;  goto do_32Fx4_unary;
         do_32Fx4_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
            return dst;
         }

         case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
         do_64Fx2_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
            return dst;
         }

         case Iop_Recip32F0x4: op = Asse_RCPF;   goto do_32F0x4_unary;
         case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
         case Iop_Sqrt32F0x4:  op = Asse_SQRTF;  goto do_32F0x4_unary;
         do_32F0x4_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper 3/4 of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper 3/4 is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
            return dst;
         }

         case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
         do_64F0x2_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper half of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper half is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
            return dst;
         }

         case Iop_32UtoV128: {
            HReg        dst     = newVRegV(env);
            AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
            AMD64RI*    ri      = iselIntExpr_RI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
            addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
            return dst;
         }

         case Iop_64UtoV128: {
            HReg        dst  = newVRegV(env);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
            add_to_rsp(env, 8);
            return dst;
         }
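
         /* SseLdzLO above is a zero-extending low load -- in effect a
            movd (4 bytes) or movq (8 bytes) to an xmm register, both
            of which clear the upper lanes, which is exactly the
            32UtoV128 / 64UtoV128 semantics. */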

         case Iop_V256toV128_0:
         case Iop_V256toV128_1: {
            HReg vHi, vLo;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         /* FIXME: could we generate MOVQ here? */
         case Iop_SetV128lo64: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         /* FIXME: could we generate MOVD here? */
         case Iop_SetV128lo32: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         case Iop_64HLtoV128: {
            HReg        rsp     = hregAMD64_RSP();
            AMD64AMode* m8_rsp  = AMD64AMode_IR(-8,  rsp);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64RI*    qHi     = iselIntExpr_RI(env, e->Iex.Binop.arg1);
            AMD64RI*    qLo     = iselIntExpr_RI(env, e->Iex.Binop.arg2);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp));
            HReg dst = newVRegV(env);
            /* One store-forwarding stall coming up, oh well :-( */
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp));
            return dst;
         }

         case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
         case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
         case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
         case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
         case Iop_Add32Fx4:   op = Asse_ADDF;   goto do_32Fx4;
         case Iop_Div32Fx4:   op = Asse_DIVF;   goto do_32Fx4;
         case Iop_Max32Fx4:   op = Asse_MAXF;   goto do_32Fx4;
         case Iop_Min32Fx4:   op = Asse_MINF;   goto do_32Fx4;
         case Iop_Mul32Fx4:   op = Asse_MULF;   goto do_32Fx4;
         case Iop_Sub32Fx4:   op = Asse_SUBF;   goto do_32Fx4;
         do_32Fx4:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
         case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
         case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
         case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
         case Iop_Add64Fx2:   op = Asse_ADDF;   goto do_64Fx2;
         case Iop_Div64Fx2:   op = Asse_DIVF;   goto do_64Fx2;
         case Iop_Max64Fx2:   op = Asse_MAXF;   goto do_64Fx2;
         case Iop_Min64Fx2:   op = Asse_MINF;   goto do_64Fx2;
         case Iop_Mul64Fx2:   op = Asse_MULF;   goto do_64Fx2;
         case Iop_Sub64Fx2:   op = Asse_SUBF;   goto do_64Fx2;
         do_64Fx2:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
         case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
         case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
         case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
         case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
         case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
         case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
         case Iop_Min32F0x4:   op = Asse_MINF;   goto do_32F0x4;
         case Iop_Mul32F0x4:   op = Asse_MULF;   goto do_32F0x4;
         case Iop_Sub32F0x4:   op = Asse_SUBF;   goto do_32F0x4;
         do_32F0x4: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
         case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
         case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
         case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
         case Iop_Add64F0x2:   op = Asse_ADDF;   goto do_64F0x2;
         case Iop_Div64F0x2:   op = Asse_DIVF;   goto do_64F0x2;
         case Iop_Max64F0x2:   op = Asse_MAXF;   goto do_64F0x2;
         case Iop_Min64F0x2:   op = Asse_MINF;   goto do_64F0x2;
         case Iop_Mul64F0x2:   op = Asse_MULF;   goto do_64F0x2;
         case Iop_Sub64F0x2:   op = Asse_SUBF;   goto do_64F0x2;
         do_64F0x2: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
            return dst;
         }

         case Iop_QNarrowBin32Sto16Sx8:
            op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Sx16:
            op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Ux16:
            op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveHI8x16:
            op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI16x8:
            op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI32x4:
            op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI64x2:
            op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveLO8x16:
            op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO16x8:
            op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO32x4:
            op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO64x2:
            op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_AndV128:    op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV128:     op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV128:    op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x16:    op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x8:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x4:    op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x2:    op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx16:  op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx8:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux16:  op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux8:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux16:   op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux8:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x16:  op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x8:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x4:  op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx16: op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx8:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux16:   op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx8:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux16:   op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x8:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x16:    op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x8:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x4:    op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x2:    op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx16:  op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx8:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux16:  op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux8:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg: {
            HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            if (arg1isEReg) {
               addInstr(env, mk_vMOVsd_RR(arg2, dst));
               addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
            } else {
               addInstr(env, mk_vMOVsd_RR(arg1, dst));
               addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
            }
            return dst;
         }

         case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
         case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
         case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
         case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
         case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
         case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
         case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
         case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
         do_SseShift: {
            HReg        greg = iselVecExpr(env, e->Iex.Binop.arg1);
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        ereg = newVRegV(env);
            HReg        dst  = newVRegV(env);
            addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
            addInstr(env, mk_vMOVsd_RR(greg, dst));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
            add_to_rsp(env, 16);
            return dst;
         }
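
         /* The push/load dance above is needed because the SSE2 shift
            instructions take the shift count from the low 64 bits of
            an xmm (or m128) operand, not from an integer register; the
            initial zero push ensures the count is zero-extended to the
            full 128 bits. */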

         case Iop_Mul32x4:    fn = (HWord)h_generic_calc_Mul32x4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Sx4:   fn = (HWord)h_generic_calc_Max32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Sx4:   fn = (HWord)h_generic_calc_Min32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Ux4:   fn = (HWord)h_generic_calc_Max32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Ux4:   fn = (HWord)h_generic_calc_Min32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Max16Ux8:   fn = (HWord)h_generic_calc_Max16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Min16Ux8:   fn = (HWord)h_generic_calc_Min16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Max8Sx16:   fn = (HWord)h_generic_calc_Max8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_Min8Sx16:   fn = (HWord)h_generic_calc_Min8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_CmpEQ64x2:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                              goto do_SseAssistedBinary;
         case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                              goto do_SseAssistedBinary;
         case Iop_Perm32x4:   fn = (HWord)h_generic_calc_Perm32x4;
                              goto do_SseAssistedBinary;
         case Iop_QNarrowBin32Sto16Ux8:
                              fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_NarrowBin16to8x16:
                              fn = (HWord)h_generic_calc_NarrowBin16to8x16;
                              goto do_SseAssistedBinary;
         case Iop_NarrowBin32to16x8:
                              fn = (HWord)h_generic_calc_NarrowBin32to16x8;
                              goto do_SseAssistedBinary;
         do_SseAssistedBinary: {
            /* RRRufff!  RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dst  = newVRegV(env);
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $112, %rsp -- make a space */
            sub_from_rsp(env, 112);
            /* leaq 48(%rsp), %r_argp -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RDX()));
            /* Store the two args, at (%rsi) and (%rdx):
               movupd  %argL, 0(%rsi)
               movupd  %argR, 0(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           3, mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
                                             AMD64AMode_IR(0, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 112);
            return dst;
         }
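
         /* Layout of the 112-byte scratch area used above, with argp
            the 16-aligned pointer carved out of it:
               argp+0    result, filled in by the helper
               argp+16   argL
               argp+32   argR
            Starting at 48(%rsp) and then aligning downwards keeps all
            three slots inside the reservation. */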

         case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
                            goto do_SseAssistedVectorAndScalar;
         case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
                            goto do_SseAssistedVectorAndScalar;
         do_SseAssistedVectorAndScalar: {
            /* RRRufff!  RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dst  = newVRegV(env);
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $112, %rsp -- make a space */
            sub_from_rsp(env, 112);
            /* leaq 48(%rsp), %r_argp -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 2 vector arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            /* Store the vector arg, at (%rsi):
               movupd  %argL, 0(%rsi)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            /* And get the scalar value into rdx */
            addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));

            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           3, mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
                                             AMD64AMode_IR(0, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 112);
            return dst;
         }

         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   //vec_fail:
   vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs.   ---*/
/*---------------------------------------------------------*/

static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                           ISelEnv* env, IRExpr* e )
{
   iselDVecExpr_wrk( rHi, rLo, env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcVec128);
   vassert(hregClass(*rLo) == HRcVec128);
   vassert(hregIsVirtual(*rHi));
   vassert(hregIsVirtual(*rLo));
}


/* DO NOT CALL THIS DIRECTLY */
static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                               ISelEnv* env, IRExpr* e )
{
   HWord fn = 0; /* address of helper fn, if required */
   vassert(e);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V256);

   AMD64SseOp op = Asse_INVALID;

   /* read 256-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   if (e->tag == Iex_Get) {
      HReg        vHi  = newVRegV(env);
      HReg        vLo  = newVRegV(env);
      HReg        rbp  = hregAMD64_RBP();
      AMD64AMode* am0  = AMD64AMode_IR(e->Iex.Get.offset + 0,  rbp);
      AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_Load) {
      HReg        vHi  = newVRegV(env);
      HReg        vLo  = newVRegV(env);
      HReg        rA   = iselIntExpr_R(env, e->Iex.Load.addr);
      AMD64AMode* am0  = AMD64AMode_IR(0,  rA);
      AMD64AMode* am16 = AMD64AMode_IR(16, rA);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_V256);
      switch (e->Iex.Const.con->Ico.V256) {
         case 0x00000000: {
            HReg vHi = generate_zeroes_V128(env);
            HReg vLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(vHi, vLo));
            *rHi = vHi;
            *rLo = vLo;
            return;
         }
         default:
            break; /* give up.  Until such time as is necessary. */
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         case Iop_NotV256: {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            *rHi = do_sse_NotV128(env, argHi);
            *rLo = do_sse_NotV128(env, argLo);
            return;
         }

         case Iop_Recip32Fx8: op = Asse_RCPF;   goto do_32Fx8_unary;
         case Iop_Sqrt32Fx8:  op = Asse_SQRTF;  goto do_32Fx8_unary;
         case Iop_RSqrt32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
         do_32Fx8_unary:
         {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary;
         do_64Fx4_unary:
         {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_CmpNEZ64x4: {
            /* We can use SSE2 instructions for this. */
            /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
               (obviously).  See comment on Iop_CmpNEZ64x2 for
               explanation of what's going on here. */
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg tmpHi = generate_zeroes_V128(env);
            HReg tmpLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
            tmpHi = do_sse_NotV128(env, tmpHi);
            tmpLo = do_sse_NotV128(env, tmpLo);
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_CmpNEZ32x8:  op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ8x32:  op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
         do_CmpNEZ_vector:
         {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg tmpHi = newVRegV(env);
            HReg tmpLo = newVRegV(env);
            HReg zero  = generate_zeroes_V128(env);
            HReg dstHi, dstLo;
            addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
            addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
            dstHi = do_sse_NotV128(env, tmpHi);
            dstLo = do_sse_NotV128(env, tmpLo);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4;
         case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4;
         case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4;
         case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4;
         case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
         case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
         do_64Fx4:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8;
         case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8;
         case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8;
         case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8;
         case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
         case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
         do_32Fx8:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_AndV256:     op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV256:      op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV256:     op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x32:     op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x16:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x8:     op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x4:     op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx32:   op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx16:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux32:   op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux16:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux32:    op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux16:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x32:   op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x16:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x8:   op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx32:  op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx8:  op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx16:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux32:    op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx16:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux32:    op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x16:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x32:     op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x16:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x8:     op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x4:     op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx32:   op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx16:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux32:   op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux16:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
         case Iop_ShlN32x8:  op = Asse_SHL32; goto do_SseShift;
         case Iop_ShlN64x4:  op = Asse_SHL64; goto do_SseShift;
         case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
         case Iop_SarN32x8:  op = Asse_SAR32; goto do_SseShift;
         case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
         case Iop_ShrN32x8:  op = Asse_SHR32; goto do_SseShift;
         case Iop_ShrN64x4:  op = Asse_SHR64; goto do_SseShift;
         do_SseShift: {
            HReg        gregHi, gregLo;
            iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
            AMD64RMI*   rmi   = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp0  = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        ereg  = newVRegV(env);
            HReg        dstHi = newVRegV(env);
            HReg        dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
            addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
            addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
            add_to_rsp(env, 16);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_V128HLtoV256: {
            *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
            *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
            return;
         }

         case Iop_Mul32x8:    fn = (HWord)h_generic_calc_Mul32x4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Sx8:   fn = (HWord)h_generic_calc_Max32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Sx8:   fn = (HWord)h_generic_calc_Min32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Ux8:   fn = (HWord)h_generic_calc_Max32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Ux8:   fn = (HWord)h_generic_calc_Min32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Max16Ux16:  fn = (HWord)h_generic_calc_Max16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Min16Ux16:  fn = (HWord)h_generic_calc_Min16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Max8Sx32:   fn = (HWord)h_generic_calc_Max8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_Min8Sx32:   fn = (HWord)h_generic_calc_Min8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_CmpEQ64x4:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                              goto do_SseAssistedBinary;
         case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                              goto do_SseAssistedBinary;
         do_SseAssistedBinary: {
            /* RRRufff!  RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $160, %rsp -- make a space */
            sub_from_rsp(env, 160);
            /* leaq 48(%rsp), %r_argp -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RDX()));
            /* Store the two high args, at (%rsi) and (%rdx):
               movupd  %argLhi, 0(%rsi)
               movupd  %argRhi, 0(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            /* Store the two low args, at 48(%rsi) and 48(%rdx):
               movupd  %argLlo, 48(%rsi)
               movupd  %argRlo, 48(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                             AMD64AMode_IR(48, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                             AMD64AMode_IR(48, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* Prepare 3 arg regs:
               leaq 48(%r_argp), %rdi
               leaq 64(%r_argp), %rsi
               leaq 80(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
                                           hregAMD64_RDX()));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                             AMD64AMode_IR(0, argp)));
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                             AMD64AMode_IR(48, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 160);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }
4080
4081 case Iop_Perm32x8: fn = (HWord)h_generic_calc_Perm32x8;
4082 goto do_SseAssistedBinary256;
4083 do_SseAssistedBinary256: {
4084 /* RRRufff! RRRufff code is what we're generating here. Oh
4085 well. */
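         /* Same stack-based scheme as do_SseAssistedBinary, except
            that the helper works on whole 256-bit values, laid out as
            two adjacent 16-byte halves with the low half at the lower
            address, so a single call suffices. */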
         vassert(fn != 0);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $160, %rsp         -- make a space */
         sub_from_rsp(env, 160);
         /* leaq 48(%rsp), %r_argp  -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
            leaq 0(%r_argp), %rdi
            leaq 32(%r_argp), %rsi
            leaq 64(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                        hregAMD64_RDX()));
         /* Store the two args, at (%rsi) and (%rdx):
            movupd  %argLlo, 0(%rsi)
            movupd  %argLhi, 16(%rsi)
            movupd  %argRlo, 0(%rdx)
            movupd  %argRhi, 16(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                          AMD64AMode_IR(16, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                          AMD64AMode_IR(16, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                          AMD64AMode_IR(0, argp)));
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                          AMD64AMode_IR(16, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 160);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
      HReg        rsp     = hregAMD64_RSP();
      HReg        vHi     = newVRegV(env);
      HReg        vLo     = newVRegV(env);
      AMD64AMode* m8_rsp  = AMD64AMode_IR(-8, rsp);
      AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
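      /* Each 128-bit half is assembled by storing two 64-bit pieces
         just below %rsp (in effect, scratch use of the red zone) and
         then pulling both back in with a single 16-byte SSE load. */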
      /* arg1 is the most significant (Q3), arg4 the least (Q0) */
      /* Get all the args into regs, before messing with the stack. */
      AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1);
      AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2);
      AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3);
      AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4);
      /* less significant lane (Q2) at the lower address (-16(rsp)) */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp));
      /* and then the lower half .. */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_ITE) {
      HReg r1Hi, r1Lo, r0Hi, r0Lo;
      iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue);
      iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse);
      HReg dstHi = newVRegV(env);
      HReg dstLo = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi));
      addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
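      /* AMD64 condition codes come in complementary pairs differing
         only in bit 0, so (cc ^ 1) negates cc: start with the iftrue
         value and conditionally overwrite it with the iffalse value
         when the guard turns out to be false. */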
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi));
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   //avx_fail:
   vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselDVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_V128) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      if (tyd == Ity_V256) {
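         /* A V256 is carried as two V128 vregs.  Only little-endian
            (Iend_LE) stores reach this point, so the low half goes to
            the lower address. */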
         HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         AMD64AMode* am0  = AMD64AMode_IR(0, rA);
         AMD64AMode* am16 = AMD64AMode_IR(16, rA);
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      if (ty == Ity_V128) {
         HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_V256) {
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data);
         HReg        rbp  = hregAMD64_RBP();
         AMD64AMode* am0  = AMD64AMode_IR(stmt->Ist.Put.offset + 0,  rbp);
         AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      AMD64AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, puti->data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for memcheck
         created IR) we get t = address-expression, (t is later used
         twice) and so doing this naturally turns address-expression
         back into an AMD64 amode. */
      if (ty == Ity_I64
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Aam.IR.reg;
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
         } else {
            addInstr(env, AMD64Instr_Lea64(am,dst));
         }
         return;
      }

      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V256) {
         HReg rHi, rLo, dstHi, dstLo;
         iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_vMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_vMOVsd_RR(rLo,dstLo) );
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Throw out any return types we don't know about. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128: case Ity_V256:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %rax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc.spOff
               tells us where.  Fish it off the stack and then move
               the stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg dst = lookupIRTemp(env, d->tmp);
            AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
            add_to_rsp(env, addToSp);
            return;
         }
         case Ity_V256: {
            /* See comments for Ity_V128. */
            vassert(rloc.pri == RLPri_V256SpRel);
            vassert(addToSp >= 32);
            HReg dstLo, dstHi;
            lookupIRTempPair(&dstHi, &dstLo, env, d->tmp);
            AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo ));
            AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi ));
            add_to_rsp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, AMD64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expd into %rax, and cas->data into %rbx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rData = iselIntExpr_R(env, cas->dataLo);
         HReg rExpd = iselIntExpr_R(env, cas->expdLo);
         HReg rOld  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
         addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
         switch (ty) {
            case Ity_I64: sz = 8; break;
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, AMD64Instr_ACAS(am, sz));
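         /* If the CAS failed (ZF clear), cmpxchg has left the value
            it actually found at the address in %rax; copy that into
            rOld.  If it succeeded, rOld already holds the expected
            (== old) value. */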
         addInstr(env, AMD64Instr_CMov64(
                          Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
         return;
      } else {
         /* double CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
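         /* As for the singleton case: on failure, cmpxchg8b/16b
            leaves the observed value in %rdx:%rax, so conditionally
            copy both halves into the old-value temporaries. */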
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");

      AMD64CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      AMD64AMode*   amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
                                          hregAMD64_RBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amRIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_syscall:
         case Ijk_TInval:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
                                             amRIP, Acc_ALWAYS,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an indirect transfer,
               as that's the cheapest alternative that is
               allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
         } else {
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_syscall:
      case Ijk_TInval:
      case Ijk_Yield: {
         HReg r = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( IRSB* bb,
                            VexArch      arch_host,
                            VexArchInfo* archinfo_host,
                            VexAbiInfo*  vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr64 max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   AMD64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT
                     | VEX_HWCAPS_AMD64_AVX
                     | VEX_HWCAPS_AMD64_RDTSCP
                     | VEX_HWCAPS_AMD64_BMI
                     | VEX_HWCAPS_AMD64_AVX2)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
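   /* vregmapHI supplies the second (high-half) vreg for types that
      need a register pair: I128 (two Int64 vregs) and V256 (two
      Vec128 vregs); see the kind assignments below. */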

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_I128:
            hreg   = mkHReg(j++, HRcInt64, True);
            hregHI = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128:
            hreg = mkHReg(j++, HRcVec128, True);
            break;
         case Ity_V256:
            hreg   = mkHReg(j++, HRcVec128, True);
            hregHI = mkHReg(j++, HRcVec128, True);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = AMD64AMode_IR(offs_Host_EvC_Counter,  hregAMD64_RBP());
   amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, AMD64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/