
/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                         ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged
   at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
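
/* An informal bit-level decode of these values (not from the original
   source; field layout per the usual x87/SSE control register
   encodings -- verify against the architecture manual before relying
   on it):

      0x027F: bits 5..0 set    -> all six x87 exceptions masked
              bits 9..8  = 10b -> precision control = 53 bits
              bits 11..10 = 00b -> round to nearest

      0x1F80: bits 12..7 set   -> all six SSE exceptions masked
              bits 14..13 = 00b -> round to nearest
*/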


/*---------------------------------------------------------*/
/*--- misc helpers                                       ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                            ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

     - vregmap   holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed
          IRTemps.  It holds the identity of a second
          64-bit virtual HReg, which holds the high half
          of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constants -- these are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                         ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode      ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr       ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr       ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr       ( ISelEnv* env, IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                 ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
        vpanic("sane_AMode: unknown amd64 amode tag");
   }
}
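
/* An informal note (not from the original source): Aam_IR denotes a
   "disp(reg)" amode, e.g. 16(%rbp), and Aam_IRRS denotes a
   "disp(base,index,2^shift)" amode, e.g. 0(%rbp,%r10,8).  sane_AMode
   merely checks that the registers involved are 64-bit virtual
   registers, with %rbp (the baseblock pointer) also allowed as an
   Aam_IR base. */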


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y1;
   y1 = x << 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
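
/* A few illustrative cases (informal, not from the original source):
      fitsIn32Bits(0x000000007FFFFFFFULL) == True   (top 33 bits all 0)
      fitsIn32Bits(0xFFFFFFFF80000000ULL) == True   (top 33 bits all 1)
      fitsIn32Bits(0x0000000080000000ULL) == False  (bit 31 set but bit
         32 clear, so sign-widening the low 32 bits would flip the top
         half) */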

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((Long)(uimm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
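
/* For instance (informal, not from the original source):
   push_uimm64(env, 0xFFFFFFFF80000000ULL) emits a single pushq of the
   32-bit immediate 0x80000000, which the CPU sign-extends back to the
   full 64-bit value, whereas push_uimm64(env, 0x123456789ULL) needs a
   64-bit immediate load into a temporary followed by a pushq of that
   register. */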


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_BBPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
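
   /* To make this concrete (an informal example, not from the
      original source): a call whose args are an IRTemp, a 64-bit Get
      and a 64-bit constant can use the fast scheme, since each one is
      computable into its target register with one instruction and no
      fixed registers.  An arg such as Add64(t1,t2) forces the slow
      scheme, because iselIntExpr_single_instruction returns NULL for
      it. */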

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_BBPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nBBPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, (Addr)cee->addr, n_args, *retloc));
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

         movq %off, %tmp
         addq $bias, %tmp         (if bias != 0)
         andq $7, %tmp
         ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
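
/* As an informal example (not from the original source): for an
   8-element array of 8-byte guest registers at offset descr->base in
   the guest state, an access at index off with bias 0 yields the
   amode  base(%rbp, %tmp, 8)  where %tmp = off & 7.  The AND with 7
   keeps the access inside the array no matter what index value the
   guest code supplies. */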


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
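
/* A worked instance (informal, not from the original source): for
   mode = 3 (round toward zero), the sequence computes
   (3 << 13) | 0x1F80 = 0x6000 | 0x1F80 = 0x7F80, i.e. %mxcsr with
   RC = 11b and all exceptions still masked. */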


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
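
/* Analogously (informal, not from the original source): mode = 3
   gives (3 << 10) | 0x027F = 0x0C00 | 0x027F = 0x0E7F, i.e. an x87
   control word with RC = 11b (round toward zero) and the default
   masks and precision intact. */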


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
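
/* For example (informal, not from the original source):
   bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL, since
   0xA5 = 10100101b sets bits 7, 5, 2 and 0, and each set bit selects
   the corresponding byte lane of the result. */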
879
880
sewardj8258a8c2005-02-02 03:11:24 +0000881/*---------------------------------------------------------*/
882/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
883/*---------------------------------------------------------*/
884
885/* Select insns for an integer-typed expression, and add them to the
886 code list. Return a reg holding the result. This reg will be a
887 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
888 want to modify it, ask for a new vreg, copy it in there, and modify
889 the copy. The register allocator will do its best to map both
890 vregs to the same real register, so the copies will often disappear
891 later in the game.
892
893 This should handle expressions of 64, 32, 16 and 8-bit type. All
894 results are returned in a 64-bit register. For 32-, 16- and 8-bit
sewardje13074c2012-11-08 10:57:08 +0000895 expressions, the upper 32/48/56 bits are arbitrary, so you should
sewardj8258a8c2005-02-02 03:11:24 +0000896 mask or sign extend partial values if necessary.
897*/
898
899static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
900{
901 HReg r = iselIntExpr_R_wrk(env, e);
902 /* sanity checks ... */
903# if 0
904 vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
905# endif
906 vassert(hregClass(r) == HRcInt64);
907 vassert(hregIsVirtual(r));
908 return r;
909}
910
911/* DO NOT CALL THIS DIRECTLY ! */
912static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
913{
sewardje7905662005-05-09 18:15:21 +0000914 /* Used for unary/binary SIMD64 ops. */
915 HWord fn = 0;
sewardj8711f662005-05-09 17:52:56 +0000916 Bool second_is_UInt;
sewardje7905662005-05-09 18:15:21 +0000917
sewardj05b3b6a2005-02-04 01:44:33 +0000918 MatchInfo mi;
sewardj176ad2f2005-04-27 11:55:08 +0000919 DECLARE_PATTERN(p_1Uto8_64to1);
sewardjca257bc2010-09-08 08:34:52 +0000920 DECLARE_PATTERN(p_LDle8_then_8Uto64);
921 DECLARE_PATTERN(p_LDle16_then_16Uto64);
sewardj8258a8c2005-02-02 03:11:24 +0000922
923 IRType ty = typeOfIRExpr(env->type_env,e);
sewardj13f12a52011-05-03 07:51:49 +0000924 switch (ty) {
925 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
926 default: vassert(0);
927 }
sewardj8258a8c2005-02-02 03:11:24 +0000928
929 switch (e->tag) {
930
931 /* --------- TEMP --------- */
sewardjdd40fdf2006-12-24 02:20:24 +0000932 case Iex_RdTmp: {
933 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj8258a8c2005-02-02 03:11:24 +0000934 }
935
936 /* --------- LOAD --------- */
sewardjaf1ceca2005-06-30 23:31:27 +0000937 case Iex_Load: {
sewardj8258a8c2005-02-02 03:11:24 +0000938 HReg dst = newVRegI(env);
sewardjaf1ceca2005-06-30 23:31:27 +0000939 AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
940
sewardje9d8a262009-07-01 08:06:34 +0000941 /* We can't handle big-endian loads, nor load-linked. */
sewardjaf1ceca2005-06-30 23:31:27 +0000942 if (e->Iex.Load.end != Iend_LE)
943 goto irreducible;
944
sewardjf67eadf2005-02-03 03:53:52 +0000945 if (ty == Ity_I64) {
946 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
947 AMD64RMI_Mem(amode), dst) );
948 return dst;
949 }
sewardj8258a8c2005-02-02 03:11:24 +0000950 if (ty == Ity_I32) {
951 addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
952 return dst;
953 }
sewardj05b3b6a2005-02-04 01:44:33 +0000954 if (ty == Ity_I16) {
955 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
956 return dst;
957 }
sewardj7f039c42005-02-04 21:13:55 +0000958 if (ty == Ity_I8) {
959 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
960 return dst;
961 }
sewardj8258a8c2005-02-02 03:11:24 +0000962 break;
963 }
964
965 /* --------- BINARY OP --------- */
966 case Iex_Binop: {
967 AMD64AluOp aluOp;
968 AMD64ShiftOp shOp;
sewardj8711f662005-05-09 17:52:56 +0000969
sewardjeb17e492007-08-25 23:07:44 +0000970 /* Pattern: Sub64(0,x) */
971 /* and: Sub32(0,x) */
972 if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
973 || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
974 HReg dst = newVRegI(env);
975 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
976 addInstr(env, mk_iMOVsd_RR(reg,dst));
977 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
978 return dst;
979 }
980
sewardj8258a8c2005-02-02 03:11:24 +0000981 /* Is it an addition or logical style op? */
982 switch (e->Iex.Binop.op) {
983 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
984 aluOp = Aalu_ADD; break;
sewardj05b3b6a2005-02-04 01:44:33 +0000985 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
986 aluOp = Aalu_SUB; break;
987 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
988 aluOp = Aalu_AND; break;
sewardje1698952005-02-08 15:02:39 +0000989 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
sewardj31191072005-02-05 18:24:47 +0000990 aluOp = Aalu_OR; break;
sewardje1698952005-02-08 15:02:39 +0000991 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
992 aluOp = Aalu_XOR; break;
sewardj85520e42005-02-19 15:22:38 +0000993 case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
sewardjd0a12df2005-02-10 02:07:43 +0000994 aluOp = Aalu_MUL; break;
sewardj8258a8c2005-02-02 03:11:24 +0000995 default:
996 aluOp = Aalu_INVALID; break;
997 }
998 /* For commutative ops we assume any literal
999 values are on the second operand. */
1000 if (aluOp != Aalu_INVALID) {
1001 HReg dst = newVRegI(env);
1002 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
1003 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1004 addInstr(env, mk_iMOVsd_RR(reg,dst));
1005 addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
1006 return dst;
1007 }
1008
1009 /* Perhaps a shift op? */
1010 switch (e->Iex.Binop.op) {
1011 case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
1012 shOp = Ash_SHL; break;
sewardj9b967672005-02-08 11:13:09 +00001013 case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
1014 shOp = Ash_SHR; break;
sewardj05b3b6a2005-02-04 01:44:33 +00001015 case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
1016 shOp = Ash_SAR; break;
sewardj8258a8c2005-02-02 03:11:24 +00001017 default:
1018 shOp = Ash_INVALID; break;
1019 }
1020 if (shOp != Ash_INVALID) {
1021 HReg dst = newVRegI(env);
1022
1023 /* regL = the value to be shifted */
1024 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1025 addInstr(env, mk_iMOVsd_RR(regL,dst));
1026
1027 /* Do any necessary widening for 32/16/8 bit operands */
1028 switch (e->Iex.Binop.op) {
sewardj05b3b6a2005-02-04 01:44:33 +00001029 case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
1030 break;
sewardj85520e42005-02-19 15:22:38 +00001031 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
sewardjb095fba2005-02-13 14:13:04 +00001032 break;
sewardj85520e42005-02-19 15:22:38 +00001033 case Iop_Shr8:
1034 addInstr(env, AMD64Instr_Alu64R(
1035 Aalu_AND, AMD64RMI_Imm(0xFF), dst));
1036 break;
1037 case Iop_Shr16:
1038 addInstr(env, AMD64Instr_Alu64R(
1039 Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
1040 break;
sewardjb095fba2005-02-13 14:13:04 +00001041 case Iop_Shr32:
sewardjca257bc2010-09-08 08:34:52 +00001042 addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
sewardjb095fba2005-02-13 14:13:04 +00001043 break;
sewardje83d9b22005-08-13 23:58:34 +00001044 case Iop_Sar8:
1045 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
1046 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
1047 break;
1048 case Iop_Sar16:
1049 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
1050 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
1051 break;
sewardj05b3b6a2005-02-04 01:44:33 +00001052 case Iop_Sar32:
sewardjca257bc2010-09-08 08:34:52 +00001053 addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
sewardj05b3b6a2005-02-04 01:44:33 +00001054 break;
1055 default:
sewardj909c06d2005-02-19 22:47:41 +00001056 ppIROp(e->Iex.Binop.op);
sewardj05b3b6a2005-02-04 01:44:33 +00001057 vassert(0);
sewardj8258a8c2005-02-02 03:11:24 +00001058 }
1059
1060 /* Now consider the shift amount. If it's a literal, we
1061 can do a much better job than the general case. */
1062 if (e->Iex.Binop.arg2->tag == Iex_Const) {
1063 /* assert that the IR is well-typed */
1064 Int nshift;
1065 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1066 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1067 vassert(nshift >= 0);
1068 if (nshift > 0)
1069 /* Can't allow nshift==0 since that means %cl */
sewardj501a3392005-05-11 15:37:50 +00001070 addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
sewardj8258a8c2005-02-02 03:11:24 +00001071 } else {
1072 /* General case; we have to force the amount into %cl. */
1073 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1074 addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
sewardj501a3392005-05-11 15:37:50 +00001075 addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
sewardj8258a8c2005-02-02 03:11:24 +00001076 }
1077 return dst;
1078 }
1079
sewardj8711f662005-05-09 17:52:56 +00001080 /* Deal with 64-bit SIMD binary ops */
1081 second_is_UInt = False;
1082 switch (e->Iex.Binop.op) {
1083 case Iop_Add8x8:
1084 fn = (HWord)h_generic_calc_Add8x8; break;
1085 case Iop_Add16x4:
1086 fn = (HWord)h_generic_calc_Add16x4; break;
1087 case Iop_Add32x2:
1088 fn = (HWord)h_generic_calc_Add32x2; break;
sewardja7ba8c42005-05-10 20:08:34 +00001089
1090 case Iop_Avg8Ux8:
1091 fn = (HWord)h_generic_calc_Avg8Ux8; break;
1092 case Iop_Avg16Ux4:
1093 fn = (HWord)h_generic_calc_Avg16Ux4; break;
sewardj8711f662005-05-09 17:52:56 +00001094
1095 case Iop_CmpEQ8x8:
1096 fn = (HWord)h_generic_calc_CmpEQ8x8; break;
1097 case Iop_CmpEQ16x4:
1098 fn = (HWord)h_generic_calc_CmpEQ16x4; break;
1099 case Iop_CmpEQ32x2:
1100 fn = (HWord)h_generic_calc_CmpEQ32x2; break;
1101
1102 case Iop_CmpGT8Sx8:
1103 fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
1104 case Iop_CmpGT16Sx4:
1105 fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
1106 case Iop_CmpGT32Sx2:
1107 fn = (HWord)h_generic_calc_CmpGT32Sx2; break;
1108
1109 case Iop_InterleaveHI8x8:
1110 fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
1111 case Iop_InterleaveLO8x8:
1112 fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
1113 case Iop_InterleaveHI16x4:
1114 fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
1115 case Iop_InterleaveLO16x4:
1116 fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
1117 case Iop_InterleaveHI32x2:
1118 fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
1119 case Iop_InterleaveLO32x2:
1120 fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
sewardjd166e282008-02-06 11:42:45 +00001121 case Iop_CatOddLanes16x4:
1122 fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
1123 case Iop_CatEvenLanes16x4:
1124 fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
1125 case Iop_Perm8x8:
1126 fn = (HWord)h_generic_calc_Perm8x8; break;
sewardj8711f662005-05-09 17:52:56 +00001127
sewardja7ba8c42005-05-10 20:08:34 +00001128 case Iop_Max8Ux8:
1129 fn = (HWord)h_generic_calc_Max8Ux8; break;
1130 case Iop_Max16Sx4:
1131 fn = (HWord)h_generic_calc_Max16Sx4; break;
1132 case Iop_Min8Ux8:
1133 fn = (HWord)h_generic_calc_Min8Ux8; break;
1134 case Iop_Min16Sx4:
1135 fn = (HWord)h_generic_calc_Min16Sx4; break;
sewardj8711f662005-05-09 17:52:56 +00001136
1137 case Iop_Mul16x4:
1138 fn = (HWord)h_generic_calc_Mul16x4; break;
sewardjd166e282008-02-06 11:42:45 +00001139 case Iop_Mul32x2:
1140 fn = (HWord)h_generic_calc_Mul32x2; break;
sewardj8711f662005-05-09 17:52:56 +00001141 case Iop_MulHi16Sx4:
1142 fn = (HWord)h_generic_calc_MulHi16Sx4; break;
sewardja7ba8c42005-05-10 20:08:34 +00001143 case Iop_MulHi16Ux4:
1144 fn = (HWord)h_generic_calc_MulHi16Ux4; break;
1145
sewardj8711f662005-05-09 17:52:56 +00001146 case Iop_QAdd8Sx8:
1147 fn = (HWord)h_generic_calc_QAdd8Sx8; break;
1148 case Iop_QAdd16Sx4:
1149 fn = (HWord)h_generic_calc_QAdd16Sx4; break;
1150 case Iop_QAdd8Ux8:
1151 fn = (HWord)h_generic_calc_QAdd8Ux8; break;
1152 case Iop_QAdd16Ux4:
1153 fn = (HWord)h_generic_calc_QAdd16Ux4; break;
1154
sewardj5f438dd2011-06-16 11:36:23 +00001155 case Iop_QNarrowBin32Sto16Sx4:
1156 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
1157 case Iop_QNarrowBin16Sto8Sx8:
1158 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
1159 case Iop_QNarrowBin16Sto8Ux8:
1160 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
sewardjad2c9ea2011-10-22 09:32:16 +00001161 case Iop_NarrowBin16to8x8:
1162 fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
1163 case Iop_NarrowBin32to16x4:
1164 fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;
sewardj8711f662005-05-09 17:52:56 +00001165
1166 case Iop_QSub8Sx8:
1167 fn = (HWord)h_generic_calc_QSub8Sx8; break;
1168 case Iop_QSub16Sx4:
1169 fn = (HWord)h_generic_calc_QSub16Sx4; break;
1170 case Iop_QSub8Ux8:
1171 fn = (HWord)h_generic_calc_QSub8Ux8; break;
1172 case Iop_QSub16Ux4:
1173 fn = (HWord)h_generic_calc_QSub16Ux4; break;
1174
1175 case Iop_Sub8x8:
1176 fn = (HWord)h_generic_calc_Sub8x8; break;
1177 case Iop_Sub16x4:
1178 fn = (HWord)h_generic_calc_Sub16x4; break;
1179 case Iop_Sub32x2:
1180 fn = (HWord)h_generic_calc_Sub32x2; break;
1181
1182 case Iop_ShlN32x2:
1183 fn = (HWord)h_generic_calc_ShlN32x2;
1184 second_is_UInt = True;
1185 break;
1186 case Iop_ShlN16x4:
1187 fn = (HWord)h_generic_calc_ShlN16x4;
1188 second_is_UInt = True;
1189 break;
sewardjd166e282008-02-06 11:42:45 +00001190 case Iop_ShlN8x8:
1191 fn = (HWord)h_generic_calc_ShlN8x8;
1192 second_is_UInt = True;
1193 break;
sewardj8711f662005-05-09 17:52:56 +00001194 case Iop_ShrN32x2:
1195 fn = (HWord)h_generic_calc_ShrN32x2;
1196 second_is_UInt = True;
1197 break;
1198 case Iop_ShrN16x4:
1199 fn = (HWord)h_generic_calc_ShrN16x4;
1200 second_is_UInt = True;
1201 break;
1202 case Iop_SarN32x2:
1203 fn = (HWord)h_generic_calc_SarN32x2;
1204 second_is_UInt = True;
1205 break;
1206 case Iop_SarN16x4:
1207 fn = (HWord)h_generic_calc_SarN16x4;
1208 second_is_UInt = True;
1209 break;
sewardj02f79f12007-09-01 18:59:53 +00001210 case Iop_SarN8x8:
1211 fn = (HWord)h_generic_calc_SarN8x8;
1212 second_is_UInt = True;
1213 break;
sewardj8711f662005-05-09 17:52:56 +00001214
1215 default:
1216 fn = (HWord)0; break;
1217 }
1218 if (fn != (HWord)0) {
1219 /* Note: the following assumes all helpers are of signature
1220 ULong fn ( ULong, ULong ), and they are
1221 not marked as regparm functions.
1222 */
1223 HReg dst = newVRegI(env);
1224 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1225 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1226 if (second_is_UInt)
sewardjca257bc2010-09-08 08:34:52 +00001227 addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
sewardj8711f662005-05-09 17:52:56 +00001228 addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
1229 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
sewardj74142b82013-08-08 10:28:59 +00001230 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
1231 mk_RetLoc_simple(RLPri_Int) ));
sewardj8711f662005-05-09 17:52:56 +00001232 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1233 return dst;
1234 }
1235
sewardj7de0d3c2005-02-13 02:26:41 +00001236 /* Handle misc other ops. */
1237
sewardj478646f2008-05-01 20:13:04 +00001238 if (e->Iex.Binop.op == Iop_Max32U) {
sewardj9cc2bbf2011-06-05 17:56:03 +00001239 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1240 HReg dst = newVRegI(env);
1241 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1242 addInstr(env, mk_iMOVsd_RR(src1, dst));
1243 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
sewardje357c672015-01-27 23:35:58 +00001244 addInstr(env, AMD64Instr_CMov64(Acc_B, src2, dst));
sewardj478646f2008-05-01 20:13:04 +00001245 return dst;
1246 }
1247
sewardj7de0d3c2005-02-13 02:26:41 +00001248 if (e->Iex.Binop.op == Iop_DivModS64to32
1249 || e->Iex.Binop.op == Iop_DivModU64to32) {
1250 /* 64 x 32 -> (32(rem),32(div)) division */
1251 /* Get the 64-bit operand into edx:eax, and the other into
1252 any old R/M. */
1253 HReg rax = hregAMD64_RAX();
1254 HReg rdx = hregAMD64_RDX();
1255 HReg dst = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00001256 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
sewardj7de0d3c2005-02-13 02:26:41 +00001257 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
sewardj7de0d3c2005-02-13 02:26:41 +00001258 /* Compute the left operand into a reg, and then
1259 put the top half in edx and the bottom in eax. */
1260 HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
sewardj7de0d3c2005-02-13 02:26:41 +00001261 addInstr(env, mk_iMOVsd_RR(left64, rdx));
1262 addInstr(env, mk_iMOVsd_RR(left64, rax));
sewardj501a3392005-05-11 15:37:50 +00001263 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
sewardj7de0d3c2005-02-13 02:26:41 +00001264 addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
sewardjca257bc2010-09-08 08:34:52 +00001265 addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
1266 addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
sewardj501a3392005-05-11 15:37:50 +00001267 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
sewardj7de0d3c2005-02-13 02:26:41 +00001268 addInstr(env, mk_iMOVsd_RR(rax, dst));
1269 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
1270 return dst;
1271 }
1272
1273 if (e->Iex.Binop.op == Iop_32HLto64) {
1274 HReg hi32 = newVRegI(env);
1275 HReg lo32 = newVRegI(env);
1276 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1277 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1278 addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
1279 addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
sewardj501a3392005-05-11 15:37:50 +00001280 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
sewardjca257bc2010-09-08 08:34:52 +00001281 addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
sewardj7de0d3c2005-02-13 02:26:41 +00001282 addInstr(env, AMD64Instr_Alu64R(
1283 Aalu_OR, AMD64RMI_Reg(lo32), hi32));
1284 return hi32;
1285 }
1286
sewardj85520e42005-02-19 15:22:38 +00001287 if (e->Iex.Binop.op == Iop_16HLto32) {
1288 HReg hi16 = newVRegI(env);
1289 HReg lo16 = newVRegI(env);
1290 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1291 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1292 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1293 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
sewardj501a3392005-05-11 15:37:50 +00001294 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
sewardj85520e42005-02-19 15:22:38 +00001295 addInstr(env, AMD64Instr_Alu64R(
1296 Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
1297 addInstr(env, AMD64Instr_Alu64R(
1298 Aalu_OR, AMD64RMI_Reg(lo16), hi16));
1299 return hi16;
1300 }
sewardj7de0d3c2005-02-13 02:26:41 +00001301
sewardja64f8ad2005-04-24 00:26:37 +00001302 if (e->Iex.Binop.op == Iop_8HLto16) {
1303 HReg hi8 = newVRegI(env);
1304 HReg lo8 = newVRegI(env);
1305 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1306 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1307 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1308 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
sewardj501a3392005-05-11 15:37:50 +00001309 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
sewardja64f8ad2005-04-24 00:26:37 +00001310 addInstr(env, AMD64Instr_Alu64R(
1311 Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
1312 addInstr(env, AMD64Instr_Alu64R(
1313 Aalu_OR, AMD64RMI_Reg(lo8), hi8));
1314 return hi8;
1315 }
sewardj85520e42005-02-19 15:22:38 +00001316
1317 if (e->Iex.Binop.op == Iop_MullS32
1318 || e->Iex.Binop.op == Iop_MullS16
1319 || e->Iex.Binop.op == Iop_MullS8
1320 || e->Iex.Binop.op == Iop_MullU32
1321 || e->Iex.Binop.op == Iop_MullU16
1322 || e->Iex.Binop.op == Iop_MullU8) {
1323 HReg a32 = newVRegI(env);
1324 HReg b32 = newVRegI(env);
1325 HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1326 HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1327 Int shift = 0;
1328 AMD64ShiftOp shr_op = Ash_SHR;
1329 switch (e->Iex.Binop.op) {
1330 case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
1331 case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
1332 case Iop_MullS8: shr_op = Ash_SAR; shift = 56; break;
1333 case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
1334 case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
1335 case Iop_MullU8: shr_op = Ash_SHR; shift = 56; break;
1336 default: vassert(0);
1337 }
1338
1339 addInstr(env, mk_iMOVsd_RR(a32s, a32));
1340 addInstr(env, mk_iMOVsd_RR(b32s, b32));
sewardj501a3392005-05-11 15:37:50 +00001341 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
1342 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
1343 addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
1344 addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
sewardj85520e42005-02-19 15:22:38 +00001345 addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
1346 return b32;
1347 }
1348
sewardj18303862005-02-21 12:36:54 +00001349 if (e->Iex.Binop.op == Iop_CmpF64) {
1350 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1351 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1352 HReg dst = newVRegI(env);
1353 addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
1354 /* Mask out irrelevant parts of the result so as to conform
1355 to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(64to1(expr64)) */
      {
         DEFINE_PATTERN( p_1Uto8_64to1,
                         unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
         if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
            IRExpr* expr64 = mi.bindee[0];
            HReg    dst    = newVRegI(env);
            HReg    src    = iselIntExpr_R(env, expr64);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(1), dst));
            return dst;
         }
      }

      /* 8Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle8_then_8Uto64,
                        unop(Iop_8Uto64,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle16_then_16Uto64,
                        unop(Iop_16Uto64,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }

      /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
         Use 32 bit arithmetic and let the default zero-extend rule
         do the 32Uto64 for free. */
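      /* e.g. 32Uto64(Add32(x,y)) becomes a single 32-bit addl, since
         writing a 32-bit register on amd64 zeroes bits 63:32 anyway. */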
      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
         AMD64AluOp aluOp = Aalu_INVALID;
         switch (opi) {
            case Iop_Add32: aluOp = Aalu_ADD; break;
            case Iop_Sub32: aluOp = Aalu_SUB; break;
            case Iop_And32: aluOp = Aalu_AND; break;
            case Iop_Or32:  aluOp = Aalu_OR;  break;
            case Iop_Xor32: aluOp = Aalu_XOR; break;
            default: break;
         }
         if (aluOp != Aalu_INVALID) {
            /* For commutative ops we assume any literal values are on
               the second operand. */
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, argL);
            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* just fall through to normal handling for Iop_32Uto64 */
      }

      /* Fallback cases */
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64:
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
                                            src, dst) );
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
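            /* Set64 leaves 0 or 1 in dst; the shl/sar-by-63 pair then
               smears that bit across the register, giving 0 or -1. */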
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ64: {
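            /* Computes (src | -src) >>s 63: all zeroes if src == 0,
               all ones otherwise. */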
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovxLQ(False, src, src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
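            /* Left(x) is x | -x: every bit from the lowest set bit
               of x upwards becomes 1. */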
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg        dst     = newVRegI(env);
            HReg        vec     = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
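            /* Spill the vector to the 16 bytes just below %rsp and
               reload whichever 64-bit half was requested. */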
            HReg        dst     = newVRegI(env);
            Int         off     = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
            HReg        rsp     = hregAMD64_RSP();
            HReg        vec     = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            HReg vHi, vLo, vec;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            /* Do the first part of the selection by deciding which of
               the two 128-bit registers to look at, and do the second
               part using the same scheme as for V128{HI}to64 above. */
            Int off = 0;
            switch (e->Iex.Unop.op) {
               case Iop_V256to64_0: vec = vLo; off = -16; break;
               case Iop_V256to64_1: vec = vLo; off = -8;  break;
               case Iop_V256to64_2: vec = vHi; off = -16; break;
               case Iop_V256to64_3: vec = vHi; off = -8;  break;
               default: vassert(0);
            }
            HReg        dst     = newVRegI(env);
            HReg        rsp     = hregAMD64_RSP();
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         case Iop_GetMSBs8x8: {
            /* Note: the following assumes the helper is of
               signature
                  UInt fn ( ULong ), and is not a regparm fn.
            */
            HReg dst = newVRegI(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            fn = (HWord)h_generic_calc_GetMSBs8x8;
            addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           1, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 8 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 24 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         case Iop_GetMSBs8x16: {
            /* Note: the following assumes the helper is of signature
                  UInt fn ( ULong w64hi, ULong w64Lo ),
               and is not a regparm fn. */
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            HReg rsp = hregAMD64_RSP();
            fn = (HWord)h_generic_calc_GetMSBs8x16;
            AMD64AMode* m8_rsp  = AMD64AMode_IR( -8, rsp);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            /* hi 64 bits into RDI -- the first arg */
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(m8_rsp),
                                             hregAMD64_RDI() )); /* 1st arg */
            /* lo 64 bits into RSI -- the 2nd arg */
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(m16_rsp),
                                             hregAMD64_RSI() )); /* 2nd arg */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           2, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 16 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 16 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         default:
            break;
      }

      /* Deal with unary 64-bit SIMD ops. */
      switch (e->Iex.Unop.op) {
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of
            signature
               ULong fn ( ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst = newVRegI(env);
         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
         addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV,
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed for
         args, and 64 or 32 bits for return type. */
      if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
         goto irreducible;

      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);

      /* Move to dst, and zero out the top 32 bits if the result type is
         Ity_I32.  Probably overkill, but still .. */
      if (e->Iex.CCall.retty == Ity_I64)
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
      else
         addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));

      return dst;
   }

   /* --------- LITERAL --------- */
   /* 64/32/16/8-bit literals */
   case Iex_Const:
      if (ty == Ity_I64) {
         HReg r = newVRegI(env);
         addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
         return r;
      } else {
         AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg      r   = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
         return r;
      }

   /* --------- MULTIPLEX --------- */
   case Iex_ITE: { // VFD
      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
         HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,dst));
         AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
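         /* cc ^ 1 inverts the condition: dst already holds the iftrue
            value and is overwritten with iffalse only when the guard
            is false. */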
         addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
         return dst;
      }
      break;
   }

   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      IRTriop *triop = e->Iex.Triop.details;
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (triop->op == Iop_PRemC3210F64
          || triop->op == Iop_PRem1C3210F64) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg1   = iselDblExpr(env, triop->arg2);
         HReg        arg2   = iselDblExpr(env, triop->arg3);
         HReg        dst    = newVRegI(env);
         addInstr(env, AMD64Instr_A87Free(2));

         /* one arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         /* other arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         switch (triop->op) {
            case Iop_PRemC3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
               break;
            case Iop_PRem1C3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
               break;
            default:
               vassert(0);
         }
         /* Ignore the result, and instead make off with the FPU's
            C3210 flags (in the status word). */
         addInstr(env, AMD64Instr_A87StSW(m8_rsp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_complex);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
   /*              bind0        bind1  bind2   bind3   */
   DEFINE_PATTERN(p_complex,
      binop( Iop_Add64,
             binop( Iop_Add64,
                    bind(0),
                    binop(Iop_Shl64, bind(1), bind(2))
                  ),
             bind(3)
           )
   );
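   /* e.g.  Add64(Add64(t1, Shl64(t2, 0x3:I8)), 0x28:I64)
      collapses into the single amode  40(t1,t2,8). */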
   if (matchIRExpr(&mi, p_complex, e)) {
      IRExpr* expr1  = mi.bindee[0];
      IRExpr* expr2  = mi.bindee[1];
      IRExpr* imm8   = mi.bindee[2];
      IRExpr* simm32 = mi.bindee[3];
      if (imm8->tag == Iex_Const
          && imm8->Iex.Const.con->tag == Ico_U8
          && imm8->Iex.Const.con->Ico.U8 < 4
          /* imm8 is OK, now check simm32 */
          && simm32->tag == Iex_Const
          && simm32->Iex.Const.con->tag == Ico_U64
          && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
         UInt shift  = imm8->Iex.Const.con->Ico.U8;
         UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
         HReg r1 = iselIntExpr_R(env, expr1);
         HReg r2 = iselIntExpr_R(env, expr2);
         vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
         return AMD64AMode_IRRS(offset, r1, r2, shift);
      }
   }

   /* Add64(expr1, Shl64(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return AMD64AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add64(expr,i) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
       && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return AMD64AMode_IR(
                toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
                r1
             );
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return AMD64AMode_IR(0, r1);
   }
}


/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an AMD64RMI operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Armi_Imm:
         return rmi;
      case Armi_Reg:
         vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rmi->Armi.Reg.reg));
         return rmi;
      case Armi_Mem:
         vassert(sane_AMode(rmi->Armi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate 64/32/16/8 */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
         case Ico_U16:
            return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
         case Ico_U8:
            return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
      }
   }

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                        hregAMD64_RBP()));
   }

   /* special case: 64-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I64
       && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return AMD64RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RMI_Reg(r);
   }
}


/* --------------------- RIs --------------------- */

/* Calculate an expression into an AMD64RI operand.  As with
   iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Ari_Imm:
         return ri;
      case Ari_Reg:
         vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->Ari.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown amd64 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}


/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an AMD64RM operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits.  */

static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Arm_Reg:
         vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rm->Arm.Reg.reg));
         return rm;
      case Arm_Mem:
         vassert(sane_AMode(rm->Arm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown amd64 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                       hregAMD64_RBP()));
   }

   /* special case: load from memory */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which will be set when the expression notionally
   evaluates to 1. */

static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r64,dst));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
      return Acc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
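      /* The xor leaves the Z flag set, so Acc_Z is an always-true
         condition here and Acc_NZ an always-false one. */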
      return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
   }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 64to1 --- */

   /* 64to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: 32to1 --- */

   /* 32to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   /* CmpNEZ16(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFFFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(Or64(x,y)) */
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg      r0   = iselIntExpr_R(env, mi.bindee[0]);
         AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg      tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
         return Acc_NZ;
      }
   }

   /* CmpNEZ64(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, AMD64Instr_Test64(0xFF,r1));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg      r    = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg      r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX16");
      }
   }

   /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
      Saves a "movq %rax, %tmp" compared to the default route. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE64
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      HReg    tmp = newVRegI(env);
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U64);
      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    cal->Iex.CCall.cee,
                    cal->Iex.CCall.retty, cal->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);
      /* */
      addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
                                      AMD64RMI_Reg(hregAMD64_RAX()), tmp));
      return Acc_NZ;
   }

   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U
           || e->Iex.Binop.op == Iop_CasCmpEQ64
           || e->Iex.Binop.op == Iop_CasCmpNE64
           || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
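      /* cmpq has set the flags for r1 - rmi2; each IR comparison now
         maps directly onto one amd64 condition code. */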
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
         case Iop_CmpNE64:
         case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
         case Iop_CmpLT64S: return Acc_L;
         case Iop_CmpLT64U: return Acc_B;
         case Iop_CmpLE64S: return Acc_LE;
         case Iop_CmpLE64U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX64");
      }
   }

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32
           || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
         case Iop_CmpNE32:
         case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
         case Iop_CmpLT32S: return Acc_L;
         case Iop_CmpLT32U: return Acc_B;
         case Iop_CmpLE32S: return Acc_LE;
         case Iop_CmpLE32U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX32");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit)               ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller.  */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* read 128-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            /* get one operand into %rax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which. */
            HReg     tLo    = newVRegI(env);
            HReg     tHi    = newVRegI(env);
            Bool     syned  = toBool(e->Iex.Binop.op == Iop_MullS64);
            AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg     rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_MulL(syned, rmLeft));
            /* Result is now in RDX:RAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 128 x 64 -> (64(rem),64(div)) division */
         case Iop_DivModU128to64:
         case Iop_DivModS128to64: {
            /* Get the 128-bit operand into rdx:rax, and the other into
               any old R/M. */
            HReg sHi, sLo;
            HReg     tLo     = newVRegI(env);
            HReg     tHi     = newVRegI(env);
            Bool     syned   = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
            AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
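            /* divq/idivq leaves the quotient in rax and the remainder
               in rdx, which land in the lo and hi halves of the
               result respectively. */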
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;

         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard SSE register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
      return res;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
      /* Given an I32, produce an IEEE754 float with the same bit
         pattern. */
      HReg        dst    = newVRegV(env);
      HReg        src    = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
      addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg    = iselFltExpr(env, e->Iex.Binop.arg2);
      HReg        dst    = newVRegV(env);

      /* 'arg' now holds the value to be rounded.  The first thing to
         do is set the FPU's rounding mode accordingly. */

      /* Set host x87 rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
      addInstr(env, AMD64Instr_A87Free(1));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));

      /* Restore default x87 rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) {
      /* Sigh ... very rough code.  Could do much better. */
      /* Get the 128-bit literal 00---0 10---0 into a register
         and xor it with the value to be negated. */
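      /* Only bit 31 of that literal is set, so the xor flips just the
         sign bit of the low-lane float and leaves the rest alone. */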
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselFltExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
      addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      add_to_rsp(env, 16);
      return dst;
   }

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) {
      IRQop *qop = e->Iex.Qop.details;
      HReg dst  = newVRegV(env);
      HReg argX = iselFltExpr(env, qop->arg2);
      HReg argY = iselFltExpr(env, qop->arg3);
      HReg argZ = iselFltExpr(env, qop->arg4);
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      /* subq $16, %rsp         -- make a space */
      sub_from_rsp(env, 16);
      /* Prepare 4 arg regs:
         leaq 0(%rsp), %rdi
         leaq 4(%rsp), %rsi
         leaq 8(%rsp), %rdx
         leaq 12(%rsp), %rcx
      */
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
                                     hregAMD64_RDI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
                                     hregAMD64_RSI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
                                     hregAMD64_RDX()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
                                     hregAMD64_RCX()));
      /* Store the three args, at (%rsi), (%rdx) and (%rcx):
         movss  %argX, 0(%rsi)
         movss  %argY, 0(%rdx)
         movss  %argZ, 0(%rcx)
      */
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX,
                                       AMD64AMode_IR(0, hregAMD64_RSI())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY,
                                       AMD64AMode_IR(0, hregAMD64_RDX())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ,
                                       AMD64AMode_IR(0, hregAMD64_RCX())));
      /* call the helper */
      addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
                                     (ULong)(HWord)h_generic_calc_MAddF32,
                                     4, mk_RetLoc_simple(RLPri_None) ));
      /* fetch the result from 0(%rsp), the bottom of the area
         reserved above; the space is only freed after this load. */
2700 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
2701 AMD64AMode_IR(0, hregAMD64_RSP())));
2702 /* and finally, clear the space */
2703 add_to_rsp(env, 16);
2704 return dst;
2705 }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into the lower half of an xmm
   register, the identity of which is returned.  As with
   iselIntExpr_R, the returned reg will be virtual, and it must not be
   changed by subsequent code emitted by the caller.
*/

/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      union { ULong u64; Double f64; } u;
      HReg res = newVRegV(env);
      HReg tmp = newVRegI(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(amd64): const");

      addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/, 8, res,
                       AMD64AMode_IR(0, hregAMD64_RSP())
              ));
      add_to_rsp(env, 8);
      return res;
   }
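
   /* (Example for the Iex_Const case above: the F64 constant 1.0 --
      sign 0, biased exponent 1023, all-zero fraction -- arrives here
      as the bit pattern 0x3FF0000000000000, which is materialised
      into an integer reg, pushed, and reloaded into the low half of
      an xmm reg.) */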

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      AMD64SseOp op = Asse_INVALID;
      switch (triop->op) {
         case Iop_AddF64: op = Asse_ADDF; break;
         case Iop_SubF64: op = Asse_SUBF; break;
         case Iop_MulF64: op = Asse_MULF; break;
         case Iop_DivF64: op = Asse_DIVF; break;
         default: break;
      }
      if (op != Asse_INVALID) {
         HReg dst  = newVRegV(env);
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
         return dst;
      }
   }

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) {
      IRQop *qop = e->Iex.Qop.details;
      HReg dst  = newVRegV(env);
      HReg argX = iselDblExpr(env, qop->arg2);
      HReg argY = iselDblExpr(env, qop->arg3);
      HReg argZ = iselDblExpr(env, qop->arg4);
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      /* subq $32, %rsp         -- make a space */
      sub_from_rsp(env, 32);
      /* Prepare 4 arg regs:
         leaq 0(%rsp), %rdi
         leaq 8(%rsp), %rsi
         leaq 16(%rsp), %rdx
         leaq 24(%rsp), %rcx
      */
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
                                     hregAMD64_RDI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
                                     hregAMD64_RSI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
                                     hregAMD64_RDX()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
                                     hregAMD64_RCX()));
      /* Store the three args, at (%rsi), (%rdx) and (%rcx):
         movsd  %argX, 0(%rsi)
         movsd  %argY, 0(%rdx)
         movsd  %argZ, 0(%rcx)
      */
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX,
                                       AMD64AMode_IR(0, hregAMD64_RSI())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY,
                                       AMD64AMode_IR(0, hregAMD64_RDX())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ,
                                       AMD64AMode_IR(0, hregAMD64_RCX())));
      /* call the helper */
      addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
                                     (ULong)(HWord)h_generic_calc_MAddF64,
                                     4, mk_RetLoc_simple(RLPri_None) ));
      /* fetch the result from memory, at 0(%rsp) -- the slot that
         %rdi pointed at across the call. */
      addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
                                       AMD64AMode_IR(0, hregAMD64_RSP())));
      /* and finally, clear the space */
      add_to_rsp(env, 32);
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg        dst    = newVRegV(env);

      /* arg now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host x87 rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
      addInstr(env, AMD64Instr_A87Free(1));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));

      /* Restore default x87 rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }
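
   /* (Sketch of what the sequence above amounts to, assuming Afp_ROUND
      encodes the x87 frndint instruction: the value is spilled to
      -8(%rsp), pushed onto the x87 stack, rounded to an integer-valued
      double under the rounding mode just installed in %fpucw, popped
      back to -8(%rsp), and reloaded into an xmm reg.) */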

   IRTriop *triop = e->Iex.Triop.details;
   if (e->tag == Iex_Triop
       && (triop->op == Iop_ScaleF64
           || triop->op == Iop_AtanF64
           || triop->op == Iop_Yl2xF64
           || triop->op == Iop_Yl2xp1F64
           || triop->op == Iop_PRemF64
           || triop->op == Iop_PRem1F64)
      ) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg1   = iselDblExpr(env, triop->arg2);
      HReg        arg2   = iselDblExpr(env, triop->arg3);
      HReg        dst    = newVRegV(env);
      Bool     arg2first = toBool(triop->op == Iop_ScaleF64
                                  || triop->op == Iop_PRemF64
                                  || triop->op == Iop_PRem1F64);
      addInstr(env, AMD64Instr_A87Free(2));

      /* one arg -> top of x87 stack */
      addInstr(env, AMD64Instr_SseLdSt(
                       False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

      /* other arg -> top of x87 stack */
      addInstr(env, AMD64Instr_SseLdSt(
                       False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

      /* do it */
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      switch (triop->op) {
         case Iop_ScaleF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
            break;
         case Iop_AtanF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
            break;
         case Iop_Yl2xF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
            break;
         case Iop_Yl2xp1F64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
            break;
         case Iop_PRemF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
            break;
         case Iop_PRem1F64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
            break;
         default:
            vassert(0);
      }

      /* save result */
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
      set_SSE_rounding_default( env );
      addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
      return dst;
   }

   if (e->tag == Iex_Unop
       && (e->Iex.Unop.op == Iop_NegF64
           || e->Iex.Unop.op == Iop_AbsF64)) {
      /* Sigh ... very rough code.  Could do much better. */
      /* Get the 128-bit literal 00---0 10---0 into a register
         and xor/nand it with the value to be negated. */
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));

      if (e->Iex.Unop.op == Iop_NegF64)
         addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      else
         addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));

      add_to_rsp(env, 16);
      return dst;
   }
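
   /* (How the mask trick above works: the 128-bit value loaded into
      dst has only bit 63 set.  For NegF64, xor with that flips just
      the sign bit of the F64 in the low lane.  For AbsF64 the ANDN
      form is used, which -- assuming Asse_ANDN has pandn semantics,
      dst = ~dst & src -- computes ~mask & value and so clears the
      sign bit.) */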

   if (e->tag == Iex_Binop) {
      A87FpOp fpop = Afp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: fpop = Afp_SQRT; break;
         case Iop_SinF64:  fpop = Afp_SIN;  break;
         case Iop_CosF64:  fpop = Afp_COS;  break;
         case Iop_TanF64:  fpop = Afp_TAN;  break;
         case Iop_2xm1F64: fpop = Afp_2XM1; break;
         default: break;
      }
      if (fpop != Afp_INVALID) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg        dst    = newVRegV(env);
         Int     nNeeded    = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
         addInstr(env, AMD64Instr_A87Free(nNeeded));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition
            codes.  I don't think that matters, since this insn
            selector never generates such an instruction intervening
            between a flag-setting instruction and a flag-using
            instruction. */
         addInstr(env, AMD64Instr_A87FpOp(fpop));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
         return dst;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//..          case Iop_I32toF64: {
//..             HReg dst = newVRegF(env);
//..             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
//..             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
//..             set_FPU_rounding_default(env);
//..             addInstr(env, X86Instr_FpLdStI(
//..                              True/*load*/, 4, dst,
//..                              X86AMode_IR(0, hregX86_ESP())));
//..             add_to_esp(env, 4);
//..             return dst;
//..          }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegV(env);
            AMD64RI*    src    = iselIntExpr_RI(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
            return dst;
         }
         case Iop_F32toF64: {
            HReg f32;
            HReg f64 = newVRegV(env);
            /* this shouldn't be necessary, but be paranoid ... */
            set_SSE_rounding_default(env);
            f32 = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
            return f64;
         }
         default:
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      HReg r1, r0, dst;
      vassert(ty == Ity_F64);
      vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1);
      r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
      r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
      dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}


/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   HWord      fn = 0; /* address of helper fn, if required */
   Bool       arg1isEReg = False;
   AMD64SseOp op = Asse_INVALID;
   IRType     ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/,
                       16,
                       dst,
                       AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
                    )
             );
      return dst;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg        dst = newVRegV(env);
      AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
      switch (e->Iex.Const.con->Ico.V128) {
         case 0x0000:
            dst = generate_zeroes_V128(env);
            break;
         case 0xFFFF:
            dst = generate_ones_V128(env);
            break;
         default: {
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            /* do push_uimm64 twice, first time for the high-order half. */
            push_uimm64(env, bitmask8_to_bytemask64(
                                (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
                       ));
            push_uimm64(env, bitmask8_to_bytemask64(
                                (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
                       ));
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
            add_to_rsp(env, 16);
            break;
         }
      }
      return dst;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         case Iop_NotV128: {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            return do_sse_NotV128(env, arg);
         }

         case Iop_CmpNEZ64x2: {
            /* We can use SSE2 instructions for this. */
            /* Ideally, we want to do a 64Ix2 comparison against zero of
               the operand.  Problem is no such insn exists.  Solution
               therefore is to do a 32Ix4 comparison instead, and bitwise-
               negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
               let the not'd result of this initial comparison be a:b:c:d.
               What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
               pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
               giving the required result.

               The required selection sequence is 2,3,0,1, which
               according to Intel's documentation means the pshufd
               literal value is 0xB1, that is,
               (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
            */
            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp  = generate_zeroes_V128(env);
            HReg dst  = newVRegV(env);
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
            tmp = do_sse_NotV128(env, tmp);
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
            return dst;
         }

         case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ8x16: op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
         do_CmpNEZ_vector:
         {
            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp  = newVRegV(env);
            HReg zero = generate_zeroes_V128(env);
            HReg dst;
            addInstr(env, mk_vMOVsd_RR(arg, tmp));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
            dst = do_sse_NotV128(env, tmp);
            return dst;
         }

         case Iop_RecipEst32Fx4: op = Asse_RCPF;   goto do_32Fx4_unary;
         case Iop_RSqrtEst32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
         do_32Fx4_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
            return dst;
         }

         case Iop_RecipEst32F0x4: op = Asse_RCPF;   goto do_32F0x4_unary;
         case Iop_RSqrtEst32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
         case Iop_Sqrt32F0x4:     op = Asse_SQRTF;  goto do_32F0x4_unary;
         do_32F0x4_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper 3/4 of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper 3/4 is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
            return dst;
         }

         case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
         do_64F0x2_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper half of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper half is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
            return dst;
         }

         case Iop_32UtoV128: {
            HReg        dst     = newVRegV(env);
            AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
            AMD64RI*    ri      = iselIntExpr_RI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
            addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
            return dst;
         }

         case Iop_64UtoV128: {
            HReg        dst  = newVRegV(env);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
            add_to_rsp(env, 8);
            return dst;
         }

         case Iop_V256toV128_0:
         case Iop_V256toV128_1: {
            HReg vHi, vLo;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_Sqrt64Fx2:
         case Iop_Sqrt32Fx4: {
            /* :: (rmode, vec) -> vec */
            HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
                              ? AMD64Instr_Sse64Fx2 : AMD64Instr_Sse32Fx4)
                          (Asse_SQRTF, arg, dst));
            return dst;
         }

         /* FIXME: could we generate MOVQ here? */
         case Iop_SetV128lo64: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         /* FIXME: could we generate MOVD here? */
         case Iop_SetV128lo32: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         case Iop_64HLtoV128: {
            HReg        rsp     = hregAMD64_RSP();
            AMD64AMode* m8_rsp  = AMD64AMode_IR(-8,  rsp);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64RI*    qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1);
            AMD64RI*    qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp));
            HReg        dst = newVRegV(env);
            /* One store-forwarding stall coming up, oh well :-( */
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp));
            return dst;
         }
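
         /* (Why the stall: the 16-byte load at -16(%rsp) overlaps two
            separate 8-byte stores, and store-to-load forwarding
            generally cannot merge multiple stores into one load, so
            the load has to wait for both stores to drain.) */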

         case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
         case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
         case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
         case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
         case Iop_Max32Fx4:   op = Asse_MAXF;   goto do_32Fx4;
         case Iop_Min32Fx4:   op = Asse_MINF;   goto do_32Fx4;
         do_32Fx4:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
         case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
         case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
         case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
         case Iop_Max64Fx2:   op = Asse_MAXF;   goto do_64Fx2;
         case Iop_Min64Fx2:   op = Asse_MINF;   goto do_64Fx2;
         do_64Fx2:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
         case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
         case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
         case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
         case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
         case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
         case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
         case Iop_Min32F0x4:   op = Asse_MINF;   goto do_32F0x4;
         case Iop_Mul32F0x4:   op = Asse_MULF;   goto do_32F0x4;
         case Iop_Sub32F0x4:   op = Asse_SUBF;   goto do_32F0x4;
         do_32F0x4: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
         case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
         case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
         case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
         case Iop_Add64F0x2:   op = Asse_ADDF;   goto do_64F0x2;
         case Iop_Div64F0x2:   op = Asse_DIVF;   goto do_64F0x2;
         case Iop_Max64F0x2:   op = Asse_MAXF;   goto do_64F0x2;
         case Iop_Min64F0x2:   op = Asse_MINF;   goto do_64F0x2;
         case Iop_Mul64F0x2:   op = Asse_MULF;   goto do_64F0x2;
         case Iop_Sub64F0x2:   op = Asse_SUBF;   goto do_64F0x2;
         do_64F0x2: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
            return dst;
         }

         case Iop_QNarrowBin32Sto16Sx8:
            op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Sx16:
            op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Ux16:
            op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveHI8x16:
            op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI16x8:
            op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI32x4:
            op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI64x2:
            op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveLO8x16:
            op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO16x8:
            op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO32x4:
            op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO64x2:
            op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_AndV128:    op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV128:     op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV128:    op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x16:    op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x8:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x4:    op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x2:    op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx16:  op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx8:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux16:  op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux8:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux16:   op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux8:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x16:  op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x8:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x4:  op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx16: op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx8:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux16:   op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx8:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux16:   op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x8:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x16:    op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x8:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x4:    op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x2:    op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx16:  op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx8:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux16:  op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux8:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg: {
            HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            if (arg1isEReg) {
               addInstr(env, mk_vMOVsd_RR(arg2, dst));
               addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
            } else {
               addInstr(env, mk_vMOVsd_RR(arg1, dst));
               addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
            }
            return dst;
         }
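
         /* (Re arg1isEReg: SseReRg instructions compute dst = dst `op`
            src, so for the commutative ops above either IR operand can
            start out in dst.  The pack/unpack ops are not commutative,
            and for them the IR's arg1 apparently has to travel in the
            E (src) position -- hence the flag.) */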

         case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
         case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
         case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
         case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
         case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
         case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
         case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
         case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
         do_SseShift: {
            HReg        greg = iselVecExpr(env, e->Iex.Binop.arg1);
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        ereg = newVRegV(env);
            HReg        dst  = newVRegV(env);
            addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
            addInstr(env, mk_vMOVsd_RR(greg, dst));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
            add_to_rsp(env, 16);
            return dst;
         }
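
         /* (The two pushes build a 16-byte value on the stack with the
            shift count in the low quadword and zero in the high one;
            the SSE2 shift-by-register forms take their count from the
            low 64 bits of the xmm source, so this guarantees a clean
            count.) */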

         case Iop_Mul32x4:    fn = (HWord)h_generic_calc_Mul32x4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Sx4:   fn = (HWord)h_generic_calc_Max32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Sx4:   fn = (HWord)h_generic_calc_Min32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Ux4:   fn = (HWord)h_generic_calc_Max32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Ux4:   fn = (HWord)h_generic_calc_Min32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Max16Ux8:   fn = (HWord)h_generic_calc_Max16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Min16Ux8:   fn = (HWord)h_generic_calc_Min16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Max8Sx16:   fn = (HWord)h_generic_calc_Max8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_Min8Sx16:   fn = (HWord)h_generic_calc_Min8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_CmpEQ64x2:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                              goto do_SseAssistedBinary;
         case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                              goto do_SseAssistedBinary;
         case Iop_Perm32x4:   fn = (HWord)h_generic_calc_Perm32x4;
                              goto do_SseAssistedBinary;
         case Iop_QNarrowBin32Sto16Ux8:
                              fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_NarrowBin16to8x16:
                              fn = (HWord)h_generic_calc_NarrowBin16to8x16;
                              goto do_SseAssistedBinary;
         case Iop_NarrowBin32to16x8:
                              fn = (HWord)h_generic_calc_NarrowBin32to16x8;
                              goto do_SseAssistedBinary;
         do_SseAssistedBinary: {
            /* RRRufff!  RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dst = newVRegV(env);
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $112, %rsp         -- make a space */
            sub_from_rsp(env, 112);
            /* leaq 48(%rsp), %r_argp  -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RDX()));
            /* Store the two args, at (%rsi) and (%rdx):
               movupd  %argL, 0(%rsi)
               movupd  %argR, 0(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           3, mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
                                             AMD64AMode_IR(0, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 112);
            return dst;
         }
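
         /* (Scratch-area layout for do_SseAssistedBinary: the
            16-aligned pointer %r_argp addresses 48 bytes -- result at
            0(%r_argp), argL at 16(%r_argp), argR at 32(%r_argp).  The
            helpers are presumably of the shape

               void h_generic_calc_Mul32x4 ( V128* res,
                                             V128* argL, V128* argR );

            per host_generic_simd128.h; only the pointers are passed,
            in %rdi/%rsi/%rdx, and nothing comes back in registers,
            hence RLPri_None.) */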

         case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
                            goto do_SseAssistedVectorAndScalar;
         case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
                            goto do_SseAssistedVectorAndScalar;
         do_SseAssistedVectorAndScalar: {
            /* RRRufff!  RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dst = newVRegV(env);
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $112, %rsp         -- make a space */
            sub_from_rsp(env, 112);
            /* leaq 48(%rsp), %r_argp  -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 2 vector arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            /* Store the vector arg, at (%rsi):
               movupd  %argL, 0(%rsi)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            /* And get the scalar value into rdx */
            addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));

            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           3, mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
                                             AMD64AMode_IR(0, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 112);
            return dst;
         }
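
         /* (Same scheme as above, except the third argument is the
            shift amount itself rather than a pointer: presumably

               void h_generic_calc_SarN64x2 ( V128* res,
                                              V128* arg, UInt nn );

            with res in %rdi, arg in %rsi and nn in %rdx.) */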

         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {

         case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm;
         case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm;
         case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm;
         case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm;
         do_64Fx2_w_rm:
         {
            HReg argL = iselVecExpr(env, triop->arg2);
            HReg argR = iselVecExpr(env, triop->arg3);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm;
         case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm;
         case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm;
         case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm;
         do_32Fx4_w_rm:
         {
            HReg argL = iselVecExpr(env, triop->arg2);
            HReg argR = iselVecExpr(env, triop->arg3);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         default:
            break;
      } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */

   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   //vec_fail:
   vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs.   ---*/
/*---------------------------------------------------------*/

static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                           ISelEnv* env, IRExpr* e )
{
   iselDVecExpr_wrk( rHi, rLo, env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcVec128);
   vassert(hregClass(*rLo) == HRcVec128);
   vassert(hregIsVirtual(*rHi));
   vassert(hregIsVirtual(*rLo));
}


/* DO NOT CALL THIS DIRECTLY */
static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                               ISelEnv* env, IRExpr* e )
{
   HWord fn = 0; /* address of helper fn, if required */
   vassert(e);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V256);

   AMD64SseOp op = Asse_INVALID;

   /* read 256-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   if (e->tag == Iex_Get) {
      HReg        vHi  = newVRegV(env);
      HReg        vLo  = newVRegV(env);
      HReg        rbp  = hregAMD64_RBP();
      AMD64AMode* am0  = AMD64AMode_IR(e->Iex.Get.offset + 0,  rbp);
      AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_Load) {
      HReg        vHi  = newVRegV(env);
      HReg        vLo  = newVRegV(env);
      HReg        rA   = iselIntExpr_R(env, e->Iex.Load.addr);
      AMD64AMode* am0  = AMD64AMode_IR(0,  rA);
      AMD64AMode* am16 = AMD64AMode_IR(16, rA);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_V256);
      switch (e->Iex.Const.con->Ico.V256) {
         case 0x00000000: {
            HReg vHi = generate_zeroes_V128(env);
            HReg vLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(vHi, vLo));
            *rHi = vHi;
            *rLo = vLo;
            return;
         }
         default:
            break; /* give up.  Until such time as is necessary. */
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         case Iop_NotV256: {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            *rHi = do_sse_NotV128(env, argHi);
            *rLo = do_sse_NotV128(env, argLo);
            return;
         }

         case Iop_RecipEst32Fx8: op = Asse_RCPF;   goto do_32Fx8_unary;
         case Iop_Sqrt32Fx8:     op = Asse_SQRTF;  goto do_32Fx8_unary;
         case Iop_RSqrtEst32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
         do_32Fx8_unary:
         {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary;
         do_64Fx4_unary:
         {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_CmpNEZ64x4: {
            /* We can use SSE2 instructions for this. */
            /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
               (obviously).  See comment on Iop_CmpNEZ64x2 for
               explanation of what's going on here. */
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg tmpHi  = generate_zeroes_V128(env);
            HReg tmpLo  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
            HReg dstHi  = newVRegV(env);
            HReg dstLo  = newVRegV(env);
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
            tmpHi = do_sse_NotV128(env, tmpHi);
            tmpLo = do_sse_NotV128(env, tmpLo);
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_CmpNEZ32x8:  op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ8x32:  op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
         do_CmpNEZ_vector:
         {
            HReg argHi, argLo;
            iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
            HReg tmpHi = newVRegV(env);
            HReg tmpLo = newVRegV(env);
            HReg zero  = generate_zeroes_V128(env);
            HReg dstHi, dstLo;
            addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
            addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
            dstHi = do_sse_NotV128(env, tmpHi);
            dstLo = do_sse_NotV128(env, tmpLo);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
         case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
         do_64Fx4:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
         case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
         do_32Fx8:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_AndV256:     op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV256:      op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV256:     op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x32:     op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x16:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x8:     op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x4:     op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx32:   op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx16:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux32:   op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux16:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux32:    op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux16:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x32:   op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x16:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x8:   op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx32:  op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx8:  op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx16:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux32:    op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx16:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux32:    op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x16:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x32:     op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x16:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x8:     op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x4:     op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx32:   op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx16:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux32:   op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux16:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
         case Iop_ShlN32x8:  op = Asse_SHL32; goto do_SseShift;
         case Iop_ShlN64x4:  op = Asse_SHL64; goto do_SseShift;
         case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
         case Iop_SarN32x8:  op = Asse_SAR32; goto do_SseShift;
         case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
         case Iop_ShrN32x8:  op = Asse_SHR32; goto do_SseShift;
         case Iop_ShrN64x4:  op = Asse_SHR64; goto do_SseShift;
         do_SseShift: {
            HReg gregHi, gregLo;
            iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
            AMD64RMI*   rmi   = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp0  = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        ereg  = newVRegV(env);
            HReg        dstHi = newVRegV(env);
            HReg        dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
            addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
            addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
            add_to_rsp(env, 16);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

      case Iop_V128HLtoV256: {
         *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
         *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
         return;
      }

      case Iop_Mul32x8:    fn = (HWord)h_generic_calc_Mul32x4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Sx8:   fn = (HWord)h_generic_calc_Max32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Sx8:   fn = (HWord)h_generic_calc_Min32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Ux8:   fn = (HWord)h_generic_calc_Max32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Ux8:   fn = (HWord)h_generic_calc_Min32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Max16Ux16:  fn = (HWord)h_generic_calc_Max16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Min16Ux16:  fn = (HWord)h_generic_calc_Min16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Max8Sx32:   fn = (HWord)h_generic_calc_Max8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_Min8Sx32:   fn = (HWord)h_generic_calc_Min8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_CmpEQ64x4:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                           goto do_SseAssistedBinary;
      case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                           goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
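         /* There is no SSE instruction for these ops, so the work is
            farmed out to a 128-bit helper from
            host_generic_simd128.c, called once per half.  Arguments
            and result travel through a 16-aligned scratch area on the
            stack; the helpers presumably have the form
            fn(V128* res, V128* argL, V128* argR).  The hi halves use
            argp+0/+16/+32 and the lo halves argp+48/+64/+80. */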
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well. */
         vassert(fn != 0);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $160, %rsp -- make a space */
         sub_from_rsp(env, 160);
         /* leaq 48(%rsp), %r_argp -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
            leaq 0(%r_argp), %rdi
            leaq 16(%r_argp), %rsi
            leaq 32(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RDX()));
         /* Store the two high args, at (%rsi) and (%rdx):
            movupd %argLhi, 0(%rsi)
            movupd %argRhi, 0(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         /* Store the two low args, at 48(%rsi) and 48(%rdx):
            movupd %argLlo, 48(%rsi)
            movupd %argRlo, 48(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                          AMD64AMode_IR(48, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                          AMD64AMode_IR(48, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* Prepare 3 arg regs:
            leaq 48(%r_argp), %rdi
            leaq 64(%r_argp), %rsi
            leaq 80(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
                                        hregAMD64_RDX()));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                          AMD64AMode_IR(0, argp)));
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                          AMD64AMode_IR(48, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 160);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_Perm32x8:   fn = (HWord)h_generic_calc_Perm32x8;
                           goto do_SseAssistedBinary256;
      do_SseAssistedBinary256: {
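         /* Unlike the ops above, Perm32x8 can move data across the
            128-bit boundary, so the two halves cannot be handled
            independently.  Instead both full 256-bit values are laid
            out in memory (lo half at the lower address) and a single
            helper call computes the whole result: res at argp+0,
            argL at argp+32, argR at argp+64. */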
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well. */
         vassert(fn != 0);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $160, %rsp -- make a space */
         sub_from_rsp(env, 160);
         /* leaq 48(%rsp), %r_argp -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
            leaq 0(%r_argp), %rdi
            leaq 32(%r_argp), %rsi
            leaq 64(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                        hregAMD64_RDX()));
         /* Store the two args, at (%rsi) and (%rdx):
            movupd %argLlo, 0(%rsi)
            movupd %argLhi, 16(%rsi)
            movupd %argRlo, 0(%rdx)
            movupd %argRhi, 16(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                          AMD64AMode_IR(16, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                          AMD64AMode_IR(16, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                          AMD64AMode_IR(0, argp)));
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                          AMD64AMode_IR(16, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 160);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Triop) {
   IRTriop *triop = e->Iex.Triop.details;
   switch (triop->op) {

      case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm;
      case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm;
      case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm;
      case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm;
      do_64Fx4_w_rm:
      {
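         /* For these IROps the first argument, triop->arg1, is the IR
            rounding mode.  It is not honoured here -- see the
            XXXROUNDINGFIXME below -- so the generated code just uses
            the ambient SSE rounding mode. */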
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
         iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm;
      case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm;
      case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm;
      case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm;
      do_32Fx8_w_rm:
      {
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
         iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      default:
         break;
   } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
      HReg rsp = hregAMD64_RSP();
      HReg vHi = newVRegV(env);
      HReg vLo = newVRegV(env);
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp);
      AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
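      /* Each 128-bit half is built up from two 64-bit stores into the
         16 bytes just below %rsp, then loaded back as a single
         vector.  %rsp itself is never adjusted. */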
      /* arg1 is the most significant (Q3), arg4 the least (Q0) */
      /* Get all the args into regs, before messing with the stack. */
      AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1);
      AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2);
      AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3);
      AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4);
      /* less significant lane (Q2) at the lower address (-16(rsp)) */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp));
      /* and then the lower half .. */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_ITE) {
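      /* Compute both arms, copy the 'iftrue' value into the dsts,
         then conditionally overwrite with the 'iffalse' value.
         XORing an AMD64CondCode with 1 inverts it (the condition
         encodings come in complementary pairs), so 'cc ^ 1' means
         "guard is false". */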
      HReg r1Hi, r1Lo, r0Hi, r0Lo;
      iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue);
      iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse);
      HReg dstHi = newVRegV(env);
      HReg dstLo = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi));
      addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi));
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   //avx_fail:
   vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselDVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- LOADG (guarded load) --------- */
   case Ist_LoadG: {
      IRLoadG* lg = stmt->Ist.LoadG.details;
      if (lg->end != Iend_LE)
         goto stmt_fail;

      UChar szB = 0; /* invalid */
      switch (lg->cvt) {
         case ILGop_Ident32: szB = 4; break;
         case ILGop_Ident64: szB = 8; break;
         default: break;
      }
      if (szB == 0)
         goto stmt_fail;

      AMD64AMode* amAddr = iselIntExpr_AMode(env, lg->addr);
      HReg rAlt = iselIntExpr_R(env, lg->alt);
      HReg rDst = lookupIRTemp(env, lg->dst);
      /* Get the alt value into the dst.  We'll do a conditional load
         which overwrites it -- or not -- with loaded data. */
      addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
      AMD64CondCode cc = iselCondCode(env, lg->guard);
      addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
      return;
   }

   /* --------- STOREG (guarded store) --------- */
   case Ist_StoreG: {
      IRStoreG* sg = stmt->Ist.StoreG.details;
      if (sg->end != Iend_LE)
         goto stmt_fail;

      UChar szB = 0; /* invalid */
      switch (typeOfIRExpr(env->type_env, sg->data)) {
         case Ity_I32: szB = 4; break;
         case Ity_I64: szB = 8; break;
         default: break;
      }
      if (szB == 0)
         goto stmt_fail;

      AMD64AMode* amAddr = iselIntExpr_AMode(env, sg->addr);
      HReg rSrc = iselIntExpr_R(env, sg->data);
      AMD64CondCode cc = iselCondCode(env, sg->guard);
      addInstr(env, AMD64Instr_CStore(cc, szB, rSrc, amAddr));
      return;
   }

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_V128) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      if (tyd == Ity_V256) {
         HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         AMD64AMode* am0 = AMD64AMode_IR(0, rA);
         AMD64AMode* am16 = AMD64AMode_IR(16, rA);
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data);
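         /* Little-endian: the less significant half goes to the
            lower address. */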
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      if (ty == Ity_V128) {
         HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_V256) {
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data);
         HReg rbp = hregAMD64_RBP();
         AMD64AMode* am0 = AMD64AMode_IR(stmt->Ist.Put.offset + 0, rbp);
         AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      AMD64AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, puti->data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for memcheck
         created IR) we get t = address-expression, (t is later used
         twice) and so doing this naturally turns address-expression
         back into an AMD64 amode. */
      if (ty == Ity_I64
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Aam.IR.reg;
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
         } else {
            addInstr(env, AMD64Instr_Lea64(am,dst));
         }
         return;
      }

      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V256) {
         HReg rHi, rLo, dstHi, dstLo;
         iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_vMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_vMOVsd_RR(rLo,dstLo) );
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Throw out any return types we don't know about. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128: case Ity_V256:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt addToSp = 0;
      RetLoc rloc = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %rax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc.spOff
               tells us where.  Fish it off the stack and then move
               the stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg dst = lookupIRTemp(env, d->tmp);
            AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
            add_to_rsp(env, addToSp);
            return;
         }
         case Ity_V256: {
            /* See comments for Ity_V128. */
            vassert(rloc.pri == RLPri_V256SpRel);
            vassert(addToSp >= 32);
            HReg dstLo, dstHi;
            lookupIRTempPair(&dstHi, &dstLo, env, d->tmp);
            AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo ));
            AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi ));
            add_to_rsp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, AMD64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expd into %rax, and cas->data into %rbx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rData = iselIntExpr_R(env, cas->dataLo);
         HReg rExpd = iselIntExpr_R(env, cas->expdLo);
         HReg rOld = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
         addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
         switch (ty) {
            case Ity_I64: sz = 8; break;
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, AMD64Instr_ACAS(am, sz));
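         /* cmpxchg sets ZF if the swap happened.  On failure it
            leaves the value actually found in memory in %rax, so
            copy that to rOld; on success rOld already holds the
            expected value, copied above. */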
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOld));
         return;
      } else {
         /* double CAS */
         UChar sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RDX(), rOldHi));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");

      AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
      AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
                                        hregAMD64_RBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amRIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_syscall:
         case Ijk_Sys_int210:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
                                             amRIP, Acc_ALWAYS,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an indirect transfer,
               as that's the cheapest alternative that is
               allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
         } else {
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_syscall:
      case Ijk_Sys_int210:
      case Ijk_InvalICache:
      case Ijk_Yield: {
         HReg r = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( const IRSB* bb,
                            VexArch arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int i, j;
   HReg hreg, hregHI;
   ISelEnv* env;
   UInt hwcaps_host = archinfo_host->hwcaps;
   AMD64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT
                     | VEX_HWCAPS_AMD64_AVX
                     | VEX_HWCAPS_AMD64_RDTSCP
                     | VEX_HWCAPS_AMD64_BMI
                     | VEX_HWCAPS_AMD64_AVX2)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
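   /* vregmapHI is used only for the second (high) halves of I128 and
      V256 temps, which each occupy two host registers. */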

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_I128:
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128:
            hreg = mkHReg(True, HRcVec128, 0, j++);
            break;
         case Ity_V256:
            hreg   = mkHReg(True, HRcVec128, 0, j++);
            hregHI = mkHReg(True, HRcVec128, 0, j++);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = AMD64AMode_IR(offs_Host_EvC_Counter,  hregAMD64_RBP());
   amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, AMD64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/