
/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_generic_simd128.h"
#include "host_generic_simd256.h"
#include "host_generic_maddf.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
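
/* Reference note (an illustrative decoding, following the Intel SDM
   bit layouts; not load-bearing for the code below):
      0x027F: FPCW bits 0..5 set  -> all x87 exceptions masked,
              PC (bits 8..9) = 10b  -> 53-bit precision,
              RC (bits 10..11) = 00b -> round to nearest.
      0x1F80: MXCSR bits 7..12 set -> all SSE exceptions masked,
              RC (bits 13..14) = 00b -> round to nearest. */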


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 128-bit integer-typed
             IRTemps.  It holds the identity of a second
             64-bit virtual HReg, which holds the high half
             of the value.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, const IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, const IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, const IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, const IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, const IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, const IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, const IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, const IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, const IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, const IRExpr* e );

static AMD64CondCode iselCondCode_wrk    ( ISelEnv* env, const IRExpr* e );
static AMD64CondCode iselCondCode        ( ISelEnv* env, const IRExpr* e );

static HReg          iselDblExpr_wrk     ( ISelEnv* env, const IRExpr* e );
static HReg          iselDblExpr         ( ISelEnv* env, const IRExpr* e );

static HReg          iselFltExpr_wrk     ( ISelEnv* env, const IRExpr* e );
static HReg          iselFltExpr         ( ISelEnv* env, const IRExpr* e );

static HReg          iselVecExpr_wrk     ( ISelEnv* env, const IRExpr* e );
static HReg          iselVecExpr         ( ISelEnv* env, const IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, const IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, const IRExpr* e );

/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y1;
   y1 = x << 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
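
/* For instance: fitsIn32Bits(0x000000007FFFFFFFULL) and
   fitsIn32Bits(0xFFFFFFFF80000000ULL) are both True, but
   fitsIn32Bits(0x0000000080000000ULL) is False, since sign-extending
   its low 32 bits would set all of the top 32 bits. */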

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((Long)(uimm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
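
/* For example (illustrative only): push_uimm64(env, 0xFFFFFFFF80000000ULL)
   takes the single-push path, since that value is the sign extension of
   its low 32 bits, whereas 0x0000000080000000ULL is not, and so goes via
   an Imm64 load into a temporary. */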


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_GSPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_GSPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nGSPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_GSPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_GSPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_GSPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nGSPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nGSPTRs == 0 || nGSPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, (Addr)cee->addr, n_args, *retloc));
}
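
/* An illustrative sketch (not from the original file): an unconditional
   call such as helper(t1, 0x42:I64, GET:I64(16)) passes the fast-scheme
   test, since each argument is a RdTmp, Const or Get, so the
   marshalling reduces to roughly

      movq %vr_t1, %rdi
      movq $0x42, %rsi
      movq 16(%rbp), %rdx
      call helper

   with no intermediate vreg-to-rreg copies for the register allocator
   to clean up. */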


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp         (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
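
/* Illustrative only: for an 8-element, 8-byte-per-element guest array
   accessed at index expression 'i' with bias 1, the returned amode
   corresponds to

      movq %vr_i, %tmp
      addq $1, %tmp
      andq $7, %tmp
      ... descr->base(%rbp, %tmp, 8) ...

   the AND with 7 supplying the modulo-8 wraparound that the IRRegArray
   semantics require. */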


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}
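
/* (Comparing a register with itself via CMPEQ32 makes every lane
   compare equal, so the result is all ones regardless of what the
   register previously held, just as the XOR above always yields all
   zeroes.) */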


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
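
/* For example: bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL,
   since bits 0, 2, 5 and 7 of 0xA5 are set. */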


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, const IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }
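
      /* (The pre-widening above matters: sub-64-bit values live in
         64-bit vregs with arbitrary upper bits, so e.g. a Shr16 must
         first zero bits 16..63, and a Sar8 must first sign-extend from
         bit 7, before the full-width 64-bit shift can give the right
         answer.) */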

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, src2, dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }
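
      /* (Note the packing above: the 32-bit remainder, computed into
         %rdx, lands in the upper half of 'dst', and the 32-bit
         quotient, from %rax, in the lower half -- exactly the I64
         layout that DivModS/U64to32 is defined to produce.) */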

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32  = newVRegI(env);
         HReg b32  = newVRegI(env);
         HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
1348
sewardj18303862005-02-21 12:36:54 +00001349 if (e->Iex.Binop.op == Iop_CmpF64) {
1350 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1351 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1352 HReg dst = newVRegI(env);
1353 addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
 1354         /* Mask out irrelevant parts of the result so as to conform to
 1355            the CmpF64 definition: 0x45 keeps only ZF, PF and CF
                 (bits 6, 2 and 0) of the copied %rflags. */
1356 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
1357 return dst;
1358 }
1359
sewardj6c299f32009-12-31 18:00:12 +00001360 if (e->Iex.Binop.op == Iop_F64toI32S
1361 || e->Iex.Binop.op == Iop_F64toI64S) {
1362 Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
sewardj1a01e652005-02-23 11:39:21 +00001363 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1364 HReg dst = newVRegI(env);
1365 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
sewardj37d52572005-02-25 14:22:12 +00001366 addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
sewardj1a01e652005-02-23 11:39:21 +00001367 set_SSE_rounding_default(env);
1368 return dst;
1369 }
1370
sewardj8258a8c2005-02-02 03:11:24 +00001371 break;
1372 }
1373
sewardjf67eadf2005-02-03 03:53:52 +00001374 /* --------- UNARY OP --------- */
1375 case Iex_Unop: {
sewardj7f039c42005-02-04 21:13:55 +00001376
sewardj176ad2f2005-04-27 11:55:08 +00001377 /* 1Uto8(64to1(expr64)) */
sewardjca257bc2010-09-08 08:34:52 +00001378 {
1379 DEFINE_PATTERN( p_1Uto8_64to1,
1380 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1381 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
Elliott Hughesed398002017-06-21 14:41:24 -07001382 const IRExpr* expr64 = mi.bindee[0];
sewardjca257bc2010-09-08 08:34:52 +00001383 HReg dst = newVRegI(env);
1384 HReg src = iselIntExpr_R(env, expr64);
1385 addInstr(env, mk_iMOVsd_RR(src,dst) );
1386 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1387 AMD64RMI_Imm(1), dst));
sewardjf67eadf2005-02-03 03:53:52 +00001388 return dst;
1389 }
sewardjca257bc2010-09-08 08:34:52 +00001390 }
1391
1392 /* 8Uto64(LDle(expr64)) */
1393 {
1394 DEFINE_PATTERN(p_LDle8_then_8Uto64,
1395 unop(Iop_8Uto64,
1396 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1397 if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
1398 HReg dst = newVRegI(env);
1399 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1400 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
1401 return dst;
1402 }
1403 }
1404
1405 /* 16Uto64(LDle(expr64)) */
1406 {
1407 DEFINE_PATTERN(p_LDle16_then_16Uto64,
1408 unop(Iop_16Uto64,
1409 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1410 if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
1411 HReg dst = newVRegI(env);
1412 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1413 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
1414 return dst;
1415 }
1416 }
1417
sewardj9cc2bbf2011-06-05 17:56:03 +00001418 /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
1419 Use 32 bit arithmetic and let the default zero-extend rule
1420 do the 32Uto64 for free. */
1421 if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
1422 IROp opi = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
1423 IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
1424 IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
1425 AMD64AluOp aluOp = Aalu_INVALID;
1426 switch (opi) {
1427 case Iop_Add32: aluOp = Aalu_ADD; break;
1428 case Iop_Sub32: aluOp = Aalu_SUB; break;
1429 case Iop_And32: aluOp = Aalu_AND; break;
1430 case Iop_Or32: aluOp = Aalu_OR; break;
1431 case Iop_Xor32: aluOp = Aalu_XOR; break;
1432 default: break;
1433 }
1434 if (aluOp != Aalu_INVALID) {
1435 /* For commutative ops we assume any literal values are on
1436 the second operand. */
1437 HReg dst = newVRegI(env);
1438 HReg reg = iselIntExpr_R(env, argL);
1439 AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
1440 addInstr(env, mk_iMOVsd_RR(reg,dst));
1441 addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
1442 return dst;
1443 }
1444 /* just fall through to normal handling for Iop_32Uto64 */
1445 }
1446
1447 /* Fallback cases */
sewardjca257bc2010-09-08 08:34:52 +00001448 switch (e->Iex.Unop.op) {
1449 case Iop_32Uto64:
sewardj05b3b6a2005-02-04 01:44:33 +00001450 case Iop_32Sto64: {
1451 HReg dst = newVRegI(env);
1452 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjca257bc2010-09-08 08:34:52 +00001453 addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
1454 src, dst) );
sewardj05b3b6a2005-02-04 01:44:33 +00001455 return dst;
1456 }
sewardj9b967672005-02-08 11:13:09 +00001457 case Iop_128HIto64: {
1458 HReg rHi, rLo;
1459 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1460 return rHi; /* and abandon rLo */
1461 }
1462 case Iop_128to64: {
1463 HReg rHi, rLo;
1464 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1465 return rLo; /* and abandon rHi */
1466 }
sewardj85520e42005-02-19 15:22:38 +00001467 case Iop_8Uto16:
sewardjec93f982005-06-21 13:51:18 +00001468 case Iop_8Uto32:
sewardj176ad2f2005-04-27 11:55:08 +00001469 case Iop_8Uto64:
1470 case Iop_16Uto64:
sewardj85520e42005-02-19 15:22:38 +00001471 case Iop_16Uto32: {
sewardj176ad2f2005-04-27 11:55:08 +00001472 HReg dst = newVRegI(env);
1473 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001474 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
1475 || e->Iex.Unop.op==Iop_16Uto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001476 UInt mask = srcIs16 ? 0xFFFF : 0xFF;
sewardj7de0d3c2005-02-13 02:26:41 +00001477 addInstr(env, mk_iMOVsd_RR(src,dst) );
1478 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1479 AMD64RMI_Imm(mask), dst));
1480 return dst;
1481 }
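      /* Sign-extension from 8/16 bits is done with a SHL/SAR pair:
         shift the value up so its sign bit becomes bit 63, then
         arithmetic-shift it back down. */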
sewardj85520e42005-02-19 15:22:38 +00001482 case Iop_8Sto16:
sewardj176ad2f2005-04-27 11:55:08 +00001483 case Iop_8Sto64:
sewardj7de0d3c2005-02-13 02:26:41 +00001484 case Iop_8Sto32:
sewardj176ad2f2005-04-27 11:55:08 +00001485 case Iop_16Sto32:
1486 case Iop_16Sto64: {
1487 HReg dst = newVRegI(env);
1488 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001489 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
1490 || e->Iex.Unop.op==Iop_16Sto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001491 UInt amt = srcIs16 ? 48 : 56;
sewardj486074e2005-02-08 20:10:04 +00001492 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001493 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1494 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
sewardj486074e2005-02-08 20:10:04 +00001495 return dst;
1496 }
sewardj85520e42005-02-19 15:22:38 +00001497 case Iop_Not8:
1498 case Iop_Not16:
sewardj7de0d3c2005-02-13 02:26:41 +00001499 case Iop_Not32:
sewardjd0a12df2005-02-10 02:07:43 +00001500 case Iop_Not64: {
1501 HReg dst = newVRegI(env);
1502 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1503 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001504 addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
sewardjd0a12df2005-02-10 02:07:43 +00001505 return dst;
1506 }
de5a70f5c2010-04-01 23:08:59 +00001507 case Iop_16HIto8:
sewardj85520e42005-02-19 15:22:38 +00001508 case Iop_32HIto16:
sewardj7de0d3c2005-02-13 02:26:41 +00001509 case Iop_64HIto32: {
1510 HReg dst = newVRegI(env);
1511 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1512 Int shift = 0;
1513 switch (e->Iex.Unop.op) {
sewardj9ba870d2010-04-02 11:29:23 +00001514 case Iop_16HIto8: shift = 8; break;
sewardj85520e42005-02-19 15:22:38 +00001515 case Iop_32HIto16: shift = 16; break;
sewardj7de0d3c2005-02-13 02:26:41 +00001516 case Iop_64HIto32: shift = 32; break;
1517 default: vassert(0);
1518 }
1519 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001520 addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
sewardj7de0d3c2005-02-13 02:26:41 +00001521 return dst;
1522 }
sewardj176ad2f2005-04-27 11:55:08 +00001523 case Iop_1Uto64:
sewardj0af46ab2005-04-26 01:52:29 +00001524 case Iop_1Uto32:
sewardjf53b7352005-04-06 20:01:56 +00001525 case Iop_1Uto8: {
1526 HReg dst = newVRegI(env);
1527 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1528 addInstr(env, AMD64Instr_Set64(cond,dst));
1529 return dst;
1530 }
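      /* Set64 materialises the condition as 0 or 1; the SHL/SAR-by-63
         pair then broadcasts that single bit across the whole
         register, giving 0 or all-ones. */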
sewardja64f8ad2005-04-24 00:26:37 +00001531 case Iop_1Sto8:
sewardj478fe702005-04-23 01:15:47 +00001532 case Iop_1Sto16:
1533 case Iop_1Sto32:
sewardj42322b52005-04-20 22:57:11 +00001534 case Iop_1Sto64: {
1535 /* could do better than this, but for now ... */
1536 HReg dst = newVRegI(env);
1537 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1538 addInstr(env, AMD64Instr_Set64(cond,dst));
sewardj501a3392005-05-11 15:37:50 +00001539 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
1540 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
sewardj42322b52005-04-20 22:57:11 +00001541 return dst;
1542 }
sewardjf53b7352005-04-06 20:01:56 +00001543 case Iop_Ctz64: {
1544 /* Count trailing zeroes, implemented by amd64 'bsfq' */
1545 HReg dst = newVRegI(env);
1546 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1547 addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
1548 return dst;
1549 }
sewardj537cab02005-04-07 02:03:52 +00001550 case Iop_Clz64: {
1551 /* Count leading zeroes. Do 'bsrq' to establish the index
1552 of the highest set bit, and subtract that value from
1553 63. */
1554 HReg tmp = newVRegI(env);
1555 HReg dst = newVRegI(env);
1556 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1557 addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
1558 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
1559 AMD64RMI_Imm(63), dst));
1560 addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
1561 AMD64RMI_Reg(tmp), dst));
1562 return dst;
1563 }
sewardjeb17e492007-08-25 23:07:44 +00001564
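      /* CmpwNEZ{32,64}(x) is 0 if x == 0 and all-ones otherwise.
         It is computed as (x | -x) >>s 63: the top bit of x | -x is
         set iff x is nonzero, and the arithmetic shift broadcasts
         that bit.  The 32-bit variant first zero-extends x. */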
1565 case Iop_CmpwNEZ64: {
sewardj176ad2f2005-04-27 11:55:08 +00001566 HReg dst = newVRegI(env);
sewardjeb17e492007-08-25 23:07:44 +00001567 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1568 addInstr(env, mk_iMOVsd_RR(src,dst));
sewardj501a3392005-05-11 15:37:50 +00001569 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
sewardjeb17e492007-08-25 23:07:44 +00001570 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1571 AMD64RMI_Reg(src), dst));
1572 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1573 return dst;
1574 }
1575
1576 case Iop_CmpwNEZ32: {
1577 HReg src = newVRegI(env);
1578 HReg dst = newVRegI(env);
1579 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1580 addInstr(env, mk_iMOVsd_RR(pre,src));
sewardjca257bc2010-09-08 08:34:52 +00001581 addInstr(env, AMD64Instr_MovxLQ(False, src, src));
sewardjeb17e492007-08-25 23:07:44 +00001582 addInstr(env, mk_iMOVsd_RR(src,dst));
1583 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1584 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1585 AMD64RMI_Reg(src), dst));
1586 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1587 return dst;
1588 }
1589
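      /* Left{8,16,32,64}(x) = x | -x: all bits from the lowest set
         bit of x upwards become 1, and zero stays zero. */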
1590 case Iop_Left8:
1591 case Iop_Left16:
1592 case Iop_Left32:
1593 case Iop_Left64: {
1594 HReg dst = newVRegI(env);
1595 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1596 addInstr(env, mk_iMOVsd_RR(src, dst));
1597 addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
1598 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
sewardj176ad2f2005-04-27 11:55:08 +00001599 return dst;
1600 }
sewardj537cab02005-04-07 02:03:52 +00001601
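      /* The vector-lane extractions below go via the stack: spill
         the whole vector to scratch space just below %rsp, then
         reload the wanted lane with an ordinary integer load.  All
         accesses stay within 128 bytes below %rsp, i.e. inside the
         AMD64 ABI's red zone. */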
sewardj478fe702005-04-23 01:15:47 +00001602 case Iop_V128to32: {
1603 HReg dst = newVRegI(env);
1604 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1605 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1606 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1607 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1608 return dst;
1609 }
sewardj1a01e652005-02-23 11:39:21 +00001610
1611 /* V128{HI}to64 */
1612 case Iop_V128HIto64:
1613 case Iop_V128to64: {
sewardj1a01e652005-02-23 11:39:21 +00001614 HReg dst = newVRegI(env);
sewardjc4530ae2012-05-21 10:18:49 +00001615 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
1616 HReg rsp = hregAMD64_RSP();
sewardj1a01e652005-02-23 11:39:21 +00001617 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
sewardjc4530ae2012-05-21 10:18:49 +00001618 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1619 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
1620 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1621 16, vec, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00001622 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
sewardjc4530ae2012-05-21 10:18:49 +00001623 AMD64RMI_Mem(off_rsp), dst ));
1624 return dst;
1625 }
1626
1627 case Iop_V256to64_0: case Iop_V256to64_1:
1628 case Iop_V256to64_2: case Iop_V256to64_3: {
1629 HReg vHi, vLo, vec;
1630 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
 1631         /* Do the first part of the selection by deciding which of
 1632            the 128 bit registers to look at, and do the second part
 1633            using the same scheme as for V128{HI}to64 above. */
1634 Int off = 0;
1635 switch (e->Iex.Unop.op) {
1636 case Iop_V256to64_0: vec = vLo; off = -16; break;
1637 case Iop_V256to64_1: vec = vLo; off = -8; break;
1638 case Iop_V256to64_2: vec = vHi; off = -16; break;
1639 case Iop_V256to64_3: vec = vHi; off = -8; break;
1640 default: vassert(0);
1641 }
1642 HReg dst = newVRegI(env);
1643 HReg rsp = hregAMD64_RSP();
1644 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1645 AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
1646 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1647 16, vec, m16_rsp));
1648 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1649 AMD64RMI_Mem(off_rsp), dst ));
sewardj1a01e652005-02-23 11:39:21 +00001650 return dst;
1651 }
1652
sewardj924215b2005-03-26 21:50:31 +00001653 /* ReinterpF64asI64(e) */
1654 /* Given an IEEE754 double, produce an I64 with the same bit
1655 pattern. */
1656 case Iop_ReinterpF64asI64: {
1657 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1658 HReg dst = newVRegI(env);
1659 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1660 /* paranoia */
1661 set_SSE_rounding_default(env);
1662 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1663 addInstr(env, AMD64Instr_Alu64R(
1664 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1665 return dst;
1666 }
1667
sewardj79501112008-07-29 09:48:26 +00001668 /* ReinterpF32asI32(e) */
1669 /* Given an IEEE754 single, produce an I64 with the same bit
1670 pattern in the lower half. */
1671 case Iop_ReinterpF32asI32: {
1672 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1673 HReg dst = newVRegI(env);
1674 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1675 /* paranoia */
1676 set_SSE_rounding_default(env);
1677 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
1678 addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
1679 return dst;
1680 }
1681
sewardj85520e42005-02-19 15:22:38 +00001682 case Iop_16to8:
sewardja6b93d12005-02-17 09:28:28 +00001683 case Iop_32to8:
sewardj176ad2f2005-04-27 11:55:08 +00001684 case Iop_64to8:
sewardj7de0d3c2005-02-13 02:26:41 +00001685 case Iop_32to16:
sewardj176ad2f2005-04-27 11:55:08 +00001686 case Iop_64to16:
sewardj486074e2005-02-08 20:10:04 +00001687 case Iop_64to32:
1688 /* These are no-ops. */
1689 return iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjf67eadf2005-02-03 03:53:52 +00001690
sewardje13074c2012-11-08 10:57:08 +00001691 case Iop_GetMSBs8x8: {
1692 /* Note: the following assumes the helper is of
1693 signature
1694 UInt fn ( ULong ), and is not a regparm fn.
1695 */
1696 HReg dst = newVRegI(env);
1697 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1698 fn = (HWord)h_generic_calc_GetMSBs8x8;
1699 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
sewardjcfe046e2013-01-17 14:23:53 +00001700 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00001701 1, mk_RetLoc_simple(RLPri_Int) ));
sewardje13074c2012-11-08 10:57:08 +00001702 /* MovxLQ is not exactly the right thing here. We just
1703 need to get the bottom 8 bits of RAX into dst, and zero
1704 out everything else. Assuming that the helper returns
1705 a UInt with the top 24 bits zeroed out, it'll do,
1706 though. */
1707 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1708 return dst;
1709 }
1710
sewardj78a20592012-12-13 18:29:56 +00001711 case Iop_GetMSBs8x16: {
1712 /* Note: the following assumes the helper is of signature
1713 UInt fn ( ULong w64hi, ULong w64Lo ),
1714 and is not a regparm fn. */
1715 HReg dst = newVRegI(env);
1716 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1717 HReg rsp = hregAMD64_RSP();
1718 fn = (HWord)h_generic_calc_GetMSBs8x16;
1719 AMD64AMode* m8_rsp = AMD64AMode_IR( -8, rsp);
1720 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
1721 addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
1722 16, vec, m16_rsp));
1723 /* hi 64 bits into RDI -- the first arg */
1724 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1725 AMD64RMI_Mem(m8_rsp),
1726 hregAMD64_RDI() )); /* 1st arg */
1727 /* lo 64 bits into RSI -- the 2nd arg */
1728 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1729 AMD64RMI_Mem(m16_rsp),
1730 hregAMD64_RSI() )); /* 2nd arg */
sewardjcfe046e2013-01-17 14:23:53 +00001731 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00001732 2, mk_RetLoc_simple(RLPri_Int) ));
sewardj78a20592012-12-13 18:29:56 +00001733 /* MovxLQ is not exactly the right thing here. We just
sewardj9213c612012-12-19 08:39:11 +00001734 need to get the bottom 16 bits of RAX into dst, and zero
sewardj78a20592012-12-13 18:29:56 +00001735 out everything else. Assuming that the helper returns
sewardj9213c612012-12-19 08:39:11 +00001736 a UInt with the top 16 bits zeroed out, it'll do,
sewardj78a20592012-12-13 18:29:56 +00001737 though. */
1738 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1739 return dst;
1740 }
1741
sewardjf67eadf2005-02-03 03:53:52 +00001742 default:
1743 break;
1744 }
sewardje7905662005-05-09 18:15:21 +00001745
1746 /* Deal with unary 64-bit SIMD ops. */
1747 switch (e->Iex.Unop.op) {
1748 case Iop_CmpNEZ32x2:
1749 fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1750 case Iop_CmpNEZ16x4:
1751 fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1752 case Iop_CmpNEZ8x8:
1753 fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1754 default:
1755 fn = (HWord)0; break;
1756 }
1757 if (fn != (HWord)0) {
1758 /* Note: the following assumes all helpers are of
1759 signature
1760 ULong fn ( ULong ), and they are
1761 not marked as regparm functions.
1762 */
1763 HReg dst = newVRegI(env);
1764 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1765 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
sewardj74142b82013-08-08 10:28:59 +00001766 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
1767 mk_RetLoc_simple(RLPri_Int) ));
sewardje7905662005-05-09 18:15:21 +00001768 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1769 return dst;
1770 }
1771
sewardjf67eadf2005-02-03 03:53:52 +00001772 break;
1773 }
sewardj8258a8c2005-02-02 03:11:24 +00001774
1775 /* --------- GET --------- */
1776 case Iex_Get: {
1777 if (ty == Ity_I64) {
1778 HReg dst = newVRegI(env);
1779 addInstr(env, AMD64Instr_Alu64R(
1780 Aalu_MOV,
1781 AMD64RMI_Mem(
1782 AMD64AMode_IR(e->Iex.Get.offset,
1783 hregAMD64_RBP())),
1784 dst));
1785 return dst;
1786 }
1787 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1788 HReg dst = newVRegI(env);
1789 addInstr(env, AMD64Instr_LoadEX(
sewardj1e499352005-03-23 03:02:50 +00001790 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
sewardj8258a8c2005-02-02 03:11:24 +00001791 False,
1792 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1793 dst));
1794 return dst;
1795 }
1796 break;
1797 }
1798
sewardj8d965312005-02-25 02:48:47 +00001799 case Iex_GetI: {
1800 AMD64AMode* am
1801 = genGuestArrayOffset(
1802 env, e->Iex.GetI.descr,
1803 e->Iex.GetI.ix, e->Iex.GetI.bias );
1804 HReg dst = newVRegI(env);
1805 if (ty == Ity_I8) {
1806 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1807 return dst;
1808 }
sewardj1e015d82005-04-23 23:41:46 +00001809 if (ty == Ity_I64) {
1810 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1811 return dst;
1812 }
sewardj8d965312005-02-25 02:48:47 +00001813 break;
1814 }
sewardj05b3b6a2005-02-04 01:44:33 +00001815
1816 /* --------- CCALL --------- */
1817 case Iex_CCall: {
1818 HReg dst = newVRegI(env);
sewardj7f039c42005-02-04 21:13:55 +00001819 vassert(ty == e->Iex.CCall.retty);
sewardj05b3b6a2005-02-04 01:44:33 +00001820
sewardjcfe046e2013-01-17 14:23:53 +00001821 /* be very restrictive for now. Only 64-bit ints allowed for
sewardj74142b82013-08-08 10:28:59 +00001822 args, and 64 or 32 bits for return type. */
sewardje8aaa872005-07-07 13:12:04 +00001823 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
sewardj05b3b6a2005-02-04 01:44:33 +00001824 goto irreducible;
1825
sewardj7f039c42005-02-04 21:13:55 +00001826 /* Marshal args, do the call. */
sewardj74142b82013-08-08 10:28:59 +00001827 UInt addToSp = 0;
1828 RetLoc rloc = mk_RetLoc_INVALID();
1829 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1830 e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
1831 vassert(is_sane_RetLoc(rloc));
1832 vassert(rloc.pri == RLPri_Int);
1833 vassert(addToSp == 0);
sewardj05b3b6a2005-02-04 01:44:33 +00001834
sewardje8aaa872005-07-07 13:12:04 +00001835 /* Move to dst, and zero out the top 32 bits if the result type is
1836 Ity_I32. Probably overkill, but still .. */
1837 if (e->Iex.CCall.retty == Ity_I64)
1838 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1839 else
sewardjca257bc2010-09-08 08:34:52 +00001840 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
sewardje8aaa872005-07-07 13:12:04 +00001841
sewardj05b3b6a2005-02-04 01:44:33 +00001842 return dst;
1843 }
1844
sewardj7f039c42005-02-04 21:13:55 +00001845 /* --------- LITERAL --------- */
1846 /* 64/32/16/8-bit literals */
1847 case Iex_Const:
1848 if (ty == Ity_I64) {
1849 HReg r = newVRegI(env);
1850 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1851 return r;
1852 } else {
1853 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1854 HReg r = newVRegI(env);
1855 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1856 return r;
1857 }
sewardj05b3b6a2005-02-04 01:44:33 +00001858
1859 /* --------- MULTIPLEX --------- */
florian99dd03e2013-01-29 03:56:06 +00001860 case Iex_ITE: { // VFD
sewardj009230b2013-01-26 11:47:55 +00001861 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
florian99dd03e2013-01-29 03:56:06 +00001862 && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
1863 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
sewardje357c672015-01-27 23:35:58 +00001864 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
sewardj009230b2013-01-26 11:47:55 +00001865 HReg dst = newVRegI(env);
florian99dd03e2013-01-29 03:56:06 +00001866 addInstr(env, mk_iMOVsd_RR(r1,dst));
1867 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00001868 addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
1869 return dst;
sewardj05b3b6a2005-02-04 01:44:33 +00001870 }
1871 break;
1872 }
sewardj8258a8c2005-02-02 03:11:24 +00001873
sewardjf4c803b2006-09-11 11:07:34 +00001874 /* --------- TERNARY OP --------- */
1875 case Iex_Triop: {
florian420bfa92012-06-02 20:29:22 +00001876 IRTriop *triop = e->Iex.Triop.details;
sewardjf4c803b2006-09-11 11:07:34 +00001877 /* C3210 flags following FPU partial remainder (fprem), both
1878 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
florian420bfa92012-06-02 20:29:22 +00001879 if (triop->op == Iop_PRemC3210F64
1880 || triop->op == Iop_PRem1C3210F64) {
sewardjf4c803b2006-09-11 11:07:34 +00001881 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
florian420bfa92012-06-02 20:29:22 +00001882 HReg arg1 = iselDblExpr(env, triop->arg2);
1883 HReg arg2 = iselDblExpr(env, triop->arg3);
sewardjf4c803b2006-09-11 11:07:34 +00001884 HReg dst = newVRegI(env);
1885 addInstr(env, AMD64Instr_A87Free(2));
1886
1887 /* one arg -> top of x87 stack */
1888 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00001889 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardjf4c803b2006-09-11 11:07:34 +00001890
1891 /* other arg -> top of x87 stack */
1892 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00001893 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardjf4c803b2006-09-11 11:07:34 +00001894
florian420bfa92012-06-02 20:29:22 +00001895 switch (triop->op) {
sewardjf4c803b2006-09-11 11:07:34 +00001896 case Iop_PRemC3210F64:
1897 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1898 break;
sewardj4970e4e2008-10-11 10:07:55 +00001899 case Iop_PRem1C3210F64:
1900 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
1901 break;
sewardjf4c803b2006-09-11 11:07:34 +00001902 default:
1903 vassert(0);
1904 }
 1905          /* Ignore the result, and instead make off with the FPU's
 1906             C3210 flags (status word bits 14 and 10..8, hence the
                  0x4700 mask below). */
1907 addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1908 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1909 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1910 return dst;
1911 }
1912 break;
1913 }
1914
sewardj8258a8c2005-02-02 03:11:24 +00001915 default:
1916 break;
1917 } /* switch (e->tag) */
1918
1919 /* We get here if no pattern matched. */
1920 irreducible:
1921 ppIRExpr(e);
1922 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1923}
sewardj614b3fb2005-02-02 02:16:03 +00001924
1925
1926/*---------------------------------------------------------*/
1927/*--- ISEL: Integer expression auxiliaries ---*/
1928/*---------------------------------------------------------*/
1929
1930/* --------------------- AMODEs --------------------- */
1931
1932/* Return an AMode which computes the value of the specified
1933 expression, possibly also adding insns to the code list as a
 1934   result.  The expression may only be a 64-bit one.
1935*/
1936
Elliott Hughesed398002017-06-21 14:41:24 -07001937static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, const IRExpr* e )
sewardj8258a8c2005-02-02 03:11:24 +00001938{
1939 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1940 vassert(sane_AMode(am));
1941 return am;
1942}
1943
1944/* DO NOT CALL THIS DIRECTLY ! */
Elliott Hughesed398002017-06-21 14:41:24 -07001945static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, const IRExpr* e )
sewardj8258a8c2005-02-02 03:11:24 +00001946{
sewardj05b3b6a2005-02-04 01:44:33 +00001947 MatchInfo mi;
1948 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001949 IRType ty = typeOfIRExpr(env->type_env,e);
1950 vassert(ty == Ity_I64);
1951
sewardj05b3b6a2005-02-04 01:44:33 +00001952 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1953 /* bind0 bind1 bind2 bind3 */
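   /* For example, Add64(Add64(r1, Shl64(r2, 3)), 40) becomes the
      amode 40(%r1,%r2,8) -- AMD64AMode_IRRS(40, r1, r2, 3) --
      computing 40 + r1 + (r2 << 3) as a single effective address. */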
1954 DEFINE_PATTERN(p_complex,
1955 binop( Iop_Add64,
1956 binop( Iop_Add64,
1957 bind(0),
1958 binop(Iop_Shl64, bind(1), bind(2))
1959 ),
1960 bind(3)
1961 )
1962 );
1963 if (matchIRExpr(&mi, p_complex, e)) {
Elliott Hughesed398002017-06-21 14:41:24 -07001964 const IRExpr* expr1 = mi.bindee[0];
1965 const IRExpr* expr2 = mi.bindee[1];
1966 const IRExpr* imm8 = mi.bindee[2];
1967 const IRExpr* simm32 = mi.bindee[3];
sewardj05b3b6a2005-02-04 01:44:33 +00001968 if (imm8->tag == Iex_Const
1969 && imm8->Iex.Const.con->tag == Ico_U8
1970 && imm8->Iex.Const.con->Ico.U8 < 4
1971 /* imm8 is OK, now check simm32 */
1972 && simm32->tag == Iex_Const
1973 && simm32->Iex.Const.con->tag == Ico_U64
1974 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1975 UInt shift = imm8->Iex.Const.con->Ico.U8;
sewardj428fabd2005-03-21 03:11:17 +00001976 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
sewardj05b3b6a2005-02-04 01:44:33 +00001977 HReg r1 = iselIntExpr_R(env, expr1);
1978 HReg r2 = iselIntExpr_R(env, expr2);
1979 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1980 return AMD64AMode_IRRS(offset, r1, r2, shift);
1981 }
1982 }
1983
sewardj8258a8c2005-02-02 03:11:24 +00001984 /* Add64(expr1, Shl64(expr2, imm)) */
1985 if (e->tag == Iex_Binop
1986 && e->Iex.Binop.op == Iop_Add64
1987 && e->Iex.Binop.arg2->tag == Iex_Binop
1988 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1989 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1990 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1991 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1992 if (shift == 1 || shift == 2 || shift == 3) {
1993 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1994 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1995 return AMD64AMode_IRRS(0, r1, r2, shift);
1996 }
1997 }
1998
1999 /* Add64(expr,i) */
2000 if (e->tag == Iex_Binop
2001 && e->Iex.Binop.op == Iop_Add64
2002 && e->Iex.Binop.arg2->tag == Iex_Const
2003 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
2004 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
2005 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2006 return AMD64AMode_IR(
sewardj428fabd2005-03-21 03:11:17 +00002007 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
sewardj8258a8c2005-02-02 03:11:24 +00002008 r1
2009 );
2010 }
2011
2012 /* Doesn't match anything in particular. Generate it into
2013 a register and use that. */
2014 {
2015 HReg r1 = iselIntExpr_R(env, e);
2016 return AMD64AMode_IR(0, r1);
2017 }
2018}
sewardj614b3fb2005-02-02 02:16:03 +00002019
2020
2021/* --------------------- RMIs --------------------- */
2022
 2023/* Similarly, calculate an expression into an AMD64RMI operand.  As with
 2024   iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
2025
Elliott Hughesed398002017-06-21 14:41:24 -07002026static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, const IRExpr* e )
sewardj614b3fb2005-02-02 02:16:03 +00002027{
2028 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
2029 /* sanity checks ... */
2030 switch (rmi->tag) {
2031 case Armi_Imm:
2032 return rmi;
2033 case Armi_Reg:
2034 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
2035 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
2036 return rmi;
2037 case Armi_Mem:
2038 vassert(sane_AMode(rmi->Armi.Mem.am));
2039 return rmi;
2040 default:
2041 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
2042 }
2043}
2044
2045/* DO NOT CALL THIS DIRECTLY ! */
Elliott Hughesed398002017-06-21 14:41:24 -07002046static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, const IRExpr* e )
sewardj614b3fb2005-02-02 02:16:03 +00002047{
2048 IRType ty = typeOfIRExpr(env->type_env,e);
2049 vassert(ty == Ity_I64 || ty == Ity_I32
2050 || ty == Ity_I16 || ty == Ity_I8);
2051
2052 /* special case: immediate 64/32/16/8 */
2053 if (e->tag == Iex_Const) {
2054 switch (e->Iex.Const.con->tag) {
2055 case Ico_U64:
2056 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002057 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardj614b3fb2005-02-02 02:16:03 +00002058 }
2059 break;
2060 case Ico_U32:
2061 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
2062 case Ico_U16:
2063 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
2064 case Ico_U8:
2065 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
2066 default:
2067 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2068 }
2069 }
2070
2071 /* special case: 64-bit GET */
2072 if (e->tag == Iex_Get && ty == Ity_I64) {
2073 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2074 hregAMD64_RBP()));
2075 }
2076
sewardj0852a132005-02-21 08:28:46 +00002077 /* special case: 64-bit load from memory */
sewardje9d8a262009-07-01 08:06:34 +00002078 if (e->tag == Iex_Load && ty == Ity_I64
sewardje768e922009-11-26 17:17:37 +00002079 && e->Iex.Load.end == Iend_LE) {
sewardjaf1ceca2005-06-30 23:31:27 +00002080 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj0852a132005-02-21 08:28:46 +00002081 return AMD64RMI_Mem(am);
2082 }
sewardj614b3fb2005-02-02 02:16:03 +00002083
2084 /* default case: calculate into a register and return that */
sewardj8258a8c2005-02-02 03:11:24 +00002085 {
2086 HReg r = iselIntExpr_R ( env, e );
2087 return AMD64RMI_Reg(r);
2088 }
sewardj614b3fb2005-02-02 02:16:03 +00002089}
2090
2091
sewardjf67eadf2005-02-03 03:53:52 +00002092/* --------------------- RIs --------------------- */
2093
2094/* Calculate an expression into an AMD64RI operand. As with
2095 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2096 bits. */
2097
Elliott Hughesed398002017-06-21 14:41:24 -07002098static AMD64RI* iselIntExpr_RI ( ISelEnv* env, const IRExpr* e )
sewardjf67eadf2005-02-03 03:53:52 +00002099{
2100 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
2101 /* sanity checks ... */
2102 switch (ri->tag) {
2103 case Ari_Imm:
2104 return ri;
sewardj80d6e6d2008-05-28 09:40:29 +00002105 case Ari_Reg:
sewardjf67eadf2005-02-03 03:53:52 +00002106 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
2107 vassert(hregIsVirtual(ri->Ari.Reg.reg));
2108 return ri;
2109 default:
2110 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2111 }
2112}
2113
2114/* DO NOT CALL THIS DIRECTLY ! */
Elliott Hughesed398002017-06-21 14:41:24 -07002115static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, const IRExpr* e )
sewardjf67eadf2005-02-03 03:53:52 +00002116{
2117 IRType ty = typeOfIRExpr(env->type_env,e);
2118 vassert(ty == Ity_I64 || ty == Ity_I32
2119 || ty == Ity_I16 || ty == Ity_I8);
2120
2121 /* special case: immediate */
2122 if (e->tag == Iex_Const) {
2123 switch (e->Iex.Const.con->tag) {
2124 case Ico_U64:
2125 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002126 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardjf67eadf2005-02-03 03:53:52 +00002127 }
2128 break;
2129 case Ico_U32:
2130 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
2131 case Ico_U16:
2132 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
2133 case Ico_U8:
2134 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
2135 default:
2136 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2137 }
2138 }
2139
2140 /* default case: calculate into a register and return that */
2141 {
2142 HReg r = iselIntExpr_R ( env, e );
2143 return AMD64RI_Reg(r);
2144 }
2145}
2146
2147
sewardj05b3b6a2005-02-04 01:44:33 +00002148/* --------------------- RMs --------------------- */
2149
2150/* Similarly, calculate an expression into an AMD64RM operand. As
2151 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2152 bits. */
2153
Elliott Hughesed398002017-06-21 14:41:24 -07002154static AMD64RM* iselIntExpr_RM ( ISelEnv* env, const IRExpr* e )
sewardj05b3b6a2005-02-04 01:44:33 +00002155{
2156 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
2157 /* sanity checks ... */
2158 switch (rm->tag) {
2159 case Arm_Reg:
2160 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
2161 vassert(hregIsVirtual(rm->Arm.Reg.reg));
2162 return rm;
2163 case Arm_Mem:
2164 vassert(sane_AMode(rm->Arm.Mem.am));
2165 return rm;
2166 default:
2167 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2168 }
2169}
2170
2171/* DO NOT CALL THIS DIRECTLY ! */
Elliott Hughesed398002017-06-21 14:41:24 -07002172static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, const IRExpr* e )
sewardj05b3b6a2005-02-04 01:44:33 +00002173{
2174 IRType ty = typeOfIRExpr(env->type_env,e);
2175 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2176
2177 /* special case: 64-bit GET */
2178 if (e->tag == Iex_Get && ty == Ity_I64) {
2179 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2180 hregAMD64_RBP()));
2181 }
2182
 2183   /* special case: load from memory -- not handled here; fall through */
2184
2185 /* default case: calculate into a register and return that */
2186 {
2187 HReg r = iselIntExpr_R ( env, e );
2188 return AMD64RM_Reg(r);
2189 }
2190}
2191
2192
2193/* --------------------- CONDCODE --------------------- */
2194
 2195/* Generate code to evaluate a bit-typed expression, returning the
 2196   condition code which is true exactly when the expression would
 2197   notionally have returned 1. */
2198
Elliott Hughesed398002017-06-21 14:41:24 -07002199static AMD64CondCode iselCondCode ( ISelEnv* env, const IRExpr* e )
sewardj05b3b6a2005-02-04 01:44:33 +00002200{
2201 /* Uh, there's nothing we can sanity check here, unfortunately. */
2202 return iselCondCode_wrk(env,e);
2203}
2204
2205/* DO NOT CALL THIS DIRECTLY ! */
Elliott Hughesed398002017-06-21 14:41:24 -07002206static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, const IRExpr* e )
sewardj05b3b6a2005-02-04 01:44:33 +00002207{
sewardjf8c37f72005-02-07 18:55:29 +00002208 MatchInfo mi;
sewardj0af46ab2005-04-26 01:52:29 +00002209
sewardj05b3b6a2005-02-04 01:44:33 +00002210 vassert(e);
2211 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2212
sewardj176ad2f2005-04-27 11:55:08 +00002213 /* var */
sewardjdd40fdf2006-12-24 02:20:24 +00002214 if (e->tag == Iex_RdTmp) {
2215 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj176ad2f2005-04-27 11:55:08 +00002216 HReg dst = newVRegI(env);
2217 addInstr(env, mk_iMOVsd_RR(r64,dst));
2218 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
2219 return Acc_NZ;
2220 }
2221
sewardj109e9352005-07-19 08:42:56 +00002222 /* Constant 1:Bit */
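   /* There is no "always true"/"always false" condition code, so
      force the flags into a known state -- XORing a register with
      itself sets Z -- and return Acc_Z or Acc_NZ as appropriate. */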
2223 if (e->tag == Iex_Const) {
2224 HReg r;
2225 vassert(e->Iex.Const.con->tag == Ico_U1);
2226 vassert(e->Iex.Const.con->Ico.U1 == True
2227 || e->Iex.Const.con->Ico.U1 == False);
2228 r = newVRegI(env);
2229 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2230 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
2231 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2232 }
sewardj486074e2005-02-08 20:10:04 +00002233
2234 /* Not1(...) */
2235 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2236 /* Generate code for the arg, and negate the test condition */
2237 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2238 }
2239
sewardj176ad2f2005-04-27 11:55:08 +00002240 /* --- patterns rooted at: 64to1 --- */
2241
sewardj176ad2f2005-04-27 11:55:08 +00002242 /* 64to1 */
2243 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
sewardj501a3392005-05-11 15:37:50 +00002244 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2245 addInstr(env, AMD64Instr_Test64(1,reg));
sewardjf8c37f72005-02-07 18:55:29 +00002246 return Acc_NZ;
2247 }
2248
florianc862f282012-07-19 17:22:33 +00002249 /* --- patterns rooted at: 32to1 --- */
2250
2251 /* 32to1 */
2252 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
2253 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2254 addInstr(env, AMD64Instr_Test64(1,reg));
2255 return Acc_NZ;
2256 }
2257
sewardj176ad2f2005-04-27 11:55:08 +00002258 /* --- patterns rooted at: CmpNEZ8 --- */
2259
2260 /* CmpNEZ8(x) */
2261 if (e->tag == Iex_Unop
2262 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2263 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002264 addInstr(env, AMD64Instr_Test64(0xFF,r));
sewardj176ad2f2005-04-27 11:55:08 +00002265 return Acc_NZ;
2266 }
2267
sewardj86ec28b2005-04-27 13:39:35 +00002268 /* --- patterns rooted at: CmpNEZ16 --- */
2269
2270 /* CmpNEZ16(x) */
2271 if (e->tag == Iex_Unop
2272 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2273 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002274 addInstr(env, AMD64Instr_Test64(0xFFFF,r));
sewardj86ec28b2005-04-27 13:39:35 +00002275 return Acc_NZ;
2276 }
2277
sewardj176ad2f2005-04-27 11:55:08 +00002278 /* --- patterns rooted at: CmpNEZ32 --- */
2279
2280 /* CmpNEZ32(x) */
2281 if (e->tag == Iex_Unop
2282 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2283 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj176ad2f2005-04-27 11:55:08 +00002284 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
sewardj9cc2bbf2011-06-05 17:56:03 +00002285 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
sewardj176ad2f2005-04-27 11:55:08 +00002286 return Acc_NZ;
2287 }
2288
2289 /* --- patterns rooted at: CmpNEZ64 --- */
2290
sewardj0bc78ab2005-05-11 22:47:32 +00002291 /* CmpNEZ64(Or64(x,y)) */
2292 {
2293 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2294 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2295 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2296 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2297 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2298 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2299 HReg tmp = newVRegI(env);
2300 addInstr(env, mk_iMOVsd_RR(r0, tmp));
2301 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2302 return Acc_NZ;
2303 }
2304 }
2305
sewardj176ad2f2005-04-27 11:55:08 +00002306 /* CmpNEZ64(x) */
2307 if (e->tag == Iex_Unop
2308 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2309 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2310 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2311 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2312 return Acc_NZ;
2313 }
2314
2315 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
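   /* The sub-64-bit equality compares below work by XORing the two
      operands in a 64-bit temp and then masking off all bits above
      the operand width: Z is set iff the low 8/16 bits are equal. */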
2316
sewardj42322b52005-04-20 22:57:11 +00002317 /* CmpEQ8 / CmpNE8 */
2318 if (e->tag == Iex_Binop
2319 && (e->Iex.Binop.op == Iop_CmpEQ8
sewardj1fb8c922009-07-12 12:56:53 +00002320 || e->Iex.Binop.op == Iop_CmpNE8
2321 || e->Iex.Binop.op == Iop_CasCmpEQ8
2322 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
sewardj009230b2013-01-26 11:47:55 +00002323 if (isZeroU8(e->Iex.Binop.arg2)) {
2324 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2325 addInstr(env, AMD64Instr_Test64(0xFF,r1));
2326 switch (e->Iex.Binop.op) {
2327 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2328 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2329 default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
2330 }
2331 } else {
2332 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2333 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2334 HReg r = newVRegI(env);
2335 addInstr(env, mk_iMOVsd_RR(r1,r));
2336 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2337 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2338 switch (e->Iex.Binop.op) {
2339 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2340 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2341 default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
2342 }
sewardj42322b52005-04-20 22:57:11 +00002343 }
2344 }
2345
sewardj0af46ab2005-04-26 01:52:29 +00002346 /* CmpEQ16 / CmpNE16 */
2347 if (e->tag == Iex_Binop
2348 && (e->Iex.Binop.op == Iop_CmpEQ16
sewardj1fb8c922009-07-12 12:56:53 +00002349 || e->Iex.Binop.op == Iop_CmpNE16
2350 || e->Iex.Binop.op == Iop_CasCmpEQ16
2351 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
sewardj0af46ab2005-04-26 01:52:29 +00002352 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2353 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2354 HReg r = newVRegI(env);
2355 addInstr(env, mk_iMOVsd_RR(r1,r));
2356 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2357 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2358 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002359 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2360 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002361 default: vpanic("iselCondCode(amd64): CmpXX16");
2362 }
2363 }
2364
sewardj50d89bf2011-01-10 15:10:48 +00002365 /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
2366 Saves a "movq %rax, %tmp" compared to the default route. */
2367 if (e->tag == Iex_Binop
2368 && e->Iex.Binop.op == Iop_CmpNE64
2369 && e->Iex.Binop.arg1->tag == Iex_CCall
2370 && e->Iex.Binop.arg2->tag == Iex_Const) {
2371 IRExpr* cal = e->Iex.Binop.arg1;
2372 IRExpr* con = e->Iex.Binop.arg2;
2373 HReg tmp = newVRegI(env);
2374 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
2375 vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
2376 vassert(con->Iex.Const.con->tag == Ico_U64);
2377 /* Marshal args, do the call. */
sewardj74142b82013-08-08 10:28:59 +00002378 UInt addToSp = 0;
2379 RetLoc rloc = mk_RetLoc_INVALID();
2380 doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2381 cal->Iex.CCall.cee,
2382 cal->Iex.CCall.retty, cal->Iex.CCall.args );
2383 vassert(is_sane_RetLoc(rloc));
2384 vassert(rloc.pri == RLPri_Int);
2385 vassert(addToSp == 0);
2386 /* */
sewardj50d89bf2011-01-10 15:10:48 +00002387 addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
2388 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
2389 AMD64RMI_Reg(hregAMD64_RAX()), tmp));
2390 return Acc_NZ;
2391 }
2392
sewardjd0a12df2005-02-10 02:07:43 +00002393 /* Cmp*64*(x,y) */
2394 if (e->tag == Iex_Binop
2395 && (e->Iex.Binop.op == Iop_CmpEQ64
2396 || e->Iex.Binop.op == Iop_CmpNE64
sewardj0af46ab2005-04-26 01:52:29 +00002397 || e->Iex.Binop.op == Iop_CmpLT64S
2398 || e->Iex.Binop.op == Iop_CmpLT64U
2399 || e->Iex.Binop.op == Iop_CmpLE64S
sewardja9e4a802005-12-26 19:33:55 +00002400 || e->Iex.Binop.op == Iop_CmpLE64U
sewardj1fb8c922009-07-12 12:56:53 +00002401 || e->Iex.Binop.op == Iop_CasCmpEQ64
sewardje13074c2012-11-08 10:57:08 +00002402 || e->Iex.Binop.op == Iop_CasCmpNE64
2403 || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
sewardjd0a12df2005-02-10 02:07:43 +00002404 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2405 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2406 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2407 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002408 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
sewardje13074c2012-11-08 10:57:08 +00002409 case Iop_CmpNE64:
2410 case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002411 case Iop_CmpLT64S: return Acc_L;
2412 case Iop_CmpLT64U: return Acc_B;
2413 case Iop_CmpLE64S: return Acc_LE;
sewardja9e4a802005-12-26 19:33:55 +00002414 case Iop_CmpLE64U: return Acc_BE;
sewardjd0a12df2005-02-10 02:07:43 +00002415 default: vpanic("iselCondCode(amd64): CmpXX64");
2416 }
2417 }
2418
sewardj9cc2bbf2011-06-05 17:56:03 +00002419 /* Cmp*32*(x,y) */
2420 if (e->tag == Iex_Binop
2421 && (e->Iex.Binop.op == Iop_CmpEQ32
2422 || e->Iex.Binop.op == Iop_CmpNE32
2423 || e->Iex.Binop.op == Iop_CmpLT32S
2424 || e->Iex.Binop.op == Iop_CmpLT32U
2425 || e->Iex.Binop.op == Iop_CmpLE32S
2426 || e->Iex.Binop.op == Iop_CmpLE32U
2427 || e->Iex.Binop.op == Iop_CasCmpEQ32
sewardj009230b2013-01-26 11:47:55 +00002428 || e->Iex.Binop.op == Iop_CasCmpNE32
2429 || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
sewardj9cc2bbf2011-06-05 17:56:03 +00002430 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2431 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2432 addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
2433 switch (e->Iex.Binop.op) {
2434 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
sewardj009230b2013-01-26 11:47:55 +00002435 case Iop_CmpNE32:
2436 case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
sewardj9cc2bbf2011-06-05 17:56:03 +00002437 case Iop_CmpLT32S: return Acc_L;
2438 case Iop_CmpLT32U: return Acc_B;
2439 case Iop_CmpLE32S: return Acc_LE;
2440 case Iop_CmpLE32U: return Acc_BE;
2441 default: vpanic("iselCondCode(amd64): CmpXX32");
2442 }
2443 }
2444
sewardj05b3b6a2005-02-04 01:44:33 +00002445 ppIRExpr(e);
2446 vpanic("iselCondCode(amd64)");
2447}
2448
2449
sewardj9b967672005-02-08 11:13:09 +00002450/*---------------------------------------------------------*/
2451/*--- ISEL: Integer expressions (128 bit) ---*/
2452/*---------------------------------------------------------*/
2453
2454/* Compute a 128-bit value into a register pair, which is returned as
2455 the first two parameters. As with iselIntExpr_R, these may be
2456 either real or virtual regs; in any case they must not be changed
2457 by subsequent code emitted by the caller. */
2458
2459static void iselInt128Expr ( HReg* rHi, HReg* rLo,
Elliott Hughesed398002017-06-21 14:41:24 -07002460 ISelEnv* env, const IRExpr* e )
sewardj9b967672005-02-08 11:13:09 +00002461{
2462 iselInt128Expr_wrk(rHi, rLo, env, e);
2463# if 0
2464 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2465# endif
2466 vassert(hregClass(*rHi) == HRcInt64);
2467 vassert(hregIsVirtual(*rHi));
2468 vassert(hregClass(*rLo) == HRcInt64);
2469 vassert(hregIsVirtual(*rLo));
2470}
2471
2472/* DO NOT CALL THIS DIRECTLY ! */
2473static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
Elliott Hughesed398002017-06-21 14:41:24 -07002474 ISelEnv* env, const IRExpr* e )
sewardj9b967672005-02-08 11:13:09 +00002475{
sewardj9b967672005-02-08 11:13:09 +00002476 vassert(e);
2477 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2478
sewardj9b967672005-02-08 11:13:09 +00002479 /* read 128-bit IRTemp */
sewardjdd40fdf2006-12-24 02:20:24 +00002480 if (e->tag == Iex_RdTmp) {
sewardjc4530ae2012-05-21 10:18:49 +00002481 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
sewardj9b967672005-02-08 11:13:09 +00002482 return;
2483 }
2484
sewardj9b967672005-02-08 11:13:09 +00002485 /* --------- BINARY ops --------- */
2486 if (e->tag == Iex_Binop) {
2487 switch (e->Iex.Binop.op) {
sewardj7de0d3c2005-02-13 02:26:41 +00002488 /* 64 x 64 -> 128 multiply */
sewardj9b967672005-02-08 11:13:09 +00002489 case Iop_MullU64:
2490 case Iop_MullS64: {
 2491         /* get one operand into %rax, and the other into an R/M.
 2492            Need to make an educated guess about which operand is
 2493            better placed in which position. */
2494 HReg tLo = newVRegI(env);
2495 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002496 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
sewardj9b967672005-02-08 11:13:09 +00002497 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2498 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2499 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
sewardj501a3392005-05-11 15:37:50 +00002500 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
sewardj9b967672005-02-08 11:13:09 +00002501 /* Result is now in RDX:RAX. Tell the caller. */
2502 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2503 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2504 *rHi = tHi;
2505 *rLo = tLo;
2506 return;
2507 }
sewardj7de0d3c2005-02-13 02:26:41 +00002508
sewardja6b93d12005-02-17 09:28:28 +00002509 /* 128 x 64 -> (64(rem),64(div)) division */
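         /* The hardware div/idiv takes its 128-bit dividend in
            RDX:RAX and leaves the quotient in RAX and the remainder
            in RDX; these map onto the low and high halves of the
            I128 result respectively. */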
2510 case Iop_DivModU128to64:
2511 case Iop_DivModS128to64: {
2512 /* Get the 128-bit operand into rdx:rax, and the other into
2513 any old R/M. */
2514 HReg sHi, sLo;
2515 HReg tLo = newVRegI(env);
2516 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002517 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
sewardja6b93d12005-02-17 09:28:28 +00002518 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2519 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2520 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2521 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2522 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2523 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2524 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2525 *rHi = tHi;
2526 *rLo = tLo;
2527 return;
2528 }
2529
2530 /* 64HLto128(e1,e2) */
2531 case Iop_64HLto128:
2532 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2533 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2534 return;
2535
sewardj9b967672005-02-08 11:13:09 +00002536 default:
2537 break;
2538 }
2539 } /* if (e->tag == Iex_Binop) */
2540
sewardj9b967672005-02-08 11:13:09 +00002541 ppIRExpr(e);
2542 vpanic("iselInt128Expr");
2543}
2544
2545
sewardj8d965312005-02-25 02:48:47 +00002546/*---------------------------------------------------------*/
2547/*--- ISEL: Floating point expressions (32 bit) ---*/
2548/*---------------------------------------------------------*/
2549
2550/* Nothing interesting here; really just wrappers for
2551 64-bit stuff. */
2552
Elliott Hughesed398002017-06-21 14:41:24 -07002553static HReg iselFltExpr ( ISelEnv* env, const IRExpr* e )
sewardj8d965312005-02-25 02:48:47 +00002554{
2555 HReg r = iselFltExpr_wrk( env, e );
2556# if 0
2557 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2558# endif
2559 vassert(hregClass(r) == HRcVec128);
2560 vassert(hregIsVirtual(r));
2561 return r;
2562}
2563
2564/* DO NOT CALL THIS DIRECTLY */
Elliott Hughesed398002017-06-21 14:41:24 -07002565static HReg iselFltExpr_wrk ( ISelEnv* env, const IRExpr* e )
sewardj8d965312005-02-25 02:48:47 +00002566{
2567 IRType ty = typeOfIRExpr(env->type_env,e);
2568 vassert(ty == Ity_F32);
2569
sewardjdd40fdf2006-12-24 02:20:24 +00002570 if (e->tag == Iex_RdTmp) {
2571 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardjc49ce232005-02-25 13:03:03 +00002572 }
2573
sewardje768e922009-11-26 17:17:37 +00002574 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardjc49ce232005-02-25 13:03:03 +00002575 AMD64AMode* am;
2576 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002577 vassert(e->Iex.Load.ty == Ity_F32);
2578 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardjc49ce232005-02-25 13:03:03 +00002579 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2580 return res;
2581 }
sewardj8d965312005-02-25 02:48:47 +00002582
2583 if (e->tag == Iex_Binop
2584 && e->Iex.Binop.op == Iop_F64toF32) {
2585 /* Although the result is still held in a standard SSE register,
2586 we need to round it to reflect the loss of accuracy/range
2587 entailed in casting it to a 32-bit float. */
2588 HReg dst = newVRegV(env);
2589 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2590 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2591 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2592 set_SSE_rounding_default( env );
2593 return dst;
2594 }
2595
sewardjc49ce232005-02-25 13:03:03 +00002596 if (e->tag == Iex_Get) {
2597 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2598 hregAMD64_RBP() );
2599 HReg res = newVRegV(env);
2600 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2601 return res;
2602 }
2603
sewardj5992bd02005-05-11 02:13:42 +00002604 if (e->tag == Iex_Unop
2605 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2606 /* Given an I32, produce an IEEE754 float with the same bit
2607 pattern. */
2608 HReg dst = newVRegV(env);
2609 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2610 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2611 addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2612 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2613 return dst;
2614 }
sewardj8d965312005-02-25 02:48:47 +00002615
sewardjd15b5972010-06-27 09:06:34 +00002616 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2617 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2618 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
2619 HReg dst = newVRegV(env);
2620
 2621      /* arg now holds the value to be rounded.  The first thing to do
 2622         is set the FPU's rounding mode accordingly. */
2623
2624 /* Set host x87 rounding mode */
2625 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2626
2627 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
2628 addInstr(env, AMD64Instr_A87Free(1));
2629 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
2630 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2631 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
2632 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
2633
2634 /* Restore default x87 rounding. */
2635 set_FPU_rounding_default( env );
2636
2637 return dst;
2638 }
2639
sewardjcc3d2192013-03-27 11:37:33 +00002640 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) {
2641 /* Sigh ... very rough code. Could do much better. */
2642 /* Get the 128-bit literal 00---0 10---0 into a register
2643 and xor it with the value to be negated. */
2644 HReg r1 = newVRegI(env);
2645 HReg dst = newVRegV(env);
2646 HReg tmp = newVRegV(env);
2647 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2648 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
2649 addInstr(env, mk_vMOVsd_RR(src,tmp));
2650 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
2651 addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 ));
2652 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
2653 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
2654 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
2655 add_to_rsp(env, 16);
2656 return dst;
2657 }
2658
2659 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) {
2660 IRQop *qop = e->Iex.Qop.details;
2661 HReg dst = newVRegV(env);
2662 HReg argX = iselFltExpr(env, qop->arg2);
2663 HReg argY = iselFltExpr(env, qop->arg3);
2664 HReg argZ = iselFltExpr(env, qop->arg4);
2665 /* XXXROUNDINGFIXME */
2666 /* set roundingmode here */
2667 /* subq $16, %rsp -- make a space*/
2668 sub_from_rsp(env, 16);
2669 /* Prepare 4 arg regs:
2670 leaq 0(%rsp), %rdi
2671 leaq 4(%rsp), %rsi
2672 leaq 8(%rsp), %rdx
2673 leaq 12(%rsp), %rcx
2674 */
2675 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2676 hregAMD64_RDI()));
2677 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
2678 hregAMD64_RSI()));
2679 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2680 hregAMD64_RDX()));
2681 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
2682 hregAMD64_RCX()));
2683 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2684 movss %argX, 0(%rsi)
2685 movss %argY, 0(%rdx)
2686 movss %argZ, 0(%rcx)
2687 */
2688 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX,
2689 AMD64AMode_IR(0, hregAMD64_RSI())));
2690 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY,
2691 AMD64AMode_IR(0, hregAMD64_RDX())));
2692 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ,
2693 AMD64AMode_IR(0, hregAMD64_RCX())));
2694 /* call the helper */
2695 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2696 (ULong)(HWord)h_generic_calc_MAddF32,
sewardj74142b82013-08-08 10:28:59 +00002697 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002698      /* fetch the result back from 0(%rsp), where the helper
2699         wrote it. */
2700 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
2701 AMD64AMode_IR(0, hregAMD64_RSP())));
2702 /* and finally, clear the space */
2703 add_to_rsp(env, 16);
2704 return dst;
2705 }
2706
sewardj8d965312005-02-25 02:48:47 +00002707 ppIRExpr(e);
2708 vpanic("iselFltExpr_wrk");
2709}
sewardj18303862005-02-21 12:36:54 +00002710
2711
2712/*---------------------------------------------------------*/
2713/*--- ISEL: Floating point expressions (64 bit) ---*/
2714/*---------------------------------------------------------*/
2715
2716/* Compute a 64-bit floating point value into the lower half of an xmm
2717 register, the identity of which is returned. As with
2718 iselIntExpr_R, the returned reg will be virtual, and it must not be
2719 changed by subsequent code emitted by the caller.
2720*/
2721
2722/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2723
2724 Type S (1 bit) E (11 bits) F (52 bits)
2725 ---- --------- ----------- -----------
2726 signalling NaN u 2047 (max) .0uuuuu---u
2727 (with at least
2728 one 1 bit)
2729 quiet NaN u 2047 (max) .1uuuuu---u
2730
2731 negative infinity 1 2047 (max) .000000---0
2732
2733 positive infinity 0 2047 (max) .000000---0
2734
2735 negative zero 1 0 .000000---0
2736
2737 positive zero 0 0 .000000---0
2738*/
2739
Elliott Hughesed398002017-06-21 14:41:24 -07002740static HReg iselDblExpr ( ISelEnv* env, const IRExpr* e )
sewardj18303862005-02-21 12:36:54 +00002741{
2742 HReg r = iselDblExpr_wrk( env, e );
2743# if 0
2744 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2745# endif
2746 vassert(hregClass(r) == HRcVec128);
2747 vassert(hregIsVirtual(r));
2748 return r;
2749}
2750
2751/* DO NOT CALL THIS DIRECTLY */
Elliott Hughesed398002017-06-21 14:41:24 -07002752static HReg iselDblExpr_wrk ( ISelEnv* env, const IRExpr* e )
sewardj18303862005-02-21 12:36:54 +00002753{
2754 IRType ty = typeOfIRExpr(env->type_env,e);
2755 vassert(e);
2756 vassert(ty == Ity_F64);
2757
sewardjdd40fdf2006-12-24 02:20:24 +00002758 if (e->tag == Iex_RdTmp) {
2759 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj18303862005-02-21 12:36:54 +00002760 }
2761
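   /* F64 constants: assemble the 8-byte image in an integer register
      via the union below, push it, and reload it into an xmm reg.
      Note Ico_F64 1.0 and Ico_F64i 0x3FF0000000000000ULL produce the
      same image. */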
sewardj8d965312005-02-25 02:48:47 +00002762 if (e->tag == Iex_Const) {
2763 union { ULong u64; Double f64; } u;
2764 HReg res = newVRegV(env);
2765 HReg tmp = newVRegI(env);
2766 vassert(sizeof(u) == 8);
2767 vassert(sizeof(u.u64) == 8);
2768 vassert(sizeof(u.f64) == 8);
2769
2770 if (e->Iex.Const.con->tag == Ico_F64) {
2771 u.f64 = e->Iex.Const.con->Ico.F64;
2772 }
2773 else if (e->Iex.Const.con->tag == Ico_F64i) {
2774 u.u64 = e->Iex.Const.con->Ico.F64i;
2775 }
2776 else
2777 vpanic("iselDblExpr(amd64): const");
2778
2779 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2780 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2781 addInstr(env, AMD64Instr_SseLdSt(
2782 True/*load*/, 8, res,
2783 AMD64AMode_IR(0, hregAMD64_RSP())
2784 ));
2785 add_to_rsp(env, 8);
2786 return res;
2787 }
sewardj9da16972005-02-21 13:58:26 +00002788
sewardje768e922009-11-26 17:17:37 +00002789 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj9da16972005-02-21 13:58:26 +00002790 AMD64AMode* am;
2791 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002792 vassert(e->Iex.Load.ty == Ity_F64);
2793 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj9da16972005-02-21 13:58:26 +00002794 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2795 return res;
2796 }
sewardj18303862005-02-21 12:36:54 +00002797
2798 if (e->tag == Iex_Get) {
2799 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2800 hregAMD64_RBP() );
2801 HReg res = newVRegV(env);
2802 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2803 return res;
2804 }
2805
sewardj8d965312005-02-25 02:48:47 +00002806 if (e->tag == Iex_GetI) {
2807 AMD64AMode* am
2808 = genGuestArrayOffset(
2809 env, e->Iex.GetI.descr,
2810 e->Iex.GetI.ix, e->Iex.GetI.bias );
2811 HReg res = newVRegV(env);
2812 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2813 return res;
2814 }
2815
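   /* Scalar F64 arithmetic.  The SSE scalar ops are two-operand
      destructive (dst := dst OP src), hence argL is copied into dst
      first.  The rounding mode in arg1 is not honoured yet -- see the
      XXXROUNDINGFIXME below. */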
sewardj4796d662006-02-05 16:06:26 +00002816 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00002817 IRTriop *triop = e->Iex.Triop.details;
sewardj137015d2005-03-27 04:01:15 +00002818 AMD64SseOp op = Asse_INVALID;
florian420bfa92012-06-02 20:29:22 +00002819 switch (triop->op) {
sewardj137015d2005-03-27 04:01:15 +00002820 case Iop_AddF64: op = Asse_ADDF; break;
2821 case Iop_SubF64: op = Asse_SUBF; break;
2822 case Iop_MulF64: op = Asse_MULF; break;
2823 case Iop_DivF64: op = Asse_DIVF; break;
2824 default: break;
2825 }
2826 if (op != Asse_INVALID) {
2827 HReg dst = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00002828 HReg argL = iselDblExpr(env, triop->arg2);
2829 HReg argR = iselDblExpr(env, triop->arg3);
sewardj137015d2005-03-27 04:01:15 +00002830 addInstr(env, mk_vMOVsd_RR(argL, dst));
sewardj4796d662006-02-05 16:06:26 +00002831 /* XXXROUNDINGFIXME */
2832 /* set roundingmode here */
sewardj137015d2005-03-27 04:01:15 +00002833 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2834 return dst;
2835 }
2836 }
2837
sewardjcc3d2192013-03-27 11:37:33 +00002838 if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) {
2839 IRQop *qop = e->Iex.Qop.details;
2840 HReg dst = newVRegV(env);
2841 HReg argX = iselDblExpr(env, qop->arg2);
2842 HReg argY = iselDblExpr(env, qop->arg3);
2843 HReg argZ = iselDblExpr(env, qop->arg4);
2844 /* XXXROUNDINGFIXME */
2845 /* set roundingmode here */
2846      /* subq $32, %rsp -- make a space */
2847 sub_from_rsp(env, 32);
2848 /* Prepare 4 arg regs:
2849 leaq 0(%rsp), %rdi
2850 leaq 8(%rsp), %rsi
2851 leaq 16(%rsp), %rdx
2852 leaq 24(%rsp), %rcx
2853 */
2854 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
2855 hregAMD64_RDI()));
2856 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
2857 hregAMD64_RSI()));
2858 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
2859 hregAMD64_RDX()));
2860 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
2861 hregAMD64_RCX()));
2862 /* Store the three args, at (%rsi), (%rdx) and (%rcx):
2863 movsd %argX, 0(%rsi)
2864 movsd %argY, 0(%rdx)
2865 movsd %argZ, 0(%rcx)
2866 */
2867 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX,
2868 AMD64AMode_IR(0, hregAMD64_RSI())));
2869 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY,
2870 AMD64AMode_IR(0, hregAMD64_RDX())));
2871 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ,
2872 AMD64AMode_IR(0, hregAMD64_RCX())));
2873 /* call the helper */
2874 addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
2875 (ULong)(HWord)h_generic_calc_MAddF64,
sewardj74142b82013-08-08 10:28:59 +00002876 4, mk_RetLoc_simple(RLPri_None) ));
sewardjcc3d2192013-03-27 11:37:33 +00002877      /* fetch the result back from 0(%rsp), where the helper
2878         wrote it. */
2879 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
2880 AMD64AMode_IR(0, hregAMD64_RSP())));
2881 /* and finally, clear the space */
2882 add_to_rsp(env, 32);
2883 return dst;
2884 }
2885
sewardjb183b852006-02-03 16:08:03 +00002886 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
sewardj25a85812005-05-08 23:03:48 +00002887 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2888 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2889 HReg dst = newVRegV(env);
2890
2891      /* arg now holds the value to be rounded. The first thing to do
2892 is set the FPU's rounding mode accordingly. */
2893
2894 /* Set host x87 rounding mode */
2895 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2896
2897 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2898 addInstr(env, AMD64Instr_A87Free(1));
sewardjd15b5972010-06-27 09:06:34 +00002899 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002900 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
sewardjd15b5972010-06-27 09:06:34 +00002901 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002902 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2903
2904 /* Restore default x87 rounding. */
2905 set_FPU_rounding_default( env );
2906
2907 return dst;
2908 }
2909
florian420bfa92012-06-02 20:29:22 +00002910   IRTriop *triop = e->tag == Iex_Triop ? e->Iex.Triop.details : NULL;
sewardj4796d662006-02-05 16:06:26 +00002911 if (e->tag == Iex_Triop
florian420bfa92012-06-02 20:29:22 +00002912 && (triop->op == Iop_ScaleF64
2913 || triop->op == Iop_AtanF64
2914 || triop->op == Iop_Yl2xF64
2915 || triop->op == Iop_Yl2xp1F64
2916 || triop->op == Iop_PRemF64
2917 || triop->op == Iop_PRem1F64)
sewardj25a85812005-05-08 23:03:48 +00002918 ) {
2919 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
florian420bfa92012-06-02 20:29:22 +00002920 HReg arg1 = iselDblExpr(env, triop->arg2);
2921 HReg arg2 = iselDblExpr(env, triop->arg3);
sewardj25a85812005-05-08 23:03:48 +00002922 HReg dst = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00002923 Bool arg2first = toBool(triop->op == Iop_ScaleF64
2924 || triop->op == Iop_PRemF64
2925 || triop->op == Iop_PRem1F64);
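      /* Of the two pushes below, the value pushed second lands in
         %st(0).  arg2first chooses which operand that is: FPREM,
         FPREM1 and FSCALE need the IR's first real operand (arg1) in
         %st(0) with the other in %st(1), whereas FPATAN, FYL2X and
         FYL2XP1 need the opposite order. */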
sewardj25a85812005-05-08 23:03:48 +00002926 addInstr(env, AMD64Instr_A87Free(2));
2927
2928 /* one arg -> top of x87 stack */
2929 addInstr(env, AMD64Instr_SseLdSt(
2930 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00002931 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002932
2933 /* other arg -> top of x87 stack */
2934 addInstr(env, AMD64Instr_SseLdSt(
2935 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
sewardjd15b5972010-06-27 09:06:34 +00002936 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002937
2938 /* do it */
sewardj4796d662006-02-05 16:06:26 +00002939 /* XXXROUNDINGFIXME */
2940 /* set roundingmode here */
florian420bfa92012-06-02 20:29:22 +00002941 switch (triop->op) {
sewardj25a85812005-05-08 23:03:48 +00002942 case Iop_ScaleF64:
2943 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
2944 break;
2945 case Iop_AtanF64:
2946 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
2947 break;
2948 case Iop_Yl2xF64:
2949 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
2950 break;
sewardj5e205372005-05-09 02:57:08 +00002951 case Iop_Yl2xp1F64:
2952 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
2953 break;
sewardjf4c803b2006-09-11 11:07:34 +00002954 case Iop_PRemF64:
2955 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
2956 break;
sewardj4970e4e2008-10-11 10:07:55 +00002957 case Iop_PRem1F64:
2958 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
2959 break;
sewardj25a85812005-05-08 23:03:48 +00002960 default:
2961 vassert(0);
2962 }
2963
2964 /* save result */
sewardjd15b5972010-06-27 09:06:34 +00002965 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00002966 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2967 return dst;
2968 }
sewardj1a01e652005-02-23 11:39:21 +00002969
sewardj6c299f32009-12-31 18:00:12 +00002970 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00002971 HReg dst = newVRegV(env);
2972 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
2973 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2974 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
2975 set_SSE_rounding_default( env );
2976 return dst;
2977 }
2978
sewardj6c299f32009-12-31 18:00:12 +00002979 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00002980 HReg dst = newVRegV(env);
2981 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2982 set_SSE_rounding_default( env );
2983 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
2984 return dst;
2985 }
2986
sewardj137015d2005-03-27 04:01:15 +00002987 if (e->tag == Iex_Unop
2988 && (e->Iex.Unop.op == Iop_NegF64
2989 || e->Iex.Unop.op == Iop_AbsF64)) {
sewardj8d965312005-02-25 02:48:47 +00002990 /* Sigh ... very rough code. Could do much better. */
sewardj137015d2005-03-27 04:01:15 +00002991 /* Get the 128-bit literal 00---0 10---0 into a register
2992 and xor/nand it with the value to be negated. */
sewardj8d965312005-02-25 02:48:47 +00002993 HReg r1 = newVRegI(env);
2994 HReg dst = newVRegV(env);
sewardj137015d2005-03-27 04:01:15 +00002995 HReg tmp = newVRegV(env);
sewardj8d965312005-02-25 02:48:47 +00002996 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2997 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
sewardj137015d2005-03-27 04:01:15 +00002998 addInstr(env, mk_vMOVsd_RR(src,tmp));
sewardj8d965312005-02-25 02:48:47 +00002999 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3000 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3001 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
sewardj137015d2005-03-27 04:01:15 +00003002 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
3003
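      /* The mask in dst has only bit 63 of the low lane set.  XOR
         flips that bit (negation); ANDN computes (~dst) & src and so
         clears just the sign bit (absolute value). */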
3004 if (e->Iex.Unop.op == Iop_NegF64)
3005 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3006 else
3007 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
3008
sewardj8d965312005-02-25 02:48:47 +00003009 add_to_rsp(env, 16);
3010 return dst;
3011 }
3012
sewardj4796d662006-02-05 16:06:26 +00003013 if (e->tag == Iex_Binop) {
sewardj25a85812005-05-08 23:03:48 +00003014 A87FpOp fpop = Afp_INVALID;
sewardj4796d662006-02-05 16:06:26 +00003015 switch (e->Iex.Binop.op) {
sewardj25a85812005-05-08 23:03:48 +00003016 case Iop_SqrtF64: fpop = Afp_SQRT; break;
sewardj5e205372005-05-09 02:57:08 +00003017 case Iop_SinF64: fpop = Afp_SIN; break;
3018 case Iop_CosF64: fpop = Afp_COS; break;
3019 case Iop_TanF64: fpop = Afp_TAN; break;
sewardj25a85812005-05-08 23:03:48 +00003020 case Iop_2xm1F64: fpop = Afp_2XM1; break;
3021 default: break;
3022 }
3023 if (fpop != Afp_INVALID) {
3024 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
sewardj4796d662006-02-05 16:06:26 +00003025 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
sewardj25a85812005-05-08 23:03:48 +00003026 HReg dst = newVRegV(env);
sewardj4796d662006-02-05 16:06:26 +00003027 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
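         /* Tan needs two free x87 slots because fptan pushes a
            literal 1.0 on top of its result; the Afp_TAN emitter has
            to discard that 1.0 again. */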
sewardj25a85812005-05-08 23:03:48 +00003028 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
sewardj5e205372005-05-09 02:57:08 +00003029 addInstr(env, AMD64Instr_A87Free(nNeeded));
sewardjd15b5972010-06-27 09:06:34 +00003030 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
sewardj4796d662006-02-05 16:06:26 +00003031 /* XXXROUNDINGFIXME */
3032 /* set roundingmode here */
sewardje9c51c92014-04-30 22:50:34 +00003033 /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition
3034 codes. I don't think that matters, since this insn
3035 selector never generates such an instruction intervening
3036            between a flag-setting instruction and a flag-using
3037 instruction. */
sewardj25a85812005-05-08 23:03:48 +00003038 addInstr(env, AMD64Instr_A87FpOp(fpop));
sewardjd15b5972010-06-27 09:06:34 +00003039 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
sewardj25a85812005-05-08 23:03:48 +00003040 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3041 return dst;
3042 }
3043 }
sewardjc49ce232005-02-25 13:03:03 +00003044
3045 if (e->tag == Iex_Unop) {
3046 switch (e->Iex.Unop.op) {
sewardja3e98302005-02-01 15:55:05 +00003047//.. case Iop_I32toF64: {
3048//.. HReg dst = newVRegF(env);
3049//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3050//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3051//.. set_FPU_rounding_default(env);
3052//.. addInstr(env, X86Instr_FpLdStI(
3053//.. True/*load*/, 4, dst,
3054//.. X86AMode_IR(0, hregX86_ESP())));
sewardjc49ce232005-02-25 13:03:03 +00003055//.. add_to_esp(env, 4);
sewardja3e98302005-02-01 15:55:05 +00003056//.. return dst;
3057//.. }
sewardj924215b2005-03-26 21:50:31 +00003058 case Iop_ReinterpI64asF64: {
3059 /* Given an I64, produce an IEEE754 double with the same
3060 bit pattern. */
3061 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3062 HReg dst = newVRegV(env);
3063 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
3064 /* paranoia */
3065 set_SSE_rounding_default(env);
3066 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
3067 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3068 return dst;
3069 }
sewardjc49ce232005-02-25 13:03:03 +00003070 case Iop_F32toF64: {
sewardj9a036bf2005-03-14 18:19:08 +00003071 HReg f32;
sewardjc49ce232005-02-25 13:03:03 +00003072 HReg f64 = newVRegV(env);
3073 /* this shouldn't be necessary, but be paranoid ... */
3074 set_SSE_rounding_default(env);
sewardj9a036bf2005-03-14 18:19:08 +00003075 f32 = iselFltExpr(env, e->Iex.Unop.arg);
sewardjc49ce232005-02-25 13:03:03 +00003076 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
3077 return f64;
3078 }
3079 default:
3080 break;
3081 }
3082 }
sewardj8d965312005-02-25 02:48:47 +00003083
3084 /* --------- MULTIPLEX --------- */
florian99dd03e2013-01-29 03:56:06 +00003085 if (e->tag == Iex_ITE) { // VFD
3086 HReg r1, r0, dst;
sewardj8d965312005-02-25 02:48:47 +00003087 vassert(ty == Ity_F64);
florian99dd03e2013-01-29 03:56:06 +00003088 vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1);
3089 r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
3090 r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
sewardj8d965312005-02-25 02:48:47 +00003091 dst = newVRegV(env);
florian99dd03e2013-01-29 03:56:06 +00003092 addInstr(env, mk_vMOVsd_RR(r1,dst));
3093 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00003094 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
sewardj8d965312005-02-25 02:48:47 +00003095 return dst;
3096 }
sewardj18303862005-02-21 12:36:54 +00003097
3098 ppIRExpr(e);
3099 vpanic("iselDblExpr_wrk");
3100}
sewardjc2bcb6f2005-02-07 00:17:12 +00003101
sewardj0852a132005-02-21 08:28:46 +00003102
3103/*---------------------------------------------------------*/
3104/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3105/*---------------------------------------------------------*/
3106
Elliott Hughesed398002017-06-21 14:41:24 -07003107static HReg iselVecExpr ( ISelEnv* env, const IRExpr* e )
sewardj0852a132005-02-21 08:28:46 +00003108{
3109 HReg r = iselVecExpr_wrk( env, e );
3110# if 0
3111 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3112# endif
3113 vassert(hregClass(r) == HRcVec128);
3114 vassert(hregIsVirtual(r));
3115 return r;
3116}
3117
3118
3119/* DO NOT CALL THIS DIRECTLY */
Elliott Hughesed398002017-06-21 14:41:24 -07003120static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e )
sewardj0852a132005-02-21 08:28:46 +00003121{
sewardj69d98e32010-06-18 08:17:41 +00003122 HWord fn = 0; /* address of helper fn, if required */
3123 Bool arg1isEReg = False;
sewardj0852a132005-02-21 08:28:46 +00003124 AMD64SseOp op = Asse_INVALID;
3125 IRType ty = typeOfIRExpr(env->type_env,e);
3126 vassert(e);
3127 vassert(ty == Ity_V128);
3128
sewardjdd40fdf2006-12-24 02:20:24 +00003129 if (e->tag == Iex_RdTmp) {
3130 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj0852a132005-02-21 08:28:46 +00003131 }
3132
3133 if (e->tag == Iex_Get) {
3134 HReg dst = newVRegV(env);
3135 addInstr(env, AMD64Instr_SseLdSt(
3136 True/*load*/,
sewardj18303862005-02-21 12:36:54 +00003137 16,
sewardj0852a132005-02-21 08:28:46 +00003138 dst,
3139 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
3140 )
3141 );
3142 return dst;
3143 }
3144
sewardje768e922009-11-26 17:17:37 +00003145 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj1a01e652005-02-23 11:39:21 +00003146 HReg dst = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00003147 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj1a01e652005-02-23 11:39:21 +00003148 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
3149 return dst;
3150 }
3151
3152 if (e->tag == Iex_Const) {
3153 HReg dst = newVRegV(env);
3154 vassert(e->Iex.Const.con->tag == Ico_V128);
sewardj9ba870d2010-04-02 11:29:23 +00003155 switch (e->Iex.Const.con->Ico.V128) {
3156 case 0x0000:
3157 dst = generate_zeroes_V128(env);
sewardjacfbd7d2010-08-17 22:52:08 +00003158 break;
sewardj9ba870d2010-04-02 11:29:23 +00003159 case 0xFFFF:
3160 dst = generate_ones_V128(env);
sewardj9ba870d2010-04-02 11:29:23 +00003161 break;
sewardjacfbd7d2010-08-17 22:52:08 +00003162 default: {
3163 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3164 /* do push_uimm64 twice, first time for the high-order half. */
3165 push_uimm64(env, bitmask8_to_bytemask64(
3166 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
3167 ));
3168 push_uimm64(env, bitmask8_to_bytemask64(
3169 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
3170 ));
3171 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
3172 add_to_rsp(env, 16);
3173 break;
3174 }
sewardj1a01e652005-02-23 11:39:21 +00003175 }
sewardj9ba870d2010-04-02 11:29:23 +00003176 return dst;
sewardj1a01e652005-02-23 11:39:21 +00003177 }
sewardj0852a132005-02-21 08:28:46 +00003178
3179 if (e->tag == Iex_Unop) {
3180 switch (e->Iex.Unop.op) {
3181
sewardj8d965312005-02-25 02:48:47 +00003182 case Iop_NotV128: {
3183 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3184 return do_sse_NotV128(env, arg);
3185 }
3186
sewardj09717342005-05-05 21:34:02 +00003187 case Iop_CmpNEZ64x2: {
3188 /* We can use SSE2 instructions for this. */
3189 /* Ideally, we want to do a 64Ix2 comparison against zero of
3190 the operand. Problem is no such insn exists. Solution
3191 therefore is to do a 32Ix4 comparison instead, and bitwise-
3192 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3193 let the not'd result of this initial comparison be a:b:c:d.
3194 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3195 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3196 giving the required result.
3197
3198 The required selection sequence is 2,3,0,1, which
3199 according to Intel's documentation means the pshufd
3200 literal value is 0xB1, that is,
3201 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3202 */
3203 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
sewardjac530442005-05-11 16:13:37 +00003204 HReg tmp = generate_zeroes_V128(env);
sewardj09717342005-05-05 21:34:02 +00003205 HReg dst = newVRegV(env);
sewardj09717342005-05-05 21:34:02 +00003206 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3207 tmp = do_sse_NotV128(env, tmp);
3208 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
3209 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
3210 return dst;
3211 }
3212
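      /* SSE2 has no "compare nonzero", so CmpNEZ is synthesised as
         NOT( CMPEQ(x, 0) ), lane-wise. */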
sewardjac530442005-05-11 16:13:37 +00003213 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3214 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3215 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
3216 do_CmpNEZ_vector:
3217 {
3218 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3219 HReg tmp = newVRegV(env);
3220 HReg zero = generate_zeroes_V128(env);
3221 HReg dst;
3222 addInstr(env, mk_vMOVsd_RR(arg, tmp));
3223 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
3224 dst = do_sse_NotV128(env, tmp);
3225 return dst;
3226 }
sewardja7ba8c42005-05-10 20:08:34 +00003227
sewardj1ddee212014-08-24 14:00:19 +00003228 case Iop_RecipEst32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
3229 case Iop_RSqrtEst32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
sewardja7ba8c42005-05-10 20:08:34 +00003230 do_32Fx4_unary:
3231 {
3232 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3233 HReg dst = newVRegV(env);
3234 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
3235 return dst;
3236 }
3237
sewardj1ddee212014-08-24 14:00:19 +00003238 case Iop_RecipEst32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
3239 case Iop_RSqrtEst32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3240 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
sewardja7ba8c42005-05-10 20:08:34 +00003241 do_32F0x4_unary:
3242 {
3243 /* A bit subtle. We have to copy the arg to the result
3244 register first, because actually doing the SSE scalar insn
3245 leaves the upper 3/4 of the destination register
3246 unchanged. Whereas the required semantics of these
3247 primops is that the upper 3/4 is simply copied in from the
3248 argument. */
3249 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3250 HReg dst = newVRegV(env);
3251 addInstr(env, mk_vMOVsd_RR(arg, dst));
3252 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
3253 return dst;
3254 }
3255
sewardj0852a132005-02-21 08:28:46 +00003256 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
3257 do_64F0x2_unary:
3258 {
3259 /* A bit subtle. We have to copy the arg to the result
3260 register first, because actually doing the SSE scalar insn
3261 leaves the upper half of the destination register
3262 unchanged. Whereas the required semantics of these
3263 primops is that the upper half is simply copied in from the
3264 argument. */
3265 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3266 HReg dst = newVRegV(env);
3267 addInstr(env, mk_vMOVsd_RR(arg, dst));
3268 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3269 return dst;
3270 }
3271
sewardj8d965312005-02-25 02:48:47 +00003272 case Iop_32UtoV128: {
3273 HReg dst = newVRegV(env);
3274 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3275 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3276 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3277 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3278 return dst;
3279 }
sewardj0852a132005-02-21 08:28:46 +00003280
3281 case Iop_64UtoV128: {
3282 HReg dst = newVRegV(env);
3283 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3284 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3285 addInstr(env, AMD64Instr_Push(rmi));
3286 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3287 add_to_rsp(env, 8);
3288 return dst;
3289 }
3290
sewardj4b1cc832012-06-13 11:10:20 +00003291 case Iop_V256toV128_0:
3292 case Iop_V256toV128_1: {
3293 HReg vHi, vLo;
3294 iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
3295 return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
3296 }
3297
sewardj0852a132005-02-21 08:28:46 +00003298 default:
3299 break;
3300 } /* switch (e->Iex.Unop.op) */
3301 } /* if (e->tag == Iex_Unop) */
3302
3303 if (e->tag == Iex_Binop) {
3304 switch (e->Iex.Binop.op) {
3305
sewardjcd4637e2015-04-07 09:07:51 +00003306 case Iop_Sqrt64Fx2:
3307 case Iop_Sqrt32Fx4: {
3308 /* :: (rmode, vec) -> vec */
3309 HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
3310 HReg dst = newVRegV(env);
3311 /* XXXROUNDINGFIXME */
3312 /* set roundingmode here */
3313 addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
3314 ? AMD64Instr_Sse64Fx2 : AMD64Instr_Sse32Fx4)
3315 (Asse_SQRTF, arg, dst));
3316 return dst;
3317 }
3318
sewardjc4530ae2012-05-21 10:18:49 +00003319 /* FIXME: could we generate MOVQ here? */
sewardj18303862005-02-21 12:36:54 +00003320 case Iop_SetV128lo64: {
3321 HReg dst = newVRegV(env);
3322 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3323 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
sewardj478fe702005-04-23 01:15:47 +00003324 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3325 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3326 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
3327 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3328 return dst;
3329 }
3330
sewardjc4530ae2012-05-21 10:18:49 +00003331 /* FIXME: could we generate MOVD here? */
sewardj478fe702005-04-23 01:15:47 +00003332 case Iop_SetV128lo32: {
3333 HReg dst = newVRegV(env);
3334 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3335 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3336 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3337 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3338 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
3339 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
sewardj18303862005-02-21 12:36:54 +00003340 return dst;
3341 }
3342
sewardj1a01e652005-02-23 11:39:21 +00003343 case Iop_64HLtoV128: {
sewardjc4530ae2012-05-21 10:18:49 +00003344 HReg rsp = hregAMD64_RSP();
3345 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, rsp);
3346 AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
3347 AMD64RI* qHi = iselIntExpr_RI(env, e->Iex.Binop.arg1);
3348 AMD64RI* qLo = iselIntExpr_RI(env, e->Iex.Binop.arg2);
3349 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp));
3350 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00003351 HReg dst = newVRegV(env);
sewardjc4530ae2012-05-21 10:18:49 +00003352 /* One store-forwarding stall coming up, oh well :-( */
3353 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp));
sewardj1a01e652005-02-23 11:39:21 +00003354 return dst;
3355 }
3356
sewardj432f8b62005-05-10 02:50:05 +00003357 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3358 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3359 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
sewardjb9282632005-11-05 02:33:25 +00003360 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
sewardj432f8b62005-05-10 02:50:05 +00003361 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
3362 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
sewardj432f8b62005-05-10 02:50:05 +00003363 do_32Fx4:
3364 {
3365 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3366 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3367 HReg dst = newVRegV(env);
3368 addInstr(env, mk_vMOVsd_RR(argL, dst));
3369 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3370 return dst;
3371 }
3372
sewardj97628592005-05-10 22:42:54 +00003373 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3374 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3375 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
sewardjb9282632005-11-05 02:33:25 +00003376 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
sewardj5992bd02005-05-11 02:13:42 +00003377 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
3378 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
sewardj4c328cf2005-05-05 12:05:54 +00003379 do_64Fx2:
3380 {
3381 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3382 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3383 HReg dst = newVRegV(env);
3384 addInstr(env, mk_vMOVsd_RR(argL, dst));
3385 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3386 return dst;
3387 }
sewardj8d965312005-02-25 02:48:47 +00003388
sewardj432f8b62005-05-10 02:50:05 +00003389 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
sewardj3aba9eb2005-03-30 23:20:47 +00003390 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
sewardj4c328cf2005-05-05 12:05:54 +00003391 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
sewardjb9282632005-11-05 02:33:25 +00003392 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003393 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
sewardjc49ce232005-02-25 13:03:03 +00003394 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
sewardj37d52572005-02-25 14:22:12 +00003395 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
3396 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003397 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3398 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
3399 do_32F0x4: {
3400 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3401 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3402 HReg dst = newVRegV(env);
3403 addInstr(env, mk_vMOVsd_RR(argL, dst));
3404 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3405 return dst;
3406 }
3407
sewardj137015d2005-03-27 04:01:15 +00003408 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
sewardj8d965312005-02-25 02:48:47 +00003409 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
sewardj137015d2005-03-27 04:01:15 +00003410 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
sewardjb9282632005-11-05 02:33:25 +00003411 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003412 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3413 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
sewardj1a01e652005-02-23 11:39:21 +00003414 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
sewardjc49ce232005-02-25 13:03:03 +00003415 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003416 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3417 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
3418 do_64F0x2: {
3419 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3420 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3421 HReg dst = newVRegV(env);
3422 addInstr(env, mk_vMOVsd_RR(argL, dst));
3423 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3424 return dst;
3425 }
3426
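      /* The pack/unpack ops below are non-commutative, and x86's
         two-operand form (dst := dst OP src) has its operand roles
         swapped relative to the IR's.  Hence arg1isEReg: seed dst
         with arg2 and feed arg1 in as the source (E) register. */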
sewardj5f438dd2011-06-16 11:36:23 +00003427 case Iop_QNarrowBin32Sto16Sx8:
sewardj97628592005-05-10 22:42:54 +00003428 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
sewardj5f438dd2011-06-16 11:36:23 +00003429 case Iop_QNarrowBin16Sto8Sx16:
sewardj97628592005-05-10 22:42:54 +00003430 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
sewardj5f438dd2011-06-16 11:36:23 +00003431 case Iop_QNarrowBin16Sto8Ux16:
sewardj97628592005-05-10 22:42:54 +00003432 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3433
3434 case Iop_InterleaveHI8x16:
3435 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3436 case Iop_InterleaveHI16x8:
3437 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3438 case Iop_InterleaveHI32x4:
3439 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3440 case Iop_InterleaveHI64x2:
3441 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3442
3443 case Iop_InterleaveLO8x16:
3444 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3445 case Iop_InterleaveLO16x8:
3446 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3447 case Iop_InterleaveLO32x4:
3448 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3449 case Iop_InterleaveLO64x2:
3450 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3451
sewardj1a01e652005-02-23 11:39:21 +00003452 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
sewardj8d965312005-02-25 02:48:47 +00003453 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003454 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003455 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003456 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003457 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003458 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003459 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
3460 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
3461 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
3462 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
3463 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
3464 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
3465 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
3466 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
3467 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
3468 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
3469 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3470 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
sewardjadffcef2005-05-11 00:03:06 +00003471 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
3472 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
3473 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
3474 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
3475 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3476 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3477 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003478 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
3479 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
3480 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003481 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003482 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
3483 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
3484 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
3485 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003486 do_SseReRg: {
3487 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3488 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3489 HReg dst = newVRegV(env);
3490 if (arg1isEReg) {
sewardj9da16972005-02-21 13:58:26 +00003491 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3492 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3493 } else {
3494 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3495 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3496 }
3497 return dst;
3498 }
3499
sewardjadffcef2005-05-11 00:03:06 +00003500 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3501 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3502 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3503 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3504 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3505 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3506 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
sewardj09717342005-05-05 21:34:02 +00003507 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
3508 do_SseShift: {
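         /* The register forms of psllw/psrlw/etc take the shift count
            from the low 64 bits of the E register.  So build a
            16-byte value at (%rsp) -- count in the low quadword, zero
            above -- load that into ereg, and shift greg by it. */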
3509 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3510 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3511 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3512 HReg ereg = newVRegV(env);
3513 HReg dst = newVRegV(env);
3514 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3515 addInstr(env, AMD64Instr_Push(rmi));
3516 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3517 addInstr(env, mk_vMOVsd_RR(greg, dst));
3518 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
3519 add_to_rsp(env, 16);
3520 return dst;
3521 }
sewardj0852a132005-02-21 08:28:46 +00003522
sewardj69d98e32010-06-18 08:17:41 +00003523 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4;
3524 goto do_SseAssistedBinary;
3525 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4;
3526 goto do_SseAssistedBinary;
3527 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4;
3528 goto do_SseAssistedBinary;
3529 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4;
3530 goto do_SseAssistedBinary;
3531 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4;
3532 goto do_SseAssistedBinary;
3533 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8;
3534 goto do_SseAssistedBinary;
3535 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8;
3536 goto do_SseAssistedBinary;
3537 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16;
3538 goto do_SseAssistedBinary;
3539 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
3540 goto do_SseAssistedBinary;
sewardjd8815622011-10-19 15:24:01 +00003541 case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2;
3542 goto do_SseAssistedBinary;
sewardj69d98e32010-06-18 08:17:41 +00003543 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
3544 goto do_SseAssistedBinary;
sewardjd8bca7e2012-06-20 11:46:19 +00003545 case Iop_Perm32x4: fn = (HWord)h_generic_calc_Perm32x4;
3546 goto do_SseAssistedBinary;
sewardj5f438dd2011-06-16 11:36:23 +00003547 case Iop_QNarrowBin32Sto16Ux8:
3548 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
sewardj2260b992011-06-15 16:05:07 +00003549 goto do_SseAssistedBinary;
sewardjad2c9ea2011-10-22 09:32:16 +00003550 case Iop_NarrowBin16to8x16:
3551 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3552 goto do_SseAssistedBinary;
3553 case Iop_NarrowBin32to16x8:
3554 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3555 goto do_SseAssistedBinary;
sewardj69d98e32010-06-18 08:17:41 +00003556 do_SseAssistedBinary: {
3557 /* RRRufff! RRRufff code is what we're generating here. Oh
3558 well. */
3559 vassert(fn != 0);
3560 HReg dst = newVRegV(env);
3561 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3562 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3563 HReg argp = newVRegI(env);
3564         /* subq $112, %rsp -- make a space */
3565 sub_from_rsp(env, 112);
3566 /* leaq 48(%rsp), %r_argp -- point into it */
3567 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3568 argp));
3569 /* andq $-16, %r_argp -- 16-align the pointer */
3570 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3571 AMD64RMI_Imm( ~(UInt)15 ),
3572 argp));
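         /* argp is now (%rsp + 48) rounded down to a multiple of 16:
            aligned, and with room above it inside the 112-byte block
            for the three 16-byte slots laid out next. */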
3573 /* Prepare 3 arg regs:
3574 leaq 0(%r_argp), %rdi
3575 leaq 16(%r_argp), %rsi
3576 leaq 32(%r_argp), %rdx
3577 */
3578 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3579 hregAMD64_RDI()));
3580 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3581 hregAMD64_RSI()));
3582 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
3583 hregAMD64_RDX()));
3584 /* Store the two args, at (%rsi) and (%rdx):
3585 movupd %argL, 0(%rsi)
3586 movupd %argR, 0(%rdx)
3587 */
3588 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3589 AMD64AMode_IR(0, hregAMD64_RSI())));
3590 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
3591 AMD64AMode_IR(0, hregAMD64_RDX())));
3592 /* call the helper */
sewardjcfe046e2013-01-17 14:23:53 +00003593 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00003594 3, mk_RetLoc_simple(RLPri_None) ));
sewardj69d98e32010-06-18 08:17:41 +00003595 /* fetch the result from memory, using %r_argp, which the
3596 register allocator will keep alive across the call. */
3597 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3598 AMD64AMode_IR(0, argp)));
3599 /* and finally, clear the space */
3600 add_to_rsp(env, 112);
3601 return dst;
3602 }
3603
sewardj0874bee2011-01-17 10:32:18 +00003604 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
3605 goto do_SseAssistedVectorAndScalar;
3606 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
3607 goto do_SseAssistedVectorAndScalar;
3608 do_SseAssistedVectorAndScalar: {
3609 /* RRRufff! RRRufff code is what we're generating here. Oh
3610 well. */
3611 vassert(fn != 0);
3612 HReg dst = newVRegV(env);
3613 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3614 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3615 HReg argp = newVRegI(env);
3616         /* subq $112, %rsp -- make a space */
3617 sub_from_rsp(env, 112);
3618 /* leaq 48(%rsp), %r_argp -- point into it */
3619 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3620 argp));
3621 /* andq $-16, %r_argp -- 16-align the pointer */
3622 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3623 AMD64RMI_Imm( ~(UInt)15 ),
3624 argp));
3625 /* Prepare 2 vector arg regs:
3626 leaq 0(%r_argp), %rdi
3627 leaq 16(%r_argp), %rsi
3628 */
3629 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3630 hregAMD64_RDI()));
3631 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3632 hregAMD64_RSI()));
3633 /* Store the vector arg, at (%rsi):
3634 movupd %argL, 0(%rsi)
3635 */
3636 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3637 AMD64AMode_IR(0, hregAMD64_RSI())));
3638 /* And get the scalar value into rdx */
3639 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));
3640
3641 /* call the helper */
sewardjcfe046e2013-01-17 14:23:53 +00003642 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
sewardj74142b82013-08-08 10:28:59 +00003643 3, mk_RetLoc_simple(RLPri_None) ));
sewardj0874bee2011-01-17 10:32:18 +00003644 /* fetch the result from memory, using %r_argp, which the
3645 register allocator will keep alive across the call. */
3646 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3647 AMD64AMode_IR(0, argp)));
3648 /* and finally, clear the space */
3649 add_to_rsp(env, 112);
3650 return dst;
3651 }
3652
sewardj0852a132005-02-21 08:28:46 +00003653 default:
3654 break;
3655 } /* switch (e->Iex.Binop.op) */
3656 } /* if (e->tag == Iex_Binop) */
3657
sewardj9571dc02014-01-26 18:34:23 +00003658 if (e->tag == Iex_Triop) {
3659 IRTriop *triop = e->Iex.Triop.details;
3660 switch (triop->op) {
3661
3662 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm;
3663 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm;
3664 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm;
3665 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm;
3666 do_64Fx2_w_rm:
3667 {
3668 HReg argL = iselVecExpr(env, triop->arg2);
3669 HReg argR = iselVecExpr(env, triop->arg3);
3670 HReg dst = newVRegV(env);
3671 addInstr(env, mk_vMOVsd_RR(argL, dst));
3672 /* XXXROUNDINGFIXME */
3673 /* set roundingmode here */
3674 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3675 return dst;
3676 }
3677
3678 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm;
3679 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm;
3680 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm;
3681 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm;
3682 do_32Fx4_w_rm:
3683 {
3684 HReg argL = iselVecExpr(env, triop->arg2);
3685 HReg argR = iselVecExpr(env, triop->arg3);
3686 HReg dst = newVRegV(env);
3687 addInstr(env, mk_vMOVsd_RR(argL, dst));
3688 /* XXXROUNDINGFIXME */
3689 /* set roundingmode here */
3690 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3691 return dst;
3692 }
3693
3694 default:
3695 break;
3696 } /* switch (triop->op) */
3697 } /* if (e->tag == Iex_Triop) */
3698
florian99dd03e2013-01-29 03:56:06 +00003699 if (e->tag == Iex_ITE) { // VFD
3700 HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
3701 HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
sewardjadffcef2005-05-11 00:03:06 +00003702 HReg dst = newVRegV(env);
florian99dd03e2013-01-29 03:56:06 +00003703 addInstr(env, mk_vMOVsd_RR(r1,dst));
floriane6be61f2013-02-01 16:11:51 +00003704 AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
sewardj009230b2013-01-26 11:47:55 +00003705 addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
sewardjadffcef2005-05-11 00:03:06 +00003706 return dst;
3707 }
3708
sewardjacfbd7d2010-08-17 22:52:08 +00003709 //vec_fail:
sewardj0852a132005-02-21 08:28:46 +00003710 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
sewardj5117ce12006-01-27 21:20:15 +00003711 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
sewardj0852a132005-02-21 08:28:46 +00003712 ppIRExpr(e);
3713 vpanic("iselVecExpr_wrk");
3714}
sewardjc33671d2005-02-01 20:30:00 +00003715
3716
3717/*---------------------------------------------------------*/
sewardjc4530ae2012-05-21 10:18:49 +00003718/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. --*/
3719/*---------------------------------------------------------*/
3720
sewardj56c30312012-06-12 08:45:39 +00003721static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
Elliott Hughesed398002017-06-21 14:41:24 -07003722 ISelEnv* env, const IRExpr* e )
sewardjc4530ae2012-05-21 10:18:49 +00003723{
3724 iselDVecExpr_wrk( rHi, rLo, env, e );
3725# if 0
3726 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3727# endif
3728 vassert(hregClass(*rHi) == HRcVec128);
3729 vassert(hregClass(*rLo) == HRcVec128);
3730 vassert(hregIsVirtual(*rHi));
3731 vassert(hregIsVirtual(*rLo));
3732}
3733
3734
3735/* DO NOT CALL THIS DIRECTLY */
sewardj56c30312012-06-12 08:45:39 +00003736static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
Elliott Hughesed398002017-06-21 14:41:24 -07003737 ISelEnv* env, const IRExpr* e )
sewardjc4530ae2012-05-21 10:18:49 +00003738{
sewardjcc3d2192013-03-27 11:37:33 +00003739 HWord fn = 0; /* address of helper fn, if required */
sewardjc4530ae2012-05-21 10:18:49 +00003740 vassert(e);
3741 IRType ty = typeOfIRExpr(env->type_env,e);
3742 vassert(ty == Ity_V256);
3743
sewardj56c30312012-06-12 08:45:39 +00003744 AMD64SseOp op = Asse_INVALID;
3745
sewardjc4530ae2012-05-21 10:18:49 +00003746 /* read 256-bit IRTemp */
3747 if (e->tag == Iex_RdTmp) {
3748 lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3749 return;
3750 }
3751
3752 if (e->tag == Iex_Get) {
3753 HReg vHi = newVRegV(env);
3754 HReg vLo = newVRegV(env);
3755 HReg rbp = hregAMD64_RBP();
3756 AMD64AMode* am0 = AMD64AMode_IR(e->Iex.Get.offset + 0, rbp);
3757 AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp);
3758 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
3759 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
3760 *rHi = vHi;
3761 *rLo = vLo;
3762 return;
3763 }
3764
3765 if (e->tag == Iex_Load) {
3766 HReg vHi = newVRegV(env);
3767 HReg vLo = newVRegV(env);
3768 HReg rA = iselIntExpr_R(env, e->Iex.Load.addr);
3769 AMD64AMode* am0 = AMD64AMode_IR(0, rA);
3770 AMD64AMode* am16 = AMD64AMode_IR(16, rA);
3771 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
3772 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
3773 *rHi = vHi;
3774 *rLo = vLo;
3775 return;
3776 }
3777
sewardj37a505b2012-06-29 15:28:24 +00003778 if (e->tag == Iex_Const) {
3779 vassert(e->Iex.Const.con->tag == Ico_V256);
3780 switch (e->Iex.Const.con->Ico.V256) {
3781 case 0x00000000: {
3782 HReg vHi = generate_zeroes_V128(env);
3783 HReg vLo = newVRegV(env);
3784 addInstr(env, mk_vMOVsd_RR(vHi, vLo));
3785 *rHi = vHi;
3786 *rLo = vLo;
3787 return;
3788 }
3789 default:
3790 break; /* give up. Until such time as is necessary. */
3791 }
3792 }
3793
sewardj2a2bda92012-06-14 23:32:02 +00003794 if (e->tag == Iex_Unop) {
3795 switch (e->Iex.Unop.op) {
3796
3797 case Iop_NotV256: {
3798 HReg argHi, argLo;
3799 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3800 *rHi = do_sse_NotV128(env, argHi);
3801 *rLo = do_sse_NotV128(env, argLo);
3802 return;
3803 }
3804
sewardj1ddee212014-08-24 14:00:19 +00003805 case Iop_RecipEst32Fx8: op = Asse_RCPF; goto do_32Fx8_unary;
3806 case Iop_Sqrt32Fx8: op = Asse_SQRTF; goto do_32Fx8_unary;
3807 case Iop_RSqrtEst32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
sewardj66becf32012-06-18 23:15:16 +00003808 do_32Fx8_unary:
3809 {
3810 HReg argHi, argLo;
3811 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3812 HReg dstHi = newVRegV(env);
3813 HReg dstLo = newVRegV(env);
3814 addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi));
3815 addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo));
3816 *rHi = dstHi;
3817 *rLo = dstLo;
3818 return;
3819 }
3820
3821 case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary;
3822 do_64Fx4_unary:
3823 {
3824 HReg argHi, argLo;
3825 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3826 HReg dstHi = newVRegV(env);
3827 HReg dstLo = newVRegV(env);
3828 addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi));
3829 addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo));
3830 *rHi = dstHi;
3831 *rLo = dstLo;
3832 return;
3833 }
3834
sewardj23db8a02012-06-25 07:46:18 +00003835 case Iop_CmpNEZ64x4: {
3836 /* We can use SSE2 instructions for this. */
3837 /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
3838 (obviously). See comment on Iop_CmpNEZ64x2 for
3839 explanation of what's going on here. */
3840 HReg argHi, argLo;
3841 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3842 HReg tmpHi = generate_zeroes_V128(env);
3843 HReg tmpLo = newVRegV(env);
3844 addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
3845 HReg dstHi = newVRegV(env);
3846 HReg dstLo = newVRegV(env);
3847 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
3848 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
3849 tmpHi = do_sse_NotV128(env, tmpHi);
3850 tmpLo = do_sse_NotV128(env, tmpLo);
3851 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
3852 addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
3853 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
3854 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
3855 *rHi = dstHi;
3856 *rLo = dstLo;
3857 return;
3858 }
3859
3860 case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
sewardjcc3d2192013-03-27 11:37:33 +00003861 case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3862 case Iop_CmpNEZ8x32: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
sewardj23db8a02012-06-25 07:46:18 +00003863 do_CmpNEZ_vector:
3864 {
3865 HReg argHi, argLo;
3866 iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
3867 HReg tmpHi = newVRegV(env);
3868 HReg tmpLo = newVRegV(env);
3869 HReg zero = generate_zeroes_V128(env);
3870 HReg dstHi, dstLo;
3871 addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
3872 addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
3873 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
3874 addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
3875 dstHi = do_sse_NotV128(env, tmpHi);
3876 dstLo = do_sse_NotV128(env, tmpLo);
3877 *rHi = dstHi;
3878 *rLo = dstLo;
3879 return;
3880 }
3881
sewardj2a2bda92012-06-14 23:32:02 +00003882 default:
3883 break;
3884 } /* switch (e->Iex.Unop.op) */
3885 } /* if (e->tag == Iex_Unop) */
3886
sewardj56c30312012-06-12 08:45:39 +00003887 if (e->tag == Iex_Binop) {
3888 switch (e->Iex.Binop.op) {
3889
         case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
         case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
         do_64Fx4:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
         case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
         do_32Fx8:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_AndV256:     op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV256:      op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV256:     op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x32:     op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x16:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x8:     op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x4:     op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx32:   op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx16:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux32:   op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux16:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux32:    op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux16:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x32:   op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x16:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x8:   op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx32:  op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx8:  op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx16:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux32:    op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx16:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux32:    op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x16:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x32:     op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x16:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x8:     op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x4:     op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx32:   op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx16:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux32:   op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux16:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
         case Iop_ShlN32x8:  op = Asse_SHL32; goto do_SseShift;
         case Iop_ShlN64x4:  op = Asse_SHL64; goto do_SseShift;
         case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
         case Iop_SarN32x8:  op = Asse_SAR32; goto do_SseShift;
         case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
         case Iop_ShrN32x8:  op = Asse_SHR32; goto do_SseShift;
         case Iop_ShrN64x4:  op = Asse_SHR64; goto do_SseShift;
         do_SseShift: {
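            /* The SSE vector shifts take the shift count from the low
               64 bits of an XMM register (or a 128-bit memory
               operand).  A sketch of the marshalling below: push a
               zero quadword, then the count, so that (%rsp) holds a
               16-byte value whose low half is the count; load that
               into ereg and use it as the count operand for both
               128-bit halves. */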
            HReg gregHi, gregLo;
            iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
            AMD64RMI*   rmi   = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp0  = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        ereg  = newVRegV(env);
            HReg        dstHi = newVRegV(env);
            HReg        dstLo = newVRegV(env);
            addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
            addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
            addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
            add_to_rsp(env, 16);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_V128HLtoV256: {
            *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
            *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
            return;
         }

         case Iop_Mul32x8:    fn = (HWord)h_generic_calc_Mul32x4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Sx8:   fn = (HWord)h_generic_calc_Max32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Sx8:   fn = (HWord)h_generic_calc_Min32Sx4;
                              goto do_SseAssistedBinary;
         case Iop_Max32Ux8:   fn = (HWord)h_generic_calc_Max32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Min32Ux8:   fn = (HWord)h_generic_calc_Min32Ux4;
                              goto do_SseAssistedBinary;
         case Iop_Max16Ux16:  fn = (HWord)h_generic_calc_Max16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Min16Ux16:  fn = (HWord)h_generic_calc_Min16Ux8;
                              goto do_SseAssistedBinary;
         case Iop_Max8Sx32:   fn = (HWord)h_generic_calc_Max8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_Min8Sx32:   fn = (HWord)h_generic_calc_Min8Sx16;
                              goto do_SseAssistedBinary;
         case Iop_CmpEQ64x4:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                              goto do_SseAssistedBinary;
         case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                              goto do_SseAssistedBinary;
         do_SseAssistedBinary: {
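            /* The 128-bit-at-a-time helper fallback.  Reading the
               leaq/movupd offsets off the code below, the 16-aligned
               scratch area is laid out (offsets from %r_argp) as:
                    0  result, hi half      48  result, lo half
                   16  argL,   hi half      64  argL,   lo half
                   32  argR,   hi half      80  argR,   lo half
               The helper computes one 128-bit half per call, taking
               (dst, argL, argR) pointers in %rdi, %rsi, %rdx, so it
               is called twice. */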
            /* RRRufff! RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $160, %rsp         -- make a space */
            sub_from_rsp(env, 160);
            /* leaq 48(%rsp), %r_argp  -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RDX()));
            /* Store the two high args, at (%rsi) and (%rdx):
               movupd  %argLhi, 0(%rsi)
               movupd  %argRhi, 0(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            /* Store the two low args, at 48(%rsi) and 48(%rdx):
               movupd  %argLlo, 48(%rsi)
               movupd  %argRlo, 48(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                             AMD64AMode_IR(48, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                             AMD64AMode_IR(48, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* Prepare 3 arg regs:
               leaq 48(%r_argp), %rdi
               leaq 64(%r_argp), %rsi
               leaq 80(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
                                           hregAMD64_RDX()));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                             AMD64AMode_IR(0, argp)));
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                             AMD64AMode_IR(48, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 160);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Perm32x8:   fn = (HWord)h_generic_calc_Perm32x8;
                              goto do_SseAssistedBinary256;
         do_SseAssistedBinary256: {
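            /* Same idea as do_SseAssistedBinary, except that Perm32x8
               needs to see all 256 bits of each operand at once, so
               the args are stored as whole 32-byte values (lo half at
               the lower address) and the helper is called just once:
                    0  result (32 bytes)
                   32  argL   (32 bytes)
                   64  argR   (32 bytes)
               with (dst, argL, argR) pointers in %rdi, %rsi, %rdx. */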
            /* RRRufff! RRRufff code is what we're generating here.  Oh
               well. */
            vassert(fn != 0);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
            iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
            HReg argp = newVRegI(env);
            /* subq $160, %rsp         -- make a space */
            sub_from_rsp(env, 160);
            /* leaq 48(%rsp), %r_argp  -- point into it */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                           argp));
            /* andq $-16, %r_argp      -- 16-align the pointer */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm( ~(UInt)15 ),
                                            argp));
            /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 32(%r_argp), %rsi
               leaq 64(%r_argp), %rdx
            */
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                           hregAMD64_RDI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                           hregAMD64_RSI()));
            addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                           hregAMD64_RDX()));
            /* Store the two args, at (%rsi) and (%rdx):
               movupd  %argLlo, 0(%rsi)
               movupd  %argLhi, 16(%rsi)
               movupd  %argRlo, 0(%rdx)
               movupd  %argRhi, 16(%rdx)
            */
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                             AMD64AMode_IR(0, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                             AMD64AMode_IR(16, hregAMD64_RSI())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                             AMD64AMode_IR(0, hregAMD64_RDX())));
            addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                             AMD64AMode_IR(16, hregAMD64_RDX())));
            /* call the helper */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                           mk_RetLoc_simple(RLPri_None) ));
            /* fetch the result from memory, using %r_argp, which the
               register allocator will keep alive across the call. */
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                             AMD64AMode_IR(0, argp)));
            addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                             AMD64AMode_IR(16, argp)));
            /* and finally, clear the space */
            add_to_rsp(env, 160);
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      switch (triop->op) {

         case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm;
         case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm;
         case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm;
         case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm;
         do_64Fx4_w_rm:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
            iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
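            /* (A note on the FIXME above, which applies equally to
               do_32Fx8_w_rm below: triop->arg1 carries the IR
               rounding mode, but it is not consulted here, so the
               result is computed under the default round-to-nearest
               setting that Vex-generated code runs with.) */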
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm;
         case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm;
         case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm;
         case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm;
         do_32Fx8_w_rm:
         {
            HReg argLhi, argLlo, argRhi, argRlo;
            iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
            iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
            HReg dstHi = newVRegV(env);
            HReg dstLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
            addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }

         default:
            break;
      } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
      HReg        rsp     = hregAMD64_RSP();
      HReg        vHi     = newVRegV(env);
      HReg        vLo     = newVRegV(env);
      AMD64AMode* m8_rsp  = AMD64AMode_IR(-8, rsp);
      AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
      /* arg1 is the most significant (Q3), arg4 the least (Q0) */
      /* Get all the args into regs, before messing with the stack. */
      AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1);
      AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2);
      AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3);
      AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4);
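      /* Each 128-bit half is assembled in memory: store its two
         64-bit pieces at -8(%rsp) and -16(%rsp) (less significant at
         the lower address), then pull in the resulting 16 bytes with
         a single vector load from -16(%rsp).  Roughly:
            movq q3, -8(%rsp) ; movq q2, -16(%rsp) ; movupd -16(%rsp), vHi
            movq q1, -8(%rsp) ; movq q0, -16(%rsp) ; movupd -16(%rsp), vLo
         (mnemonics illustrative only). */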
      /* less significant lane (Q2) at the lower address (-16(rsp)) */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp));
      /* ... and then the lower half (Q1 and Q0), likewise */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_ITE) {
      HReg r1Hi, r1Lo, r0Hi, r0Lo;
      iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue);
      iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse);
      HReg dstHi = newVRegV(env);
      HReg dstLo = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi));
      addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
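      /* Start with the iftrue value in dst, then conditionally
         overwrite it with the iffalse value.  XORing an AMD64CondCode
         with 1 flips its sense (the encoding mirrors the x86
         condition codes, where the low bit selects the negated form),
         so the SseCMovs below fire exactly when the guard is false. */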
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi));
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   //avx_fail:
   vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselDVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- LOADG (guarded load) --------- */
   case Ist_LoadG: {
      IRLoadG* lg = stmt->Ist.LoadG.details;
      if (lg->end != Iend_LE)
         goto stmt_fail;

      UChar szB = 0; /* invalid */
      switch (lg->cvt) {
         case ILGop_Ident32:   szB = 4;  break;
         case ILGop_Ident64:   szB = 8;  break;
         case ILGop_IdentV128: szB = 16; break;
         default: break;
      }
      if (szB == 0)
         goto stmt_fail;

      AMD64AMode* amAddr
         = iselIntExpr_AMode(env, lg->addr);
      HReg rAlt
         = szB == 16 ? iselVecExpr(env, lg->alt)
                     : iselIntExpr_R(env, lg->alt);
      HReg rDst
         = lookupIRTemp(env, lg->dst);

      /* Get the alt value into the dst.  We'll do a conditional load
         which overwrites it -- or not -- with loaded data. */
      if (szB == 16) {
         addInstr(env, mk_vMOVsd_RR(rAlt, rDst));
      } else {
         addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
      }
      AMD64CondCode cc = iselCondCode(env, lg->guard);
      if (szB == 16) {
         addInstr(env, AMD64Instr_SseCLoad(cc, amAddr, rDst));
      } else {
         addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
      }
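      /* The conditional loads presumably boil down to a conditional
         branch around a plain (vector) load, roughly
            j<!cc> over ; mov{l,q}/movupd amAddr, rDst ; over:
         though the exact shape is AMD64Instr_CLoad /
         AMD64Instr_SseCLoad's business (see host_amd64_defs.c). */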
      return;
   }

   /* --------- STOREG (guarded store) --------- */
   case Ist_StoreG: {
      IRStoreG* sg = stmt->Ist.StoreG.details;
      if (sg->end != Iend_LE)
         goto stmt_fail;

      UChar szB = 0; /* invalid */
      switch (typeOfIRExpr(env->type_env, sg->data)) {
         case Ity_I32:  szB = 4;  break;
         case Ity_I64:  szB = 8;  break;
         case Ity_V128: szB = 16; break;
         default: break;
      }
      if (szB == 0)
         goto stmt_fail;

      AMD64AMode* amAddr
         = iselIntExpr_AMode(env, sg->addr);
      HReg rSrc
         = szB == 16 ? iselVecExpr(env, sg->data)
                     : iselIntExpr_R(env, sg->data);
      AMD64CondCode cc
         = iselCondCode(env, sg->guard);
      if (szB == 16) {
         addInstr(env, AMD64Instr_SseCStore(cc, rSrc, amAddr));
      } else {
         addInstr(env, AMD64Instr_CStore(cc, szB, rSrc, amAddr));
      }
      return;
   }

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_V128) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      if (tyd == Ity_V256) {
         HReg rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
         AMD64AMode* am0  = AMD64AMode_IR(0, rA);
         AMD64AMode* am16 = AMD64AMode_IR(16, rA);
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data);
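         /* The less-significant half goes to the lower address,
            matching the little-endian layout of a 256-bit value in
            guest memory. */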
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      if (ty == Ity_V128) {
         HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_V256) {
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data);
         HReg rbp = hregAMD64_RBP();
         AMD64AMode* am0  = AMD64AMode_IR(stmt->Ist.Put.offset + 0,  rbp);
         AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      AMD64AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, puti->data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* Optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for
         Memcheck-generated IR) we get t = address-expression, with t
         later used twice, and so doing this naturally turns the
         address-expression back into an AMD64 amode. */
      if (ty == Ity_I64
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Aam.IR.reg;
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
         } else {
            addInstr(env, AMD64Instr_Lea64(am,dst));
         }
         return;
      }

      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V256) {
         HReg rHi, rLo, dstHi, dstLo;
         iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_vMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_vMOVsd_RR(rLo,dstLo) );
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Throw out any return types we don't know about. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128: case Ity_V256:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %rax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc.spOff
               tells us where.  Fish it off the stack and then move
               the stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg dst = lookupIRTemp(env, d->tmp);
            AMD64AMode* am = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
            add_to_rsp(env, addToSp);
            return;
         }
         case Ity_V256: {
            /* See comments for Ity_V128. */
            vassert(rloc.pri == RLPri_V256SpRel);
            vassert(addToSp >= 32);
            HReg dstLo, dstHi;
            lookupIRTempPair(&dstHi, &dstLo, env, d->tmp);
            AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo ));
            AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi ));
            add_to_rsp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, AMD64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expd into %rax, and cas->data into %rbx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rData = iselIntExpr_R(env, cas->dataLo);
         HReg rExpd = iselIntExpr_R(env, cas->expdLo);
         HReg rOld  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
         addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
         switch (ty) {
            case Ity_I64: sz = 8; break;
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, AMD64Instr_ACAS(am, sz));
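         /* ACAS presumably boils down to lock cmpxchg{b,w,l,q} %rbx,
            (am).  Per the x86 semantics: if (am) == %rax then
            (am) := %rbx and ZF=1, else %rax := (am) and ZF=0.  Hence
            the CMov below: on failure (ZF=0) capture the observed old
            value from %rax; on success rOld already holds rExpd,
            which by definition equals the old value. */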
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOld));
         return;
      } else {
         /* double CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RDX(), rOldHi));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");

      AMD64CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      AMD64AMode*   amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
                                          hregAMD64_RBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amRIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_syscall:
         case Ijk_Sys_int210:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
                                             amRIP, Acc_ALWAYS,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an indirect transfer,
               as that's the cheapest alternative that is
               allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
         } else {
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_syscall:
      case Ijk_Sys_int210:
      case Ijk_InvalICache:
      case Ijk_Yield: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( const IRSB* bb,
                            VexArch arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   AMD64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT
                     | VEX_HWCAPS_AMD64_AVX
                     | VEX_HWCAPS_AMD64_RDTSCP
                     | VEX_HWCAPS_AMD64_BMI
                     | VEX_HWCAPS_AMD64_AVX2)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
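   /* A temp of most types maps to a single vreg (vregmap); wide temps
      (Ity_I128, Ity_V256) are split across a (vregmapHI[i]:vregmap[i])
      pair, matching the hi/lo register pairs used by iselInt128Expr
      and iselDVecExpr above. */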

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_I128:
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128:
            hreg = mkHReg(True, HRcVec128, 0, j++);
            break;
         case Ity_V256:
            hreg   = mkHReg(True, HRcVec128, 0, j++);
            hregHI = mkHReg(True, HRcVec128, 0, j++);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = AMD64AMode_IR(offs_Host_EvC_Counter,  hregAMD64_RBP());
   amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, AMD64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/