
/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2010 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir_match.h"
#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
#include "host_amd64_defs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to an FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged
   at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
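
/* For reference, a sketch of how those two values decode, following
   the standard x87 and MXCSR bit layouts (not used by the code below):

      DEFAULT_FPUCW = 0x027F:
         bits 0..5   all set  -- all x87 exceptions masked
                                 (bit 6 is reserved and reads as 1)
         bits 8..9   = 10b    -- precision control: 53-bit mantissa
         bits 10..11 = 00b    -- rounding: to nearest

      DEFAULT_MXCSR = 0x1F80:
         bits 7..12  all set  -- all SSE exceptions masked
         bits 13..14 = 00b    -- rounding: to nearest
*/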


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
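
/* These feed the DECLARE_PATTERN/DEFINE_PATTERN/matchIRExpr machinery
   from ir_match.h; bind(n) marks a subexpression to be captured into
   mi.bindee[n].  For instance, the pattern
      DEFINE_PATTERN(p_8Uto64, unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0))));
   used later in iselIntExpr_R_wrk captures the inner 8-bit expression. */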


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

   - vregmap   holds the primary register for the IRTemp.
   - vregmapHI is only used for 128-bit integer-typed
     IRTemps.  It holds the identity of a second
     64-bit virtual HReg, which holds the high half
     of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      UInt         hwcaps;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
                              ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//.. static HReg newVRegF ( ISelEnv* env )
//.. {
//..    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
//..    env->vreg_ctr++;
//..    return reg;
//.. }

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly:
   for each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || am->Aam.IR.reg == hregAMD64_RBP()) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
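
/* Illustratively:
      fitsIn32Bits(0x000000007FFFFFFFULL) == True   -- top 33 bits all 0
      fitsIn32Bits(0xFFFFFFFF80000000ULL) == True   -- top 33 bits all 1
      fitsIn32Bits(0x0000000080000000ULL) == False  -- sign-extending the
                                                       low 32 bits would
                                                       flip the top half
*/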

/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((simm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
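
/* So, illustratively, push_uimm64(env, 0xFFFFFFFF80000000ULL) emits a
   single pushq of the 32-bit immediate 0x80000000 (which the push
   sign-extends back to the full value), whereas
   push_uimm64(env, 0x123456789AULL) has to go via a 64-bit immediate
   move into a temporary. */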

//.. /* Given an amode, return one which references 4 bytes further
//..    along. */
//..
//.. static X86AMode* advance4 ( X86AMode* am )
//.. {
//..    X86AMode* am4 = dopyX86AMode(am);
//..    switch (am4->tag) {
//..       case Xam_IRRS:
//..          am4->Xam.IRRS.imm += 4; break;
//..       case Xam_IR:
//..          am4->Xam.IR.imm += 4; break;
//..       default:
//..          vpanic("advance4(x86,host)");
//..    }
//..    return am4;
//.. }
//..
//..
//.. /* Push an arg onto the host stack, in preparation for a call to a
//..    helper function of some kind.  Returns the number of 32-bit words
//..    pushed. */
//..
//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
//.. {
//..    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
//..    if (arg_ty == Ity_I32) {
//..       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
//..       return 1;
//..    } else
//..    if (arg_ty == Ity_I64) {
//..       HReg rHi, rLo;
//..       iselInt64Expr(&rHi, &rLo, env, arg);
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
//..       return 2;
//..    }
//..    ppIRExpr(arg);
//..    vpanic("pushArg(x86): can't handle arg of this type");
//.. }


/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovZLQ(src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}


/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env,
                    Bool passBBP,
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   Int           n_args, i, argreg;

   /* Marshal args for a call and do the call.

      If passBBP is True, %rbp (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
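
   /* Concretely (purely illustrative): a call such as
      helper(t5, GET:I64(32), 0x2A:I64) qualifies for the fast scheme,
      since every argument is a Tmp, Const or Get; something like
      helper(Add64(t5,t7)) does not -- Add64 is outside that safe set,
      so the whole call conservatively takes the slow route. */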

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (6 < n_args + (passBBP ? 1 : 0))
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   argreg = 0;
   if (passBBP) {
      fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      fastinstrs[argreg]
         = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
      if (fastinstrs[argreg] == NULL)
         goto slowscheme;
      argreg++;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   vassert(argreg <= 6);
   for (i = 0; i < argreg; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
#if 0
if (n_args > 0) {for (i = 0; args[i]; i++) {
ppIRExpr(args[i]); vex_printf(" "); }
vex_printf("\n");}
#endif
   argreg = 0;

   if (passBBP) {
      /* This is pretty stupid; better to move directly to rdi
         after the rest of the args are done. */
      tmpregs[argreg] = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
      argreg++;
   }

   for (i = 0; i < n_args; i++) {
      vassert(argreg < 6);
      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      tmpregs[argreg] = iselIntExpr_R(env, args[i]);
      argreg++;
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < argreg; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Finally, the call itself. */
  handle_call:
   addInstr(env, AMD64Instr_Call(
                    cc,
                    Ptr_to_ULong(cee->addr),
                    n_args + (passBBP ? 1 : 0)
                 )
           );
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp       (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
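
/* Illustratively: for a descriptor with base 0x100, elemTy Ity_I64 and
   nElems 8, index expression 'off' and bias 1, the code above emits
      movq %off, %tmp ; addq $1, %tmp ; andq $7, %tmp
   and returns the amode 0x100(%rbp,%tmp,8); the and-with-7 keeps even
   a biased index inside the 8-element array. */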


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
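
/* A quick sanity check of the encoding (illustrative only): both
   IRRoundingMode (0=nearest, 1=-inf, 2=+inf, 3=zero) and the %mxcsr
   rounding field in bits 13:14 use the same ordering, so e.g. mode 2
   yields (2 << 13) | 0x1F80 = 0x5F80: round-up, with all exceptions
   still masked. */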


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
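
/* Same idea for the x87 unit, whose rounding field sits in control
   word bits 10:11 with the same ordering as IRRoundingMode -- hence
   the shift by 10.  E.g. mode 3 (round towards zero) gives
   (3 << 10) | 0x027F = 0x0E7F. */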


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}


//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
//..    after most non-simple FPU operations (simple = +, -, *, / and
//..    sqrt).
//..
//..    This could be done a lot more efficiently if needed, by loading
//..    zero and adding it to the value to be rounded (fldz ; faddp?).
//.. */
//.. static void roundToF64 ( ISelEnv* env, HReg reg )
//.. {
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..    sub_from_esp(env, 8);
//..    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
//..    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
//..    add_to_esp(env, 8);
//.. }


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/
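
/* (Illustrative only: a caller that needs, say, an Ity_I8 result as a
   clean 64-bit value must therefore mask it itself -- and-with-0xFF,
   or a shl-56/sar-56 pair to sign-extend -- exactly as the Iop_8Uto64
   and Iop_8Sto64 cases below do.) */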

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_8Uto64);
   DECLARE_PATTERN(p_1Uto8_64to1);

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovZLQ(dst,dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 32, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }
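
      /* (The widening above is needed because, per the iselIntExpr_R
         contract, the upper bits of a 32/16/8-bit value are arbitrary:
         a logical right shift must zero them first, an arithmetic
         right shift must sign-extend them first, and left shifts
         don't care.) */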

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrow32Sx2:
            fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
         case Iop_QNarrow16Sx4:
            fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
         case Iop_QNarrow16Ux4:
            fn = (HWord)h_generic_calc_QNarrow16Ux4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ),
            and they are not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovZLQ(argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         /* This generates a truly rotten piece of code.  Just as well
            it doesn't happen very often. */
         HReg src1  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg src1L = newVRegI(env);
         HReg src2  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg src2L = newVRegI(env);
         HReg dst   = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(src1,dst));
         addInstr(env, mk_iMOVsd_RR(src1,src1L));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
         addInstr(env, mk_iMOVsd_RR(src2,src2L));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
         addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
         addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovZLQ(rdx,rdx));
         addInstr(env, AMD64Instr_MovZLQ(rax,rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovZLQ(lo32,lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32  = newVRegI(env);
         HReg b32  = newVRegI(env);
         HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }
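
      /* (0x45 = bits 6, 2 and 0: the ZF, PF and CF lanes, which are
         the only flags ucomisd defines; the rest of the captured
         %rflags value is noise as far as CmpF64 is concerned.) */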
1285
sewardj6c299f32009-12-31 18:00:12 +00001286 if (e->Iex.Binop.op == Iop_F64toI32S
1287 || e->Iex.Binop.op == Iop_F64toI64S) {
1288 Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
sewardj1a01e652005-02-23 11:39:21 +00001289 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1290 HReg dst = newVRegI(env);
1291 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
sewardj37d52572005-02-25 14:22:12 +00001292 addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
sewardj1a01e652005-02-23 11:39:21 +00001293 set_SSE_rounding_default(env);
1294 return dst;
1295 }
1296
sewardja3e98302005-02-01 15:55:05 +00001297//.. if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
1298//.. Int sz = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
1299//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1300//.. HReg dst = newVRegI(env);
1301//..
1302//.. /* Used several times ... */
1303//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1304//..
sewardj7de0d3c2005-02-13 02:26:41 +00001305//.. /* rf now holds the value to be converted, and rrm holds the
sewardja3e98302005-02-01 15:55:05 +00001306//.. rounding mode value, encoded as per the IRRoundingMode
1307//.. enum. The first thing to do is set the FPU's rounding
1308//.. mode accordingly. */
1309//..
1310//.. /* Create a space for the format conversion. */
1311//.. /* subl $4, %esp */
1312//.. sub_from_esp(env, 4);
1313//..
1314//.. /* Set host rounding mode */
1315//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1316//..
1317//.. /* gistw/l %rf, 0(%esp) */
1318//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
1319//..
1320//.. if (sz == 2) {
1321//.. /* movzwl 0(%esp), %dst */
1322//.. addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1323//.. } else {
1324//.. /* movl 0(%esp), %dst */
1325//.. vassert(sz == 4);
1326//.. addInstr(env, X86Instr_Alu32R(
1327//.. Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1328//.. }
1329//..
1330//.. /* Restore default FPU rounding. */
1331//.. set_FPU_rounding_default( env );
1332//..
1333//.. /* addl $4, %esp */
1334//.. add_to_esp(env, 4);
1335//.. return dst;
1336//.. }
1337//..
1338//.. /* C3210 flags following FPU partial remainder (fprem), both
1339//.. IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1340//.. if (e->Iex.Binop.op == Iop_PRemC3210F64
1341//.. || e->Iex.Binop.op == Iop_PRem1C3210F64) {
1342//.. HReg junk = newVRegF(env);
1343//.. HReg dst = newVRegI(env);
1344//.. HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
1345//.. HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
1346//.. addInstr(env, X86Instr_FpBinary(
1347//.. e->Iex.Binop.op==Iop_PRemC3210F64
1348//.. ? Xfp_PREM : Xfp_PREM1,
1349//.. srcL,srcR,junk
1350//.. ));
1351//.. /* The previous pseudo-insn will have left the FPU's C3210
1352//.. flags set correctly. So bag them. */
1353//.. addInstr(env, X86Instr_FpStSW_AX());
1354//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1355//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1356//.. return dst;
1357//.. }
sewardj8258a8c2005-02-02 03:11:24 +00001358
1359 break;
1360 }
1361
sewardjf67eadf2005-02-03 03:53:52 +00001362 /* --------- UNARY OP --------- */
1363 case Iex_Unop: {
sewardj176ad2f2005-04-27 11:55:08 +00001364 /* 32Uto64(8Uto32(expr8)) */
sewardj7f039c42005-02-04 21:13:55 +00001365 DEFINE_PATTERN(p_8Uto64,
1366 unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0)) ) );
1367 if (matchIRExpr(&mi,p_8Uto64,e)) {
1368 IRExpr* expr8 = mi.bindee[0];
1369 HReg dst = newVRegI(env);
1370 HReg src = iselIntExpr_R(env, expr8);
1371 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001372 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
1373 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 56, dst));
sewardj7f039c42005-02-04 21:13:55 +00001374 return dst;
1375 }
1376
sewardj176ad2f2005-04-27 11:55:08 +00001377 /* 1Uto8(64to1(expr64)) */
1378 DEFINE_PATTERN( p_1Uto8_64to1,
1379 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1380 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
sewardj05b3b6a2005-02-04 01:44:33 +00001381 IRExpr* expr64 = mi.bindee[0];
1382 HReg dst = newVRegI(env);
1383 HReg src = iselIntExpr_R(env, expr64);
1384 addInstr(env, mk_iMOVsd_RR(src,dst) );
1385 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1386 AMD64RMI_Imm(1), dst));
1387 return dst;
1388 }
1389
sewardja3e98302005-02-01 15:55:05 +00001390//.. /* 16Uto32(LDle(expr32)) */
1391//.. {
1392//.. DECLARE_PATTERN(p_LDle16_then_16Uto32);
1393//.. DEFINE_PATTERN(p_LDle16_then_16Uto32,
1394//.. unop(Iop_16Uto32,IRExpr_LDle(Ity_I16,bind(0))) );
1395//.. if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1396//.. HReg dst = newVRegI(env);
1397//.. X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1398//.. addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1399//.. return dst;
1400//.. }
1401//.. }
sewardjf67eadf2005-02-03 03:53:52 +00001402
1403 switch (e->Iex.Unop.op) {
1404 case Iop_32Uto64: {
1405 HReg dst = newVRegI(env);
1406 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1407 addInstr(env, AMD64Instr_MovZLQ(src,dst) );
1408 return dst;
1409 }
sewardj05b3b6a2005-02-04 01:44:33 +00001410 case Iop_32Sto64: {
1411 HReg dst = newVRegI(env);
1412 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1413 UInt amt = 32;
1414 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001415 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1416 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
sewardj05b3b6a2005-02-04 01:44:33 +00001417 return dst;
1418 }
sewardj9b967672005-02-08 11:13:09 +00001419 case Iop_128HIto64: {
1420 HReg rHi, rLo;
1421 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1422 return rHi; /* and abandon rLo */
1423 }
1424 case Iop_128to64: {
1425 HReg rHi, rLo;
1426 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1427 return rLo; /* and abandon rHi */
1428 }
sewardj85520e42005-02-19 15:22:38 +00001429 case Iop_8Uto16:
sewardjec93f982005-06-21 13:51:18 +00001430 case Iop_8Uto32:
sewardj176ad2f2005-04-27 11:55:08 +00001431 case Iop_8Uto64:
1432 case Iop_16Uto64:
sewardj85520e42005-02-19 15:22:38 +00001433 case Iop_16Uto32: {
sewardj176ad2f2005-04-27 11:55:08 +00001434 HReg dst = newVRegI(env);
1435 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001436 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
1437 || e->Iex.Unop.op==Iop_16Uto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001438 UInt mask = srcIs16 ? 0xFFFF : 0xFF;
sewardj7de0d3c2005-02-13 02:26:41 +00001439 addInstr(env, mk_iMOVsd_RR(src,dst) );
1440 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1441 AMD64RMI_Imm(mask), dst));
1442 return dst;
1443 }
sewardj85520e42005-02-19 15:22:38 +00001444 case Iop_8Sto16:
sewardj176ad2f2005-04-27 11:55:08 +00001445 case Iop_8Sto64:
sewardj7de0d3c2005-02-13 02:26:41 +00001446 case Iop_8Sto32:
sewardj176ad2f2005-04-27 11:55:08 +00001447 case Iop_16Sto32:
1448 case Iop_16Sto64: {
1449 HReg dst = newVRegI(env);
1450 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj65b17c62005-05-02 15:52:44 +00001451 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
1452 || e->Iex.Unop.op==Iop_16Sto64 );
sewardj176ad2f2005-04-27 11:55:08 +00001453 UInt amt = srcIs16 ? 48 : 56;
sewardj486074e2005-02-08 20:10:04 +00001454 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001455 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1456 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
sewardj486074e2005-02-08 20:10:04 +00001457 return dst;
1458 }
sewardj85520e42005-02-19 15:22:38 +00001459 case Iop_Not8:
1460 case Iop_Not16:
sewardj7de0d3c2005-02-13 02:26:41 +00001461 case Iop_Not32:
sewardjd0a12df2005-02-10 02:07:43 +00001462 case Iop_Not64: {
1463 HReg dst = newVRegI(env);
1464 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1465 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001466 addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
sewardjd0a12df2005-02-10 02:07:43 +00001467 return dst;
1468 }
sewardja3e98302005-02-01 15:55:05 +00001469//.. case Iop_64HIto32: {
1470//.. HReg rHi, rLo;
1471//.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1472//.. return rHi; /* and abandon rLo .. poor wee thing :-) */
1473//.. }
1474//.. case Iop_64to32: {
1475//.. HReg rHi, rLo;
1476//.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1477//.. return rLo; /* similar stupid comment to the above ... */
1478//.. }
de5a70f5c2010-04-01 23:08:59 +00001479 case Iop_16HIto8:
sewardj85520e42005-02-19 15:22:38 +00001480 case Iop_32HIto16:
sewardj7de0d3c2005-02-13 02:26:41 +00001481 case Iop_64HIto32: {
1482 HReg dst = newVRegI(env);
1483 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1484 Int shift = 0;
1485 switch (e->Iex.Unop.op) {
sewardj9ba870d2010-04-02 11:29:23 +00001486 case Iop_16HIto8: shift = 8; break;
sewardj85520e42005-02-19 15:22:38 +00001487 case Iop_32HIto16: shift = 16; break;
sewardj7de0d3c2005-02-13 02:26:41 +00001488 case Iop_64HIto32: shift = 32; break;
1489 default: vassert(0);
1490 }
1491 addInstr(env, mk_iMOVsd_RR(src,dst) );
sewardj501a3392005-05-11 15:37:50 +00001492 addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
sewardj7de0d3c2005-02-13 02:26:41 +00001493 return dst;
1494 }
sewardj176ad2f2005-04-27 11:55:08 +00001495 case Iop_1Uto64:
sewardj0af46ab2005-04-26 01:52:29 +00001496 case Iop_1Uto32:
sewardjf53b7352005-04-06 20:01:56 +00001497 case Iop_1Uto8: {
1498 HReg dst = newVRegI(env);
1499 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1500 addInstr(env, AMD64Instr_Set64(cond,dst));
1501 return dst;
1502 }
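      /* For the signed variants below, Set64 materialises the
         condition as 0 or 1 in dst; the shl-63/sar-63 pair then
         smears bit 0 across the whole register, giving 0 or all-ones
         as 1Sto{8,16,32,64} requires. */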
sewardja64f8ad2005-04-24 00:26:37 +00001503 case Iop_1Sto8:
sewardj478fe702005-04-23 01:15:47 +00001504 case Iop_1Sto16:
1505 case Iop_1Sto32:
sewardj42322b52005-04-20 22:57:11 +00001506 case Iop_1Sto64: {
1507 /* could do better than this, but for now ... */
1508 HReg dst = newVRegI(env);
1509 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1510 addInstr(env, AMD64Instr_Set64(cond,dst));
sewardj501a3392005-05-11 15:37:50 +00001511 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
1512 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
sewardj42322b52005-04-20 22:57:11 +00001513 return dst;
1514 }
sewardjf53b7352005-04-06 20:01:56 +00001515 case Iop_Ctz64: {
1516 /* Count trailing zeroes, implemented by amd64 'bsfq' */
1517 HReg dst = newVRegI(env);
1518 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1519 addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
1520 return dst;
1521 }
sewardj537cab02005-04-07 02:03:52 +00001522 case Iop_Clz64: {
1523 /* Count leading zeroes. Do 'bsrq' to establish the index
1524 of the highest set bit, and subtract that value from
1525 63. */
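         /* In effect: bsrq %src,%tmp ; movq $63,%dst ; subq %tmp,%dst.
            As with the hardware bsrq, the result is undefined for a
            zero source -- presumably the IR never asks for Clz64(0)
            (an assumption; it is not checked here). */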
1526 HReg tmp = newVRegI(env);
1527 HReg dst = newVRegI(env);
1528 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1529 addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
1530 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
1531 AMD64RMI_Imm(63), dst));
1532 addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
1533 AMD64RMI_Reg(tmp), dst));
1534 return dst;
1535 }
sewardjeb17e492007-08-25 23:07:44 +00001536
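      /* CmpwNEZ{32,64}(x) must yield all-zeroes if x == 0 and
         all-ones otherwise.  The sequences below compute (x | -x)
         and arithmetic-shift right by 63: the sign bit of (x | -x)
         is set iff x is nonzero, and the shift broadcasts it across
         the register.  The 32-bit variant zero-extends first so the
         same 64-bit trick applies. */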
1537 case Iop_CmpwNEZ64: {
sewardj176ad2f2005-04-27 11:55:08 +00001538 HReg dst = newVRegI(env);
sewardjeb17e492007-08-25 23:07:44 +00001539 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1540 addInstr(env, mk_iMOVsd_RR(src,dst));
sewardj501a3392005-05-11 15:37:50 +00001541 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
sewardjeb17e492007-08-25 23:07:44 +00001542 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1543 AMD64RMI_Reg(src), dst));
1544 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1545 return dst;
1546 }
1547
1548 case Iop_CmpwNEZ32: {
1549 HReg src = newVRegI(env);
1550 HReg dst = newVRegI(env);
1551 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1552 addInstr(env, mk_iMOVsd_RR(pre,src));
1553 addInstr(env, AMD64Instr_MovZLQ(src,src));
1554 addInstr(env, mk_iMOVsd_RR(src,dst));
1555 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1556 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1557 AMD64RMI_Reg(src), dst));
1558 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1559 return dst;
1560 }
1561
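      /* Left(x) is x | -x: every bit from the lowest set bit of x
         upwards becomes 1, which is exactly what the NEG/OR pair
         below computes. */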
1562 case Iop_Left8:
1563 case Iop_Left16:
1564 case Iop_Left32:
1565 case Iop_Left64: {
1566 HReg dst = newVRegI(env);
1567 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1568 addInstr(env, mk_iMOVsd_RR(src, dst));
1569 addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
1570 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
sewardj176ad2f2005-04-27 11:55:08 +00001571 return dst;
1572 }
sewardj537cab02005-04-07 02:03:52 +00001573
sewardj478fe702005-04-23 01:15:47 +00001574 case Iop_V128to32: {
1575 HReg dst = newVRegI(env);
1576 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1577 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1578 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1579 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1580 return dst;
1581 }
sewardj1a01e652005-02-23 11:39:21 +00001582
1583 /* V128{HI}to64 */
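      /* The vector is bounced through memory: reserve 16 bytes on
         the stack, store the whole xmm register, and reload just the
         wanted 8-byte half (offset 8 for the high half, 0 for the
         low). */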
1584 case Iop_V128HIto64:
1585 case Iop_V128to64: {
1586 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
1587 HReg dst = newVRegI(env);
1588 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1589 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
1590 AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
1591 sub_from_rsp(env, 16);
1592 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
1593 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
sewardj25a85812005-05-08 23:03:48 +00001594 AMD64RMI_Mem(rspN), dst ));
sewardj1a01e652005-02-23 11:39:21 +00001595 add_to_rsp(env, 16);
1596 return dst;
1597 }
1598
sewardj924215b2005-03-26 21:50:31 +00001599 /* ReinterpF64asI64(e) */
1600 /* Given an IEEE754 double, produce an I64 with the same bit
1601 pattern. */
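      /* The bits are bounced through the 8 bytes just below %rsp,
         relying on the amd64 red-zone convention: an SSE store
         followed by an integer reload. */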
1602 case Iop_ReinterpF64asI64: {
1603 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1604 HReg dst = newVRegI(env);
1605 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1606 /* paranoia */
1607 set_SSE_rounding_default(env);
1608 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1609 addInstr(env, AMD64Instr_Alu64R(
1610 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1611 return dst;
1612 }
1613
sewardj79501112008-07-29 09:48:26 +00001614 /* ReinterpF32asI32(e) */
1615 /* Given an IEEE754 single, produce an I64 with the same bit
1616 pattern in the lower half. */
1617 case Iop_ReinterpF32asI32: {
1618 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1619 HReg dst = newVRegI(env);
1620 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1621 /* paranoia */
1622 set_SSE_rounding_default(env);
1623 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
1624 addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
1625 return dst;
1626 }
1627
sewardj85520e42005-02-19 15:22:38 +00001628 case Iop_16to8:
sewardja6b93d12005-02-17 09:28:28 +00001629 case Iop_32to8:
sewardj176ad2f2005-04-27 11:55:08 +00001630 case Iop_64to8:
sewardj7de0d3c2005-02-13 02:26:41 +00001631 case Iop_32to16:
sewardj176ad2f2005-04-27 11:55:08 +00001632 case Iop_64to16:
sewardj486074e2005-02-08 20:10:04 +00001633 case Iop_64to32:
1634 /* These are no-ops. */
1635 return iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjf67eadf2005-02-03 03:53:52 +00001636
1637 default:
1638 break;
1639 }
sewardje7905662005-05-09 18:15:21 +00001640
1641 /* Deal with unary 64-bit SIMD ops. */
1642 switch (e->Iex.Unop.op) {
1643 case Iop_CmpNEZ32x2:
1644 fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1645 case Iop_CmpNEZ16x4:
1646 fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1647 case Iop_CmpNEZ8x8:
1648 fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1649 default:
1650 fn = (HWord)0; break;
1651 }
1652 if (fn != (HWord)0) {
 1653 /* Note: the following assumes that all helpers are of
 1654 signature ULong fn ( ULong ), and that they are not
 1655 marked as regparm functions. */
1658 HReg dst = newVRegI(env);
1659 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1660 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
1661 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
1662 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1663 return dst;
1664 }
1665
sewardjf67eadf2005-02-03 03:53:52 +00001666 break;
1667 }
sewardj8258a8c2005-02-02 03:11:24 +00001668
1669 /* --------- GET --------- */
1670 case Iex_Get: {
1671 if (ty == Ity_I64) {
1672 HReg dst = newVRegI(env);
1673 addInstr(env, AMD64Instr_Alu64R(
1674 Aalu_MOV,
1675 AMD64RMI_Mem(
1676 AMD64AMode_IR(e->Iex.Get.offset,
1677 hregAMD64_RBP())),
1678 dst));
1679 return dst;
1680 }
1681 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1682 HReg dst = newVRegI(env);
1683 addInstr(env, AMD64Instr_LoadEX(
sewardj1e499352005-03-23 03:02:50 +00001684 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
sewardj8258a8c2005-02-02 03:11:24 +00001685 False,
1686 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1687 dst));
1688 return dst;
1689 }
1690 break;
1691 }
1692
sewardj8d965312005-02-25 02:48:47 +00001693 case Iex_GetI: {
1694 AMD64AMode* am
1695 = genGuestArrayOffset(
1696 env, e->Iex.GetI.descr,
1697 e->Iex.GetI.ix, e->Iex.GetI.bias );
1698 HReg dst = newVRegI(env);
1699 if (ty == Ity_I8) {
1700 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1701 return dst;
1702 }
sewardj1e015d82005-04-23 23:41:46 +00001703 if (ty == Ity_I64) {
1704 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1705 return dst;
1706 }
sewardj8d965312005-02-25 02:48:47 +00001707 break;
1708 }
sewardj05b3b6a2005-02-04 01:44:33 +00001709
1710 /* --------- CCALL --------- */
1711 case Iex_CCall: {
1712 HReg dst = newVRegI(env);
sewardj7f039c42005-02-04 21:13:55 +00001713 vassert(ty == e->Iex.CCall.retty);
sewardj05b3b6a2005-02-04 01:44:33 +00001714
1715 /* be very restrictive for now. Only 64-bit ints allowed
sewardje8aaa872005-07-07 13:12:04 +00001716 for args, and 64 or 32 bits for return type. */
1717 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
sewardj05b3b6a2005-02-04 01:44:33 +00001718 goto irreducible;
1719
sewardj7f039c42005-02-04 21:13:55 +00001720 /* Marshal args, do the call. */
sewardj05b3b6a2005-02-04 01:44:33 +00001721 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1722
sewardje8aaa872005-07-07 13:12:04 +00001723 /* Move to dst, and zero out the top 32 bits if the result type is
1724 Ity_I32. Probably overkill, but still .. */
1725 if (e->Iex.CCall.retty == Ity_I64)
1726 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1727 else
1728 addInstr(env, AMD64Instr_MovZLQ(hregAMD64_RAX(), dst));
1729
sewardj05b3b6a2005-02-04 01:44:33 +00001730 return dst;
1731 }
1732
sewardj7f039c42005-02-04 21:13:55 +00001733 /* --------- LITERAL --------- */
1734 /* 64/32/16/8-bit literals */
1735 case Iex_Const:
1736 if (ty == Ity_I64) {
1737 HReg r = newVRegI(env);
1738 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1739 return r;
1740 } else {
1741 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1742 HReg r = newVRegI(env);
1743 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1744 return r;
1745 }
sewardj05b3b6a2005-02-04 01:44:33 +00001746
1747 /* --------- MULTIPLEX --------- */
1748 case Iex_Mux0X: {
1749 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1750 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1751 HReg r8;
1752 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1753 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1754 HReg dst = newVRegI(env);
1755 addInstr(env, mk_iMOVsd_RR(rX,dst));
1756 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
sewardj501a3392005-05-11 15:37:50 +00001757 addInstr(env, AMD64Instr_Test64(0xFF, r8));
sewardj05b3b6a2005-02-04 01:44:33 +00001758 addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
1759 return dst;
1760 }
1761 break;
1762 }
sewardj8258a8c2005-02-02 03:11:24 +00001763
sewardjf4c803b2006-09-11 11:07:34 +00001764 /* --------- TERNARY OP --------- */
1765 case Iex_Triop: {
1766 /* C3210 flags following FPU partial remainder (fprem), both
1767 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
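      /* In the x87 status word C3 is bit 14 and C2..C0 are bits
         10..8, so the 0x4700 mask applied after the fstsw below
         isolates exactly those four flags. */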
sewardj4970e4e2008-10-11 10:07:55 +00001768 if (e->Iex.Triop.op == Iop_PRemC3210F64
1769 || e->Iex.Triop.op == Iop_PRem1C3210F64) {
sewardjf4c803b2006-09-11 11:07:34 +00001770 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1771 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
1772 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
1773 HReg dst = newVRegI(env);
1774 addInstr(env, AMD64Instr_A87Free(2));
1775
1776 /* one arg -> top of x87 stack */
1777 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
1778 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
1779
1780 /* other arg -> top of x87 stack */
1781 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
1782 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
1783
1784 switch (e->Iex.Triop.op) {
1785 case Iop_PRemC3210F64:
1786 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1787 break;
sewardj4970e4e2008-10-11 10:07:55 +00001788 case Iop_PRem1C3210F64:
1789 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
1790 break;
sewardjf4c803b2006-09-11 11:07:34 +00001791 default:
1792 vassert(0);
1793 }
1794 /* Ignore the result, and instead make off with the FPU's
1795 C3210 flags (in the status word). */
1796 addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1797 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1798 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1799 return dst;
1800 }
1801 break;
1802 }
1803
sewardj8258a8c2005-02-02 03:11:24 +00001804 default:
1805 break;
1806 } /* switch (e->tag) */
1807
1808 /* We get here if no pattern matched. */
1809 irreducible:
1810 ppIRExpr(e);
1811 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1812}
sewardj614b3fb2005-02-02 02:16:03 +00001813
1814
1815/*---------------------------------------------------------*/
1816/*--- ISEL: Integer expression auxiliaries ---*/
1817/*---------------------------------------------------------*/
1818
1819/* --------------------- AMODEs --------------------- */
1820
1821/* Return an AMode which computes the value of the specified
1822 expression, possibly also adding insns to the code list as a
 1823 result. The expression may only be a 64-bit one.
1824*/
1825
sewardj8258a8c2005-02-02 03:11:24 +00001826static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1827{
1828 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1829 vassert(sane_AMode(am));
1830 return am;
1831}
1832
1833/* DO NOT CALL THIS DIRECTLY ! */
1834static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1835{
sewardj05b3b6a2005-02-04 01:44:33 +00001836 MatchInfo mi;
1837 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001838 IRType ty = typeOfIRExpr(env->type_env,e);
1839 vassert(ty == Ity_I64);
1840
sewardj05b3b6a2005-02-04 01:44:33 +00001841 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1842 /* bind0 bind1 bind2 bind3 */
1843 DEFINE_PATTERN(p_complex,
1844 binop( Iop_Add64,
1845 binop( Iop_Add64,
1846 bind(0),
1847 binop(Iop_Shl64, bind(1), bind(2))
1848 ),
1849 bind(3)
1850 )
1851 );
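   /* A successful match folds the whole tree into a single amd64
      amode of the form disp32(%r1,%r2,1<<shift), e.g. something like
         movq 0x30(%rax,%rbx,8), ...
      provided the shift is 0..3 and the displacement fits in 32
      bits, both of which are checked below. */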
1852 if (matchIRExpr(&mi, p_complex, e)) {
1853 IRExpr* expr1 = mi.bindee[0];
1854 IRExpr* expr2 = mi.bindee[1];
1855 IRExpr* imm8 = mi.bindee[2];
1856 IRExpr* simm32 = mi.bindee[3];
1857 if (imm8->tag == Iex_Const
1858 && imm8->Iex.Const.con->tag == Ico_U8
1859 && imm8->Iex.Const.con->Ico.U8 < 4
1860 /* imm8 is OK, now check simm32 */
1861 && simm32->tag == Iex_Const
1862 && simm32->Iex.Const.con->tag == Ico_U64
1863 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1864 UInt shift = imm8->Iex.Const.con->Ico.U8;
sewardj428fabd2005-03-21 03:11:17 +00001865 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
sewardj05b3b6a2005-02-04 01:44:33 +00001866 HReg r1 = iselIntExpr_R(env, expr1);
1867 HReg r2 = iselIntExpr_R(env, expr2);
1868 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1869 return AMD64AMode_IRRS(offset, r1, r2, shift);
1870 }
1871 }
1872
sewardj8258a8c2005-02-02 03:11:24 +00001873 /* Add64(expr1, Shl64(expr2, imm)) */
1874 if (e->tag == Iex_Binop
1875 && e->Iex.Binop.op == Iop_Add64
1876 && e->Iex.Binop.arg2->tag == Iex_Binop
1877 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1878 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1879 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1880 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1881 if (shift == 1 || shift == 2 || shift == 3) {
1882 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1883 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1884 return AMD64AMode_IRRS(0, r1, r2, shift);
1885 }
1886 }
1887
1888 /* Add64(expr,i) */
1889 if (e->tag == Iex_Binop
1890 && e->Iex.Binop.op == Iop_Add64
1891 && e->Iex.Binop.arg2->tag == Iex_Const
1892 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
1893 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
1894 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1895 return AMD64AMode_IR(
sewardj428fabd2005-03-21 03:11:17 +00001896 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
sewardj8258a8c2005-02-02 03:11:24 +00001897 r1
1898 );
1899 }
1900
1901 /* Doesn't match anything in particular. Generate it into
1902 a register and use that. */
1903 {
1904 HReg r1 = iselIntExpr_R(env, e);
1905 return AMD64AMode_IR(0, r1);
1906 }
1907}
sewardj614b3fb2005-02-02 02:16:03 +00001908
1909
1910/* --------------------- RMIs --------------------- */
1911
 1912/* Similarly, calculate an expression into an AMD64RMI operand. As with
 1913 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
1914
1915static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1916{
1917 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1918 /* sanity checks ... */
1919 switch (rmi->tag) {
1920 case Armi_Imm:
1921 return rmi;
1922 case Armi_Reg:
1923 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
1924 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
1925 return rmi;
1926 case Armi_Mem:
1927 vassert(sane_AMode(rmi->Armi.Mem.am));
1928 return rmi;
1929 default:
1930 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
1931 }
1932}
1933
1934/* DO NOT CALL THIS DIRECTLY ! */
1935static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1936{
1937 IRType ty = typeOfIRExpr(env->type_env,e);
1938 vassert(ty == Ity_I64 || ty == Ity_I32
1939 || ty == Ity_I16 || ty == Ity_I8);
1940
1941 /* special case: immediate 64/32/16/8 */
1942 if (e->tag == Iex_Const) {
1943 switch (e->Iex.Const.con->tag) {
1944 case Ico_U64:
1945 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00001946 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardj614b3fb2005-02-02 02:16:03 +00001947 }
1948 break;
1949 case Ico_U32:
1950 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
1951 case Ico_U16:
1952 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
1953 case Ico_U8:
1954 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
1955 default:
1956 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
1957 }
1958 }
1959
1960 /* special case: 64-bit GET */
1961 if (e->tag == Iex_Get && ty == Ity_I64) {
1962 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1963 hregAMD64_RBP()));
1964 }
1965
sewardj0852a132005-02-21 08:28:46 +00001966 /* special case: 64-bit load from memory */
sewardje9d8a262009-07-01 08:06:34 +00001967 if (e->tag == Iex_Load && ty == Ity_I64
sewardje768e922009-11-26 17:17:37 +00001968 && e->Iex.Load.end == Iend_LE) {
sewardjaf1ceca2005-06-30 23:31:27 +00001969 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj0852a132005-02-21 08:28:46 +00001970 return AMD64RMI_Mem(am);
1971 }
sewardj614b3fb2005-02-02 02:16:03 +00001972
1973 /* default case: calculate into a register and return that */
sewardj8258a8c2005-02-02 03:11:24 +00001974 {
1975 HReg r = iselIntExpr_R ( env, e );
1976 return AMD64RMI_Reg(r);
1977 }
sewardj614b3fb2005-02-02 02:16:03 +00001978}
1979
1980
sewardjf67eadf2005-02-03 03:53:52 +00001981/* --------------------- RIs --------------------- */
1982
1983/* Calculate an expression into an AMD64RI operand. As with
1984 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1985 bits. */
1986
1987static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1988{
1989 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
1990 /* sanity checks ... */
1991 switch (ri->tag) {
1992 case Ari_Imm:
1993 return ri;
sewardj80d6e6d2008-05-28 09:40:29 +00001994 case Ari_Reg:
sewardjf67eadf2005-02-03 03:53:52 +00001995 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
1996 vassert(hregIsVirtual(ri->Ari.Reg.reg));
1997 return ri;
1998 default:
1999 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2000 }
2001}
2002
2003/* DO NOT CALL THIS DIRECTLY ! */
2004static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
2005{
2006 IRType ty = typeOfIRExpr(env->type_env,e);
2007 vassert(ty == Ity_I64 || ty == Ity_I32
2008 || ty == Ity_I16 || ty == Ity_I8);
2009
2010 /* special case: immediate */
2011 if (e->tag == Iex_Const) {
2012 switch (e->Iex.Const.con->tag) {
2013 case Ico_U64:
2014 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00002015 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardjf67eadf2005-02-03 03:53:52 +00002016 }
2017 break;
2018 case Ico_U32:
2019 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
2020 case Ico_U16:
2021 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
2022 case Ico_U8:
2023 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
2024 default:
2025 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
2026 }
2027 }
2028
2029 /* default case: calculate into a register and return that */
2030 {
2031 HReg r = iselIntExpr_R ( env, e );
2032 return AMD64RI_Reg(r);
2033 }
2034}
2035
2036
sewardj05b3b6a2005-02-04 01:44:33 +00002037/* --------------------- RMs --------------------- */
2038
2039/* Similarly, calculate an expression into an AMD64RM operand. As
2040 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2041 bits. */
2042
2043static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
2044{
2045 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
2046 /* sanity checks ... */
2047 switch (rm->tag) {
2048 case Arm_Reg:
2049 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
2050 vassert(hregIsVirtual(rm->Arm.Reg.reg));
2051 return rm;
2052 case Arm_Mem:
2053 vassert(sane_AMode(rm->Arm.Mem.am));
2054 return rm;
2055 default:
2056 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2057 }
2058}
2059
2060/* DO NOT CALL THIS DIRECTLY ! */
2061static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
2062{
2063 IRType ty = typeOfIRExpr(env->type_env,e);
2064 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2065
2066 /* special case: 64-bit GET */
2067 if (e->tag == Iex_Get && ty == Ity_I64) {
2068 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2069 hregAMD64_RBP()));
2070 }
2071
 2072 /* special case: load from memory -- not handled; falls through to the default case */
2073
2074 /* default case: calculate into a register and return that */
2075 {
2076 HReg r = iselIntExpr_R ( env, e );
2077 return AMD64RM_Reg(r);
2078 }
2079}
2080
2081
2082/* --------------------- CONDCODE --------------------- */
2083
 2084/* Generate code to evaluate a bit-typed expression, returning the
 2085 condition code that would be set if the expression had notionally
 2086 returned 1. */
2087
2088static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2089{
2090 /* Uh, there's nothing we can sanity check here, unfortunately. */
2091 return iselCondCode_wrk(env,e);
2092}
2093
2094/* DO NOT CALL THIS DIRECTLY ! */
2095static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
2096{
sewardjf8c37f72005-02-07 18:55:29 +00002097 MatchInfo mi;
sewardj0af46ab2005-04-26 01:52:29 +00002098
sewardj05b3b6a2005-02-04 01:44:33 +00002099 vassert(e);
2100 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2101
sewardj176ad2f2005-04-27 11:55:08 +00002102 /* var */
sewardjdd40fdf2006-12-24 02:20:24 +00002103 if (e->tag == Iex_RdTmp) {
2104 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj176ad2f2005-04-27 11:55:08 +00002105 HReg dst = newVRegI(env);
2106 addInstr(env, mk_iMOVsd_RR(r64,dst));
2107 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
2108 return Acc_NZ;
2109 }
2110
sewardj109e9352005-07-19 08:42:56 +00002111 /* Constant 1:Bit */
2112 if (e->tag == Iex_Const) {
2113 HReg r;
2114 vassert(e->Iex.Const.con->tag == Ico_U1);
2115 vassert(e->Iex.Const.con->Ico.U1 == True
2116 || e->Iex.Const.con->Ico.U1 == False);
2117 r = newVRegI(env);
2118 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2119 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
2120 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2121 }
sewardj486074e2005-02-08 20:10:04 +00002122
2123 /* Not1(...) */
2124 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2125 /* Generate code for the arg, and negate the test condition */
2126 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2127 }
2128
sewardj176ad2f2005-04-27 11:55:08 +00002129 /* --- patterns rooted at: 64to1 --- */
2130
sewardj176ad2f2005-04-27 11:55:08 +00002131 /* 64to1 */
2132 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
sewardj501a3392005-05-11 15:37:50 +00002133 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2134 addInstr(env, AMD64Instr_Test64(1,reg));
sewardjf8c37f72005-02-07 18:55:29 +00002135 return Acc_NZ;
2136 }
2137
sewardj176ad2f2005-04-27 11:55:08 +00002138 /* --- patterns rooted at: CmpNEZ8 --- */
2139
2140 /* CmpNEZ8(x) */
2141 if (e->tag == Iex_Unop
2142 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2143 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002144 addInstr(env, AMD64Instr_Test64(0xFF,r));
sewardj176ad2f2005-04-27 11:55:08 +00002145 return Acc_NZ;
2146 }
2147
sewardj86ec28b2005-04-27 13:39:35 +00002148 /* --- patterns rooted at: CmpNEZ16 --- */
2149
2150 /* CmpNEZ16(x) */
2151 if (e->tag == Iex_Unop
2152 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2153 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002154 addInstr(env, AMD64Instr_Test64(0xFFFF,r));
sewardj86ec28b2005-04-27 13:39:35 +00002155 return Acc_NZ;
2156 }
2157
sewardj176ad2f2005-04-27 11:55:08 +00002158 /* --- patterns rooted at: CmpNEZ32 --- */
2159
2160 /* CmpNEZ32(x) */
2161 if (e->tag == Iex_Unop
2162 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2163 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2164 HReg tmp = newVRegI(env);
2165 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2166 addInstr(env, AMD64Instr_MovZLQ(r1,tmp));
2167 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
2168 return Acc_NZ;
2169 }
2170
2171 /* --- patterns rooted at: CmpNEZ64 --- */
2172
sewardj0bc78ab2005-05-11 22:47:32 +00002173 /* CmpNEZ64(Or64(x,y)) */
2174 {
2175 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2176 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2177 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2178 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2179 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2180 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2181 HReg tmp = newVRegI(env);
2182 addInstr(env, mk_iMOVsd_RR(r0, tmp));
2183 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2184 return Acc_NZ;
2185 }
2186 }
2187
sewardj176ad2f2005-04-27 11:55:08 +00002188 /* CmpNEZ64(x) */
2189 if (e->tag == Iex_Unop
2190 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2191 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2192 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2193 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2194 return Acc_NZ;
2195 }
2196
2197 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2198
sewardj42322b52005-04-20 22:57:11 +00002199 /* CmpEQ8 / CmpNE8 */
2200 if (e->tag == Iex_Binop
2201 && (e->Iex.Binop.op == Iop_CmpEQ8
sewardj1fb8c922009-07-12 12:56:53 +00002202 || e->Iex.Binop.op == Iop_CmpNE8
2203 || e->Iex.Binop.op == Iop_CasCmpEQ8
2204 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
sewardj42322b52005-04-20 22:57:11 +00002205 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2206 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2207 HReg r = newVRegI(env);
2208 addInstr(env, mk_iMOVsd_RR(r1,r));
2209 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2210 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2211 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002212 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2213 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
sewardj42322b52005-04-20 22:57:11 +00002214 default: vpanic("iselCondCode(amd64): CmpXX8");
2215 }
2216 }
2217
sewardj0af46ab2005-04-26 01:52:29 +00002218 /* CmpEQ16 / CmpNE16 */
2219 if (e->tag == Iex_Binop
2220 && (e->Iex.Binop.op == Iop_CmpEQ16
sewardj1fb8c922009-07-12 12:56:53 +00002221 || e->Iex.Binop.op == Iop_CmpNE16
2222 || e->Iex.Binop.op == Iop_CasCmpEQ16
2223 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
sewardj0af46ab2005-04-26 01:52:29 +00002224 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2225 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2226 HReg r = newVRegI(env);
2227 addInstr(env, mk_iMOVsd_RR(r1,r));
2228 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2229 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2230 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002231 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2232 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002233 default: vpanic("iselCondCode(amd64): CmpXX16");
2234 }
2235 }
2236
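   /* For the 32-bit compares below, XOR the operands and then shift
      the difference left by 32: that discards whatever junk was in
      the upper halves, so the Z flag afterwards reflects equality of
      the low 32 bits only. */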
sewardj478fe702005-04-23 01:15:47 +00002237 /* CmpEQ32 / CmpNE32 */
2238 if (e->tag == Iex_Binop
2239 && (e->Iex.Binop.op == Iop_CmpEQ32
sewardj1fb8c922009-07-12 12:56:53 +00002240 || e->Iex.Binop.op == Iop_CmpNE32
2241 || e->Iex.Binop.op == Iop_CasCmpEQ32
2242 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
sewardj478fe702005-04-23 01:15:47 +00002243 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2244 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2245 HReg r = newVRegI(env);
2246 addInstr(env, mk_iMOVsd_RR(r1,r));
2247 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
sewardj501a3392005-05-11 15:37:50 +00002248 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
sewardj478fe702005-04-23 01:15:47 +00002249 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002250 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
2251 case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
sewardj176ad2f2005-04-27 11:55:08 +00002252 default: vpanic("iselCondCode(amd64): CmpXX32");
sewardj42322b52005-04-20 22:57:11 +00002253 }
2254 }
sewardjd0a12df2005-02-10 02:07:43 +00002255
2256 /* Cmp*64*(x,y) */
2257 if (e->tag == Iex_Binop
2258 && (e->Iex.Binop.op == Iop_CmpEQ64
2259 || e->Iex.Binop.op == Iop_CmpNE64
sewardj0af46ab2005-04-26 01:52:29 +00002260 || e->Iex.Binop.op == Iop_CmpLT64S
2261 || e->Iex.Binop.op == Iop_CmpLT64U
2262 || e->Iex.Binop.op == Iop_CmpLE64S
sewardja9e4a802005-12-26 19:33:55 +00002263 || e->Iex.Binop.op == Iop_CmpLE64U
sewardj1fb8c922009-07-12 12:56:53 +00002264 || e->Iex.Binop.op == Iop_CasCmpEQ64
2265 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
sewardjd0a12df2005-02-10 02:07:43 +00002266 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2267 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2268 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2269 switch (e->Iex.Binop.op) {
sewardj1fb8c922009-07-12 12:56:53 +00002270 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
2271 case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002272 case Iop_CmpLT64S: return Acc_L;
2273 case Iop_CmpLT64U: return Acc_B;
2274 case Iop_CmpLE64S: return Acc_LE;
sewardja9e4a802005-12-26 19:33:55 +00002275 case Iop_CmpLE64U: return Acc_BE;
sewardjd0a12df2005-02-10 02:07:43 +00002276 default: vpanic("iselCondCode(amd64): CmpXX64");
2277 }
2278 }
2279
sewardj05b3b6a2005-02-04 01:44:33 +00002280 ppIRExpr(e);
2281 vpanic("iselCondCode(amd64)");
2282}
2283
2284
sewardj9b967672005-02-08 11:13:09 +00002285/*---------------------------------------------------------*/
2286/*--- ISEL: Integer expressions (128 bit) ---*/
2287/*---------------------------------------------------------*/
2288
2289/* Compute a 128-bit value into a register pair, which is returned as
 2290 the first two parameters. As with iselIntExpr_R, these will be
 2291 virtual regs (the asserts below check this); in any case they must
 2292 not be changed by subsequent code emitted by the caller. */
2293
2294static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2295 ISelEnv* env, IRExpr* e )
2296{
2297 iselInt128Expr_wrk(rHi, rLo, env, e);
2298# if 0
2299 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2300# endif
2301 vassert(hregClass(*rHi) == HRcInt64);
2302 vassert(hregIsVirtual(*rHi));
2303 vassert(hregClass(*rLo) == HRcInt64);
2304 vassert(hregIsVirtual(*rLo));
2305}
2306
2307/* DO NOT CALL THIS DIRECTLY ! */
2308static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2309 ISelEnv* env, IRExpr* e )
2310{
sewardja3e98302005-02-01 15:55:05 +00002311//.. HWord fn = 0; /* helper fn for most SIMD64 stuff */
sewardj9b967672005-02-08 11:13:09 +00002312 vassert(e);
2313 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2314
sewardja3e98302005-02-01 15:55:05 +00002315//.. /* 64-bit literal */
2316//.. if (e->tag == Iex_Const) {
2317//.. ULong w64 = e->Iex.Const.con->Ico.U64;
2318//.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
2319//.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
2320//.. HReg tLo = newVRegI(env);
2321//.. HReg tHi = newVRegI(env);
2322//.. vassert(e->Iex.Const.con->tag == Ico_U64);
2323//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2324//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2325//.. *rHi = tHi;
2326//.. *rLo = tLo;
2327//.. return;
2328//.. }
sewardj9b967672005-02-08 11:13:09 +00002329
2330 /* read 128-bit IRTemp */
sewardjdd40fdf2006-12-24 02:20:24 +00002331 if (e->tag == Iex_RdTmp) {
2332 lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp);
sewardj9b967672005-02-08 11:13:09 +00002333 return;
2334 }
2335
sewardja3e98302005-02-01 15:55:05 +00002336//.. /* 64-bit load */
2337//.. if (e->tag == Iex_LDle) {
2338//.. HReg tLo, tHi;
2339//.. X86AMode *am0, *am4;
2340//.. vassert(e->Iex.LDle.ty == Ity_I64);
2341//.. tLo = newVRegI(env);
2342//.. tHi = newVRegI(env);
2343//.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2344//.. am4 = advance4(am0);
2345//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2346//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2347//.. *rHi = tHi;
2348//.. *rLo = tLo;
2349//.. return;
2350//.. }
2351//..
2352//.. /* 64-bit GET */
2353//.. if (e->tag == Iex_Get) {
2354//.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2355//.. X86AMode* am4 = advance4(am);
2356//.. HReg tLo = newVRegI(env);
2357//.. HReg tHi = newVRegI(env);
2358//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2359//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2360//.. *rHi = tHi;
2361//.. *rLo = tLo;
2362//.. return;
2363//.. }
2364//..
2365//.. /* 64-bit GETI */
2366//.. if (e->tag == Iex_GetI) {
2367//.. X86AMode* am
2368//.. = genGuestArrayOffset( env, e->Iex.GetI.descr,
2369//.. e->Iex.GetI.ix, e->Iex.GetI.bias );
2370//.. X86AMode* am4 = advance4(am);
2371//.. HReg tLo = newVRegI(env);
2372//.. HReg tHi = newVRegI(env);
2373//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2374//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2375//.. *rHi = tHi;
2376//.. *rLo = tLo;
2377//.. return;
2378//.. }
2379//..
2380//.. /* 64-bit Mux0X */
2381//.. if (e->tag == Iex_Mux0X) {
2382//.. HReg e0Lo, e0Hi, eXLo, eXHi, r8;
2383//.. HReg tLo = newVRegI(env);
2384//.. HReg tHi = newVRegI(env);
2385//.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2386//.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2387//.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2388//.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2389//.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2390//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
2391//.. /* This assumes the first cmov32 doesn't trash the condition
2392//.. codes, so they are still available for the second cmov32 */
2393//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2394//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2395//.. *rHi = tHi;
2396//.. *rLo = tLo;
2397//.. return;
2398//.. }
sewardj9b967672005-02-08 11:13:09 +00002399
2400 /* --------- BINARY ops --------- */
2401 if (e->tag == Iex_Binop) {
2402 switch (e->Iex.Binop.op) {
sewardj7de0d3c2005-02-13 02:26:41 +00002403 /* 64 x 64 -> 128 multiply */
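         /* amd64's one-operand mulq/imulq multiplies %rax by the
            given r/m operand and leaves the 128-bit product in
            %rdx:%rax, which is why one operand is forced into %rax
            below. */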
sewardj9b967672005-02-08 11:13:09 +00002404 case Iop_MullU64:
2405 case Iop_MullS64: {
 2406 /* get one operand into %rax, and the other into a R/M.
 2407 Need to make an educated guess about which operand is
 2408 better off in which position. */
2409 HReg tLo = newVRegI(env);
2410 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002411 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
sewardj9b967672005-02-08 11:13:09 +00002412 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2413 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2414 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
sewardj501a3392005-05-11 15:37:50 +00002415 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
sewardj9b967672005-02-08 11:13:09 +00002416 /* Result is now in RDX:RAX. Tell the caller. */
2417 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2418 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2419 *rHi = tHi;
2420 *rLo = tLo;
2421 return;
2422 }
sewardj7de0d3c2005-02-13 02:26:41 +00002423
sewardja6b93d12005-02-17 09:28:28 +00002424 /* 128 x 64 -> (64(rem),64(div)) division */
2425 case Iop_DivModU128to64:
2426 case Iop_DivModS128to64: {
2427 /* Get the 128-bit operand into rdx:rax, and the other into
2428 any old R/M. */
2429 HReg sHi, sLo;
2430 HReg tLo = newVRegI(env);
2431 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002432 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
sewardja6b93d12005-02-17 09:28:28 +00002433 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2434 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2435 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2436 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2437 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2438 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2439 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2440 *rHi = tHi;
2441 *rLo = tLo;
2442 return;
2443 }
2444
2445 /* 64HLto128(e1,e2) */
2446 case Iop_64HLto128:
2447 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2448 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2449 return;
2450
sewardja3e98302005-02-01 15:55:05 +00002451//.. /* Or64/And64/Xor64 */
2452//.. case Iop_Or64:
2453//.. case Iop_And64:
2454//.. case Iop_Xor64: {
2455//.. HReg xLo, xHi, yLo, yHi;
2456//.. HReg tLo = newVRegI(env);
2457//.. HReg tHi = newVRegI(env);
2458//.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2459//.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2460//.. : Xalu_XOR;
2461//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2462//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2463//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2464//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2465//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2466//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2467//.. *rHi = tHi;
2468//.. *rLo = tLo;
2469//.. return;
2470//.. }
2471//..
2472//.. /* Add64/Sub64 */
2473//.. case Iop_Add64:
2474//.. case Iop_Sub64: {
2475//.. HReg xLo, xHi, yLo, yHi;
2476//.. HReg tLo = newVRegI(env);
2477//.. HReg tHi = newVRegI(env);
2478//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2479//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2480//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2481//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2482//.. if (e->Iex.Binop.op==Iop_Add64) {
2483//.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2484//.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2485//.. } else {
2486//.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2487//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2488//.. }
2489//.. *rHi = tHi;
2490//.. *rLo = tLo;
2491//.. return;
2492//.. }
2493//..
2494//.. /* 32HLto64(e1,e2) */
2495//.. case Iop_32HLto64:
2496//.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2497//.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2498//.. return;
2499//..
2500//.. /* 64-bit shifts */
2501//.. case Iop_Shl64: {
2502//.. /* We use the same ingenious scheme as gcc. Put the value
2503//.. to be shifted into %hi:%lo, and the shift amount into
2504//.. %cl. Then (dsts on right, a la ATT syntax):
2505//..
2506//.. shldl %cl, %lo, %hi -- make %hi be right for the
2507//.. -- shift amt %cl % 32
2508//.. shll %cl, %lo -- make %lo be right for the
2509//.. -- shift amt %cl % 32
2510//..
2511//.. Now, if (shift amount % 64) is in the range 32 .. 63,
2512//.. we have to do a fixup, which puts the result low half
2513//.. into the result high half, and zeroes the low half:
2514//..
2515//.. testl $32, %ecx
2516//..
2517//.. cmovnz %lo, %hi
2518//.. movl $0, %tmp -- sigh; need yet another reg
2519//.. cmovnz %tmp, %lo
2520//.. */
2521//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2522//.. tLo = newVRegI(env);
2523//.. tHi = newVRegI(env);
2524//.. tTemp = newVRegI(env);
2525//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2526//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2527//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2528//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2529//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2530//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2531//.. and those regs are legitimately modifiable. */
2532//.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2533//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
2534//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2535//.. X86RM_Reg(hregX86_ECX())));
2536//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2537//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2538//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2539//.. *rHi = tHi;
2540//.. *rLo = tLo;
2541//.. return;
2542//.. }
2543//..
2544//.. case Iop_Shr64: {
2545//.. /* We use the same ingenious scheme as gcc. Put the value
2546//.. to be shifted into %hi:%lo, and the shift amount into
2547//.. %cl. Then:
2548//..
2549//.. shrdl %cl, %hi, %lo -- make %lo be right for the
2550//.. -- shift amt %cl % 32
2551//.. shrl %cl, %hi -- make %hi be right for the
2552//.. -- shift amt %cl % 32
2553//..
2554//.. Now, if (shift amount % 64) is in the range 32 .. 63,
2555//.. we have to do a fixup, which puts the result high half
2556//.. into the result low half, and zeroes the high half:
2557//..
2558//.. testl $32, %ecx
2559//..
2560//.. cmovnz %hi, %lo
2561//.. movl $0, %tmp -- sigh; need yet another reg
2562//.. cmovnz %tmp, %hi
2563//.. */
2564//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2565//.. tLo = newVRegI(env);
2566//.. tHi = newVRegI(env);
2567//.. tTemp = newVRegI(env);
2568//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2569//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2570//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2571//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2572//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2573//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2574//.. and those regs are legitimately modifiable. */
2575//.. addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2576//.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
2577//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2578//.. X86RM_Reg(hregX86_ECX())));
2579//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2580//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2581//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2582//.. *rHi = tHi;
2583//.. *rLo = tLo;
2584//.. return;
2585//.. }
2586//..
2587//.. /* F64 -> I64 */
2588//.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2589//.. case. Unfortunately I see no easy way to avoid the
2590//.. duplication. */
2591//.. case Iop_F64toI64: {
2592//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2593//.. HReg tLo = newVRegI(env);
2594//.. HReg tHi = newVRegI(env);
2595//..
2596//.. /* Used several times ... */
2597//.. /* Careful ... this sharing is only safe because
2598//.. zero_esp/four_esp do not hold any registers which the
2599//.. register allocator could attempt to swizzle later. */
2600//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2601//.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2602//..
2603//.. /* rf now holds the value to be converted, and rrm holds
2604//.. the rounding mode value, encoded as per the
2605//.. IRRoundingMode enum. The first thing to do is set the
2606//.. FPU's rounding mode accordingly. */
2607//..
2608//.. /* Create a space for the format conversion. */
2609//.. /* subl $8, %esp */
2610//.. sub_from_esp(env, 8);
2611//..
2612//.. /* Set host rounding mode */
2613//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2614//..
2615//.. /* gistll %rf, 0(%esp) */
2616//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2617//..
2618//.. /* movl 0(%esp), %dstLo */
2619//.. /* movl 4(%esp), %dstHi */
2620//.. addInstr(env, X86Instr_Alu32R(
2621//.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2622//.. addInstr(env, X86Instr_Alu32R(
2623//.. Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2624//..
2625//.. /* Restore default FPU rounding. */
2626//.. set_FPU_rounding_default( env );
2627//..
2628//.. /* addl $8, %esp */
2629//.. add_to_esp(env, 8);
2630//..
2631//.. *rHi = tHi;
2632//.. *rLo = tLo;
2633//.. return;
2634//.. }
2635//..
sewardj9b967672005-02-08 11:13:09 +00002636 default:
2637 break;
2638 }
2639 } /* if (e->tag == Iex_Binop) */
2640
2641
sewardja3e98302005-02-01 15:55:05 +00002642//.. /* --------- UNARY ops --------- */
2643//.. if (e->tag == Iex_Unop) {
2644//.. switch (e->Iex.Unop.op) {
2645//..
2646//.. /* 32Sto64(e) */
2647//.. case Iop_32Sto64: {
2648//.. HReg tLo = newVRegI(env);
2649//.. HReg tHi = newVRegI(env);
2650//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2651//.. addInstr(env, mk_iMOVsd_RR(src,tHi));
2652//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2653//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
2654//.. *rHi = tHi;
2655//.. *rLo = tLo;
2656//.. return;
2657//.. }
2658//..
2659//.. /* 32Uto64(e) */
2660//.. case Iop_32Uto64: {
2661//.. HReg tLo = newVRegI(env);
2662//.. HReg tHi = newVRegI(env);
2663//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2664//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2665//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2666//.. *rHi = tHi;
2667//.. *rLo = tLo;
2668//.. return;
2669//.. }
sewardj1a01e652005-02-23 11:39:21 +00002670
sewardja3e98302005-02-01 15:55:05 +00002671//.. /* could do better than this, but for now ... */
2672//.. case Iop_1Sto64: {
2673//.. HReg tLo = newVRegI(env);
2674//.. HReg tHi = newVRegI(env);
2675//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2676//.. addInstr(env, X86Instr_Set32(cond,tLo));
2677//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
2678//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
2679//.. addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2680//.. *rHi = tHi;
2681//.. *rLo = tLo;
2682//.. return;
2683//.. }
2684//..
2685//.. /* Not64(e) */
2686//.. case Iop_Not64: {
2687//.. HReg tLo = newVRegI(env);
2688//.. HReg tHi = newVRegI(env);
2689//.. HReg sHi, sLo;
2690//.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2691//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2692//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2693//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
2694//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
2695//.. *rHi = tHi;
2696//.. *rLo = tLo;
2697//.. return;
2698//.. }
2699//..
sewardja3e98302005-02-01 15:55:05 +00002700//.. default:
2701//.. break;
2702//.. }
2703//.. } /* if (e->tag == Iex_Unop) */
2704//..
2705//..
2706//.. /* --------- CCALL --------- */
2707//.. if (e->tag == Iex_CCall) {
2708//.. HReg tLo = newVRegI(env);
2709//.. HReg tHi = newVRegI(env);
2710//..
2711//.. /* Marshal args, do the call, clear stack. */
2712//.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2713//..
2714//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2715//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2716//.. *rHi = tHi;
2717//.. *rLo = tLo;
2718//.. return;
2719//.. }
sewardj9b967672005-02-08 11:13:09 +00002720
2721 ppIRExpr(e);
2722 vpanic("iselInt128Expr");
2723}
2724
2725
sewardj8d965312005-02-25 02:48:47 +00002726/*---------------------------------------------------------*/
2727/*--- ISEL: Floating point expressions (32 bit) ---*/
2728/*---------------------------------------------------------*/
2729
2730/* Nothing interesting here; really just wrappers for
2731 64-bit stuff. */
2732
2733static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2734{
2735 HReg r = iselFltExpr_wrk( env, e );
2736# if 0
2737 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2738# endif
2739 vassert(hregClass(r) == HRcVec128);
2740 vassert(hregIsVirtual(r));
2741 return r;
2742}
2743
2744/* DO NOT CALL THIS DIRECTLY */
2745static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2746{
2747 IRType ty = typeOfIRExpr(env->type_env,e);
2748 vassert(ty == Ity_F32);
2749
sewardjdd40fdf2006-12-24 02:20:24 +00002750 if (e->tag == Iex_RdTmp) {
2751 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardjc49ce232005-02-25 13:03:03 +00002752 }
2753
sewardje768e922009-11-26 17:17:37 +00002754 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardjc49ce232005-02-25 13:03:03 +00002755 AMD64AMode* am;
2756 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002757 vassert(e->Iex.Load.ty == Ity_F32);
2758 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardjc49ce232005-02-25 13:03:03 +00002759 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2760 return res;
2761 }
sewardj8d965312005-02-25 02:48:47 +00002762
2763 if (e->tag == Iex_Binop
2764 && e->Iex.Binop.op == Iop_F64toF32) {
2765 /* Although the result is still held in a standard SSE register,
2766 we need to round it to reflect the loss of accuracy/range
2767 entailed in casting it to a 32-bit float. */
2768 HReg dst = newVRegV(env);
2769 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2770 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2771 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2772 set_SSE_rounding_default( env );
2773 return dst;
2774 }
2775
sewardjc49ce232005-02-25 13:03:03 +00002776 if (e->tag == Iex_Get) {
2777 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2778 hregAMD64_RBP() );
2779 HReg res = newVRegV(env);
2780 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2781 return res;
2782 }
2783
sewardj5992bd02005-05-11 02:13:42 +00002784 if (e->tag == Iex_Unop
2785 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2786 /* Given an I32, produce an IEEE754 float with the same bit
2787 pattern. */
2788 HReg dst = newVRegV(env);
2789 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2790 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2791 addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2792 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2793 return dst;
2794 }
sewardj8d965312005-02-25 02:48:47 +00002795
2796 ppIRExpr(e);
2797 vpanic("iselFltExpr_wrk");
2798}
sewardj18303862005-02-21 12:36:54 +00002799
2800
2801/*---------------------------------------------------------*/
2802/*--- ISEL: Floating point expressions (64 bit) ---*/
2803/*---------------------------------------------------------*/
2804
2805/* Compute a 64-bit floating point value into the lower half of an xmm
2806 register, the identity of which is returned. As with
2807 iselIntExpr_R, the returned reg will be virtual, and it must not be
2808 changed by subsequent code emitted by the caller.
2809*/
2810
2811/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2812
2813 Type S (1 bit) E (11 bits) F (52 bits)
2814 ---- --------- ----------- -----------
2815 signalling NaN u 2047 (max) .0uuuuu---u
2816 (with at least
2817 one 1 bit)
2818 quiet NaN u 2047 (max) .1uuuuu---u
2819
2820 negative infinity 1 2047 (max) .000000---0
2821
2822 positive infinity 0 2047 (max) .000000---0
2823
2824 negative zero 1 0 .000000---0
2825
2826 positive zero 0 0 .000000---0
2827*/
2828
2829static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2830{
2831 HReg r = iselDblExpr_wrk( env, e );
2832# if 0
2833 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2834# endif
2835 vassert(hregClass(r) == HRcVec128);
2836 vassert(hregIsVirtual(r));
2837 return r;
2838}
2839
2840/* DO NOT CALL THIS DIRECTLY */
2841static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2842{
2843 IRType ty = typeOfIRExpr(env->type_env,e);
2844 vassert(e);
2845 vassert(ty == Ity_F64);
2846
sewardjdd40fdf2006-12-24 02:20:24 +00002847 if (e->tag == Iex_RdTmp) {
2848 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj18303862005-02-21 12:36:54 +00002849 }
2850
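   /* F64 literals go via an integer register: materialise the 64-bit
      pattern as an immediate, push it, and reload it from the stack
      into an xmm register. */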
sewardj8d965312005-02-25 02:48:47 +00002851 if (e->tag == Iex_Const) {
2852 union { ULong u64; Double f64; } u;
2853 HReg res = newVRegV(env);
2854 HReg tmp = newVRegI(env);
2855 vassert(sizeof(u) == 8);
2856 vassert(sizeof(u.u64) == 8);
2857 vassert(sizeof(u.f64) == 8);
2858
2859 if (e->Iex.Const.con->tag == Ico_F64) {
2860 u.f64 = e->Iex.Const.con->Ico.F64;
2861 }
2862 else if (e->Iex.Const.con->tag == Ico_F64i) {
2863 u.u64 = e->Iex.Const.con->Ico.F64i;
2864 }
2865 else
2866 vpanic("iselDblExpr(amd64): const");
2867
2868 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2869 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2870 addInstr(env, AMD64Instr_SseLdSt(
2871 True/*load*/, 8, res,
2872 AMD64AMode_IR(0, hregAMD64_RSP())
2873 ));
2874 add_to_rsp(env, 8);
2875 return res;
2876 }
sewardj9da16972005-02-21 13:58:26 +00002877
sewardje768e922009-11-26 17:17:37 +00002878 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj9da16972005-02-21 13:58:26 +00002879 AMD64AMode* am;
2880 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002881 vassert(e->Iex.Load.ty == Ity_F64);
2882 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj9da16972005-02-21 13:58:26 +00002883 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2884 return res;
2885 }
sewardj18303862005-02-21 12:36:54 +00002886
2887 if (e->tag == Iex_Get) {
2888 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2889 hregAMD64_RBP() );
2890 HReg res = newVRegV(env);
2891 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2892 return res;
2893 }
2894
sewardj8d965312005-02-25 02:48:47 +00002895 if (e->tag == Iex_GetI) {
2896 AMD64AMode* am
2897 = genGuestArrayOffset(
2898 env, e->Iex.GetI.descr,
2899 e->Iex.GetI.ix, e->Iex.GetI.bias );
2900 HReg res = newVRegV(env);
2901 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2902 return res;
2903 }
2904
sewardj4796d662006-02-05 16:06:26 +00002905 if (e->tag == Iex_Triop) {
sewardj137015d2005-03-27 04:01:15 +00002906 AMD64SseOp op = Asse_INVALID;
sewardj4796d662006-02-05 16:06:26 +00002907 switch (e->Iex.Triop.op) {
sewardj137015d2005-03-27 04:01:15 +00002908 case Iop_AddF64: op = Asse_ADDF; break;
2909 case Iop_SubF64: op = Asse_SUBF; break;
2910 case Iop_MulF64: op = Asse_MULF; break;
2911 case Iop_DivF64: op = Asse_DIVF; break;
2912 default: break;
2913 }
2914 if (op != Asse_INVALID) {
2915 HReg dst = newVRegV(env);
sewardj4796d662006-02-05 16:06:26 +00002916 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
2917 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
sewardj137015d2005-03-27 04:01:15 +00002918 addInstr(env, mk_vMOVsd_RR(argL, dst));
sewardj4796d662006-02-05 16:06:26 +00002919 /* XXXROUNDINGFIXME */
2920 /* set roundingmode here */
sewardj137015d2005-03-27 04:01:15 +00002921 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2922 return dst;
2923 }
2924 }
2925
sewardjb183b852006-02-03 16:08:03 +00002926 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
sewardj25a85812005-05-08 23:03:48 +00002927 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2928 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2929 HReg dst = newVRegV(env);
2930
2931 /* arg now holds the value to be rounded. The first thing to do
2932 is set the FPU's rounding mode accordingly. */
2933
2934 /* Set host x87 rounding mode */
2935 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2936
2937 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2938 addInstr(env, AMD64Instr_A87Free(1));
2939 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2940 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2941 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
2942 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2943
2944 /* Restore default x87 rounding. */
2945 set_FPU_rounding_default( env );
2946
2947 return dst;
2948 }
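   /* A hedged sketch of the sequence built above, assuming Afp_ROUND
      maps to the x87 'frndint' (register names are placeholders):
         fldcw  <rmode>             ; set_FPU_rounding_mode
         movsd  %arg, -8(%rsp)
         ffree  ...                 ; A87Free: make one slot usable
         fldl   -8(%rsp)
         frndint                    ; rounds per the current x87 mode
         fstpl  -8(%rsp)
         movsd  -8(%rsp), %dst
         fldcw  <default>           ; set_FPU_rounding_default
      The value makes two memory round trips, since there is no
      direct xmm to x87 move. */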
2949
sewardj4796d662006-02-05 16:06:26 +00002950 if (e->tag == Iex_Triop
2951 && (e->Iex.Triop.op == Iop_ScaleF64
2952 || e->Iex.Triop.op == Iop_AtanF64
2953 || e->Iex.Triop.op == Iop_Yl2xF64
sewardjf4c803b2006-09-11 11:07:34 +00002954 || e->Iex.Triop.op == Iop_Yl2xp1F64
sewardj4970e4e2008-10-11 10:07:55 +00002955 || e->Iex.Triop.op == Iop_PRemF64
2956 || e->Iex.Triop.op == Iop_PRem1F64)
sewardj25a85812005-05-08 23:03:48 +00002957 ) {
2958 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
sewardj4796d662006-02-05 16:06:26 +00002959 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
2960 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
sewardj25a85812005-05-08 23:03:48 +00002961 HReg dst = newVRegV(env);
sewardjf4c803b2006-09-11 11:07:34 +00002962 Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
sewardj4970e4e2008-10-11 10:07:55 +00002963 || e->Iex.Triop.op == Iop_PRemF64
2964 || e->Iex.Triop.op == Iop_PRem1F64);
sewardj25a85812005-05-08 23:03:48 +00002965 addInstr(env, AMD64Instr_A87Free(2));
2966
2967 /* one arg -> top of x87 stack */
2968 addInstr(env, AMD64Instr_SseLdSt(
2969 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
2970 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2971
2972 /* other arg -> top of x87 stack */
2973 addInstr(env, AMD64Instr_SseLdSt(
2974 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
2975 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2976
2977 /* do it */
sewardj4796d662006-02-05 16:06:26 +00002978 /* XXXROUNDINGFIXME */
2979 /* set roundingmode here */
2980 switch (e->Iex.Triop.op) {
sewardj25a85812005-05-08 23:03:48 +00002981 case Iop_ScaleF64:
2982 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
2983 break;
2984 case Iop_AtanF64:
2985 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
2986 break;
2987 case Iop_Yl2xF64:
2988 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
2989 break;
sewardj5e205372005-05-09 02:57:08 +00002990 case Iop_Yl2xp1F64:
2991 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
2992 break;
sewardjf4c803b2006-09-11 11:07:34 +00002993 case Iop_PRemF64:
2994 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
2995 break;
sewardj4970e4e2008-10-11 10:07:55 +00002996 case Iop_PRem1F64:
2997 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
2998 break;
sewardj25a85812005-05-08 23:03:48 +00002999 default:
3000 vassert(0);
3001 }
3002
3003 /* save result */
3004 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
3005 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3006 return dst;
3007 }
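   /* A gloss on the arg2first logic above (illustrative, not from
      the source): after the two pushes, the value pushed SECOND sits
      in ST(0) and the one pushed first in ST(1).  'fscale' computes
      ST(0) * 2^trunc(ST(1)) and 'fprem' computes ST(0) rem ST(1),
      so for ScaleF64/PRemF64/PRem1F64 the second operand (local
      arg2, i.e. e->Iex.Triop.arg3) must be pushed first so it lands
      in ST(1); the Atan/Yl2x family wants its operands the other
      way up, hence arg2first == False for those. */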
sewardj1a01e652005-02-23 11:39:21 +00003008
sewardj6c299f32009-12-31 18:00:12 +00003009 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00003010 HReg dst = newVRegV(env);
3011 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
3012 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
3013 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
3014 set_SSE_rounding_default( env );
3015 return dst;
3016 }
3017
sewardj6c299f32009-12-31 18:00:12 +00003018 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
sewardj1a01e652005-02-23 11:39:21 +00003019 HReg dst = newVRegV(env);
3020 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3021 set_SSE_rounding_default( env );
3022 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
3023 return dst;
3024 }
3025
sewardj137015d2005-03-27 04:01:15 +00003026 if (e->tag == Iex_Unop
3027 && (e->Iex.Unop.op == Iop_NegF64
3028 || e->Iex.Unop.op == Iop_AbsF64)) {
sewardj8d965312005-02-25 02:48:47 +00003029 /* Sigh ... very rough code. Could do much better. */
sewardj137015d2005-03-27 04:01:15 +00003030 /* Get the 128-bit literal 00---0 10---0 into a register
3031 and xor/andn it with the value to be negated. */
sewardj8d965312005-02-25 02:48:47 +00003032 HReg r1 = newVRegI(env);
3033 HReg dst = newVRegV(env);
sewardj137015d2005-03-27 04:01:15 +00003034 HReg tmp = newVRegV(env);
sewardj8d965312005-02-25 02:48:47 +00003035 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3036 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
sewardj137015d2005-03-27 04:01:15 +00003037 addInstr(env, mk_vMOVsd_RR(src,tmp));
sewardj8d965312005-02-25 02:48:47 +00003038 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3039 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3040 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
sewardj137015d2005-03-27 04:01:15 +00003041 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
3042
3043 if (e->Iex.Unop.op == Iop_NegF64)
3044 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3045 else
3046 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
3047
sewardj8d965312005-02-25 02:48:47 +00003048 add_to_rsp(env, 16);
3049 return dst;
3050 }
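   /* What the two pushes build (an illustrative note): the 16 bytes
      at 0(%rsp), read as a little-endian V128, are low64 ==
      0x8000000000000000 and high64 == 0, so only the sign bit of
      the low-lane double is set.  XOR with that mask flips the sign
      (NegF64); ANDN computes (~mask) & value, clearing it (AbsF64).
      The upper 64 bits of dst just carry whatever was in the upper
      half of src, which is irrelevant for an F64 result. */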
3051
sewardj4796d662006-02-05 16:06:26 +00003052 if (e->tag == Iex_Binop) {
sewardj25a85812005-05-08 23:03:48 +00003053 A87FpOp fpop = Afp_INVALID;
sewardj4796d662006-02-05 16:06:26 +00003054 switch (e->Iex.Binop.op) {
sewardj25a85812005-05-08 23:03:48 +00003055 case Iop_SqrtF64: fpop = Afp_SQRT; break;
sewardj5e205372005-05-09 02:57:08 +00003056 case Iop_SinF64: fpop = Afp_SIN; break;
3057 case Iop_CosF64: fpop = Afp_COS; break;
3058 case Iop_TanF64: fpop = Afp_TAN; break;
sewardj25a85812005-05-08 23:03:48 +00003059 case Iop_2xm1F64: fpop = Afp_2XM1; break;
3060 default: break;
3061 }
3062 if (fpop != Afp_INVALID) {
3063 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
sewardj4796d662006-02-05 16:06:26 +00003064 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
sewardj25a85812005-05-08 23:03:48 +00003065 HReg dst = newVRegV(env);
sewardj4796d662006-02-05 16:06:26 +00003066 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
sewardj25a85812005-05-08 23:03:48 +00003067 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
sewardj5e205372005-05-09 02:57:08 +00003068 addInstr(env, AMD64Instr_A87Free(nNeeded));
sewardj25a85812005-05-08 23:03:48 +00003069 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
sewardj4796d662006-02-05 16:06:26 +00003070 /* XXXROUNDINGFIXME */
3071 /* set roundingmode here */
sewardj25a85812005-05-08 23:03:48 +00003072 addInstr(env, AMD64Instr_A87FpOp(fpop));
sewardj4796d662006-02-05 16:06:26 +00003073 if (e->Iex.Binop.op==Iop_TanF64) {
sewardj5e205372005-05-09 02:57:08 +00003074 /* get rid of the extra 1.0 that fptan pushes */
3075 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
3076 }
sewardj25a85812005-05-08 23:03:48 +00003077 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
3078 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3079 return dst;
3080 }
3081 }
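   /* Hedged sketch of the unary x87 ops above (placeholder register
      names; assuming the obvious opcode mapping to fsqrt, fsin,
      fcos, fptan and f2xm1):
         movsd  %arg, -8(%rsp)
         ffree  ...                ; 1 slot, or 2 for fptan
         fldl   -8(%rsp)
         f<op>
         [fstp  %st(0)]            ; TanF64 only: drop the 1.0
                                   ; that fptan pushes
         fstpl  -8(%rsp)
         movsd  -8(%rsp), %dst
      which is why nNeeded is 2 for TanF64 and 1 otherwise. */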
sewardjc49ce232005-02-25 13:03:03 +00003082
3083 if (e->tag == Iex_Unop) {
3084 switch (e->Iex.Unop.op) {
sewardja3e98302005-02-01 15:55:05 +00003085//.. case Iop_I32toF64: {
3086//.. HReg dst = newVRegF(env);
3087//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3088//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3089//.. set_FPU_rounding_default(env);
3090//.. addInstr(env, X86Instr_FpLdStI(
3091//.. True/*load*/, 4, dst,
3092//.. X86AMode_IR(0, hregX86_ESP())));
sewardjc49ce232005-02-25 13:03:03 +00003093//.. add_to_esp(env, 4);
sewardja3e98302005-02-01 15:55:05 +00003094//.. return dst;
3095//.. }
sewardj924215b2005-03-26 21:50:31 +00003096 case Iop_ReinterpI64asF64: {
3097 /* Given an I64, produce an IEEE754 double with the same
3098 bit pattern. */
3099 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3100 HReg dst = newVRegV(env);
3101 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
3102 /* paranoia */
3103 set_SSE_rounding_default(env);
3104 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
3105 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3106 return dst;
3107 }
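      /* Hedged sketch: the reinterpret is a pure bit-copy through
         memory, e.g. for a register source
            movq  %src, -8(%rsp)
            movsd -8(%rsp), %dst
         No conversion instruction is involved; the rounding-mode
         reset above really is just paranoia. */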
sewardjc49ce232005-02-25 13:03:03 +00003108 case Iop_F32toF64: {
sewardj9a036bf2005-03-14 18:19:08 +00003109 HReg f32;
sewardjc49ce232005-02-25 13:03:03 +00003110 HReg f64 = newVRegV(env);
3111 /* this shouldn't be necessary, but be paranoid ... */
3112 set_SSE_rounding_default(env);
sewardj9a036bf2005-03-14 18:19:08 +00003113 f32 = iselFltExpr(env, e->Iex.Unop.arg);
sewardjc49ce232005-02-25 13:03:03 +00003114 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
3115 return f64;
3116 }
3117 default:
3118 break;
3119 }
3120 }
sewardj8d965312005-02-25 02:48:47 +00003121
3122 /* --------- MULTIPLEX --------- */
3123 if (e->tag == Iex_Mux0X) {
3124 HReg r8, rX, r0, dst;
3125 vassert(ty == Ity_F64);
3126 vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
3127 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3128 rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
3129 r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
3130 dst = newVRegV(env);
3131 addInstr(env, mk_vMOVsd_RR(rX,dst));
sewardj501a3392005-05-11 15:37:50 +00003132 addInstr(env, AMD64Instr_Test64(0xFF, r8));
sewardj8d965312005-02-25 02:48:47 +00003133 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
3134 return dst;
3135 }
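   /* Illustrative expansion (SseCMov is a pseudo-insn; assuming it
      becomes a conditional jump around a vector move):
         <dst := rX>
         testq  $0xFF, %r8
         jnz    1f
         <dst := r0>
      1:
      i.e. dst = (cond & 0xFF) ? exprX : expr0, matching the IR rule
      that a zero condition selects expr0. */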
sewardj18303862005-02-21 12:36:54 +00003136
3137 ppIRExpr(e);
3138 vpanic("iselDblExpr_wrk");
3139}
sewardjc2bcb6f2005-02-07 00:17:12 +00003140
sewardj0852a132005-02-21 08:28:46 +00003141
3142/*---------------------------------------------------------*/
3143/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3144/*---------------------------------------------------------*/
3145
3146static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3147{
3148 HReg r = iselVecExpr_wrk( env, e );
3149# if 0
3150 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3151# endif
3152 vassert(hregClass(r) == HRcVec128);
3153 vassert(hregIsVirtual(r));
3154 return r;
3155}
3156
3157
3158/* DO NOT CALL THIS DIRECTLY */
3159static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3160{
sewardj9da16972005-02-21 13:58:26 +00003161 Bool arg1isEReg = False;
sewardj0852a132005-02-21 08:28:46 +00003162 AMD64SseOp op = Asse_INVALID;
3163 IRType ty = typeOfIRExpr(env->type_env,e);
3164 vassert(e);
3165 vassert(ty == Ity_V128);
3166
sewardjdd40fdf2006-12-24 02:20:24 +00003167 if (e->tag == Iex_RdTmp) {
3168 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj0852a132005-02-21 08:28:46 +00003169 }
3170
3171 if (e->tag == Iex_Get) {
3172 HReg dst = newVRegV(env);
3173 addInstr(env, AMD64Instr_SseLdSt(
3174 True/*load*/,
sewardj18303862005-02-21 12:36:54 +00003175 16,
sewardj0852a132005-02-21 08:28:46 +00003176 dst,
3177 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
3178 )
3179 );
3180 return dst;
3181 }
3182
sewardje768e922009-11-26 17:17:37 +00003183 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj1a01e652005-02-23 11:39:21 +00003184 HReg dst = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00003185 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj1a01e652005-02-23 11:39:21 +00003186 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
3187 return dst;
3188 }
3189
3190 if (e->tag == Iex_Const) {
3191 HReg dst = newVRegV(env);
3192 vassert(e->Iex.Const.con->tag == Ico_V128);
sewardj9ba870d2010-04-02 11:29:23 +00003193 switch (e->Iex.Const.con->Ico.V128) {
3194 case 0x0000:
3195 dst = generate_zeroes_V128(env);
3196 return dst;
3197 case 0xFFFF:
3198 dst = generate_ones_V128(env);
3199 return dst;
3200 default:
3201 break;
sewardj1a01e652005-02-23 11:39:21 +00003202 }
sewardj9ba870d2010-04-02 11:29:23 +00003203 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
ded403e792010-04-02 14:15:58 +00003204 const ULong const_z64 = 0x0000000000000000ULL;
3205 const ULong const_o64 = 0xFFFFFFFFFFFFFFFFULL;
3206 const ULong const_z32o32 = 0x00000000FFFFFFFFULL;
3207 const ULong const_o32z32 = 0xFFFFFFFF00000000ULL;
sewardj9ba870d2010-04-02 11:29:23 +00003208 switch (e->Iex.Const.con->Ico.V128) {
3209 case 0x0000: case 0xFFFF:
3210 vassert(0); /* handled just above */
ded403e792010-04-02 14:15:58 +00003211 /* do push_uimm64 twice, first time for the high-order half. */
3212 case 0x00F0:
3213 push_uimm64(env, const_z64);
3214 push_uimm64(env, const_o32z32);
3215 break;
sewardj9ba870d2010-04-02 11:29:23 +00003216 case 0x00FF:
ded403e792010-04-02 14:15:58 +00003217 push_uimm64(env, const_z64);
3218 push_uimm64(env, const_o64);
sewardj9ba870d2010-04-02 11:29:23 +00003219 break;
ded403e792010-04-02 14:15:58 +00003220 case 0x000F:
3221 push_uimm64(env, const_z64);
3222 push_uimm64(env, const_z32o32);
sewardj9ba870d2010-04-02 11:29:23 +00003223 break;
ded403e792010-04-02 14:15:58 +00003224 case 0x0F00:
3225 push_uimm64(env, const_z32o32);
3226 push_uimm64(env, const_z64);
3227 break;
3228 case 0x0F0F:
3229 push_uimm64(env, const_z32o32);
3230 push_uimm64(env, const_z32o32);
3231 break;
3232 case 0x0FF0:
3233 push_uimm64(env, const_z32o32);
3234 push_uimm64(env, const_o32z32);
3235 break;
3236 case 0x0FFF:
3237 push_uimm64(env, const_z32o32);
3238 push_uimm64(env, const_o64);
3239 break;
3240 case 0xF000:
3241 push_uimm64(env, const_o32z32);
3242 push_uimm64(env, const_z64);
3243 break;
3244 case 0xF00F:
3245 push_uimm64(env, const_o32z32);
3246 push_uimm64(env, const_z32o32);
3247 break;
3248 case 0xF0F0:
3249 push_uimm64(env, const_o32z32);
3250 push_uimm64(env, const_o32z32);
3251 break;
3252 case 0xF0FF:
3253 push_uimm64(env, const_o32z32);
3254 push_uimm64(env, const_o64);
3255 break;
sewardj9ba870d2010-04-02 11:29:23 +00003256 case 0xFF00:
ded403e792010-04-02 14:15:58 +00003257 push_uimm64(env, const_o64);
3258 push_uimm64(env, const_z64);
3259 break;
3260 case 0xFF0F:
3261 push_uimm64(env, const_o64);
3262 push_uimm64(env, const_z32o32);
3263 break;
3264 case 0xFFF0:
3265 push_uimm64(env, const_o64);
3266 push_uimm64(env, const_o32z32);
sewardj9ba870d2010-04-02 11:29:23 +00003267 break;
3268 default:
3269 goto vec_fail;
3270 }
3271 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
3272 add_to_rsp(env, 16);
3273 return dst;
sewardj1a01e652005-02-23 11:39:21 +00003274 }
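   /* Worked example (illustrative): the Ico_V128 value is a 16-bit
      mask, one bit per byte of the vector, bit 0 for the least
      significant byte; a set bit means that byte is 0xFF.  So
      0x00FF denotes a V128 whose low 8 bytes are all-ones and whose
      high 8 bytes are zero: the code pushes const_z64 (the
      high-order half) first, then const_o64, leaving the correct
      little-endian 16-byte image at 0(%rsp) for the load above. */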
sewardj0852a132005-02-21 08:28:46 +00003275
3276 if (e->tag == Iex_Unop) {
3277 switch (e->Iex.Unop.op) {
3278
sewardj8d965312005-02-25 02:48:47 +00003279 case Iop_NotV128: {
3280 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3281 return do_sse_NotV128(env, arg);
3282 }
3283
sewardj09717342005-05-05 21:34:02 +00003284 case Iop_CmpNEZ64x2: {
3285 /* We can use SSE2 instructions for this. */
3286 /* Ideally, we want to do a 64Ix2 comparison against zero of
3287 the operand. Problem is no such insn exists. Solution
3288 therefore is to do a 32Ix4 comparison instead, and bitwise-
3289 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3290 let the not'd result of this initial comparison be a:b:c:d.
3291 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3292 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3293 giving the required result.
3294
3295 The required selection sequence is 2,3,0,1, which
3296 according to Intel's documentation means the pshufd
3297 literal value is 0xB1, that is,
3298 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3299 */
3300 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
sewardjac530442005-05-11 16:13:37 +00003301 HReg tmp = generate_zeroes_V128(env);
sewardj09717342005-05-05 21:34:02 +00003302 HReg dst = newVRegV(env);
sewardj09717342005-05-05 21:34:02 +00003303 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3304 tmp = do_sse_NotV128(env, tmp);
3305 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
3306 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
3307 return dst;
3308 }
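   /* Worked example for the sequence above (illustrative): take
      input 64-bit lanes {0, 0x0000000500000000}, i.e. 32-bit lanes
      0:0:0:5 (low to high).  pcmpeqd against zero gives lane masks
      FF:FF:FF:00; NOT gives 00:00:00:FF; pshufd 0xB1 swaps within
      pairs, giving 00:00:FF:00; the final OR yields 00:00:FF:FF,
      i.e. 64-bit lanes {0, all-ones}, which is exactly CmpNEZ64x2
      of the input. */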
3309
sewardjac530442005-05-11 16:13:37 +00003310 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3311 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3312 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
3313 do_CmpNEZ_vector:
3314 {
3315 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3316 HReg tmp = newVRegV(env);
3317 HReg zero = generate_zeroes_V128(env);
3318 HReg dst;
3319 addInstr(env, mk_vMOVsd_RR(arg, tmp));
3320 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
3321 dst = do_sse_NotV128(env, tmp);
3322 return dst;
3323 }
sewardja7ba8c42005-05-10 20:08:34 +00003324
3325 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
3326 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
3327 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary;
3328 do_32Fx4_unary:
3329 {
3330 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3331 HReg dst = newVRegV(env);
3332 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
3333 return dst;
3334 }
3335
sewardja3e98302005-02-01 15:55:05 +00003336//.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
sewardj97628592005-05-10 22:42:54 +00003337//.. case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
3338 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
3339 do_64Fx2_unary:
3340 {
3341 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3342 HReg dst = newVRegV(env);
3343 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
3344 return dst;
3345 }
sewardja7ba8c42005-05-10 20:08:34 +00003346
3347 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
3348 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3349 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
3350 do_32F0x4_unary:
3351 {
3352 /* A bit subtle. We have to copy the arg to the result
3353 register first, because actually doing the SSE scalar insn
3354 leaves the upper 3/4 of the destination register
3355 unchanged, whereas the required semantics of these
3356 primops are that the upper 3/4 is simply copied in from the
3357 argument. */
3358 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3359 HReg dst = newVRegV(env);
3360 addInstr(env, mk_vMOVsd_RR(arg, dst));
3361 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
3362 return dst;
3363 }
3364
sewardja3e98302005-02-01 15:55:05 +00003365//.. case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3366//.. case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
sewardj0852a132005-02-21 08:28:46 +00003367 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
3368 do_64F0x2_unary:
3369 {
3370 /* A bit subtle. We have to copy the arg to the result
3371 register first, because actually doing the SSE scalar insn
3372 leaves the upper half of the destination register
3373 unchanged, whereas the required semantics of these
3374 primops are that the upper half is simply copied in from the
3375 argument. */
3376 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3377 HReg dst = newVRegV(env);
3378 addInstr(env, mk_vMOVsd_RR(arg, dst));
3379 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3380 return dst;
3381 }
3382
sewardj8d965312005-02-25 02:48:47 +00003383 case Iop_32UtoV128: {
3384 HReg dst = newVRegV(env);
3385 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3386 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3387 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3388 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3389 return dst;
3390 }
sewardj0852a132005-02-21 08:28:46 +00003391
3392 case Iop_64UtoV128: {
3393 HReg dst = newVRegV(env);
3394 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3395 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3396 addInstr(env, AMD64Instr_Push(rmi));
3397 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3398 add_to_rsp(env, 8);
3399 return dst;
3400 }
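      /* Sketch (illustrative, mnemonics approximate): SseLdzLO is
         the zero-extending scalar load into an xmm register, so
         64UtoV128 becomes roughly
            pushq  <arg>
            movq   0(%rsp), %dst    ; clears the upper 64 bits
            addq   $8, %rsp
         and 32UtoV128 above is the movd analogue, done through the
         redzone instead of a push. */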
3401
3402 default:
3403 break;
3404 } /* switch (e->Iex.Unop.op) */
3405 } /* if (e->tag == Iex_Unop) */
3406
3407 if (e->tag == Iex_Binop) {
3408 switch (e->Iex.Binop.op) {
3409
sewardj18303862005-02-21 12:36:54 +00003410 case Iop_SetV128lo64: {
3411 HReg dst = newVRegV(env);
3412 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3413 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
sewardj478fe702005-04-23 01:15:47 +00003414 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3415 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3416 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
3417 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3418 return dst;
3419 }
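      /* Sketch of the stack round trip above (illustrative,
         mnemonics approximate):
            movups %srcV, -16(%rsp)   ; spill the whole vector
            movq   %srcI, -16(%rsp)   ; overwrite its low 8 bytes
            movups -16(%rsp), %dst    ; reload the edited vector
         SetV128lo32 below differs only in using a 4-byte store for
         the overwrite. */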
3420
3421 case Iop_SetV128lo32: {
3422 HReg dst = newVRegV(env);
3423 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3424 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3425 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3426 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3427 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
3428 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
sewardj18303862005-02-21 12:36:54 +00003429 return dst;
3430 }
3431
sewardj1a01e652005-02-23 11:39:21 +00003432 case Iop_64HLtoV128: {
3433 AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
3434 HReg dst = newVRegV(env);
3435 /* do this via the stack (easy, convenient, etc) */
3436 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
3437 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
3438 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
3439 add_to_rsp(env, 16);
3440 return dst;
3441 }
3442
sewardj432f8b62005-05-10 02:50:05 +00003443 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3444 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3445 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
sewardjb9282632005-11-05 02:33:25 +00003446 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
sewardj432f8b62005-05-10 02:50:05 +00003447 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4;
3448 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4;
3449 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
3450 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
3451 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4;
3452 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4;
3453 do_32Fx4:
3454 {
3455 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3456 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3457 HReg dst = newVRegV(env);
3458 addInstr(env, mk_vMOVsd_RR(argL, dst));
3459 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3460 return dst;
3461 }
3462
sewardj97628592005-05-10 22:42:54 +00003463 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3464 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3465 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
sewardjb9282632005-11-05 02:33:25 +00003466 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
sewardj4c328cf2005-05-05 12:05:54 +00003467 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
sewardj5992bd02005-05-11 02:13:42 +00003468 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2;
3469 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
3470 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
sewardj4c328cf2005-05-05 12:05:54 +00003471 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2;
3472 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2;
3473 do_64Fx2:
3474 {
3475 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3476 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3477 HReg dst = newVRegV(env);
3478 addInstr(env, mk_vMOVsd_RR(argL, dst));
3479 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3480 return dst;
3481 }
sewardj8d965312005-02-25 02:48:47 +00003482
sewardj432f8b62005-05-10 02:50:05 +00003483 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
sewardj3aba9eb2005-03-30 23:20:47 +00003484 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
sewardj4c328cf2005-05-05 12:05:54 +00003485 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
sewardjb9282632005-11-05 02:33:25 +00003486 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003487 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
sewardjc49ce232005-02-25 13:03:03 +00003488 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
sewardj37d52572005-02-25 14:22:12 +00003489 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
3490 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003491 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3492 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
3493 do_32F0x4: {
3494 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3495 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3496 HReg dst = newVRegV(env);
3497 addInstr(env, mk_vMOVsd_RR(argL, dst));
3498 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3499 return dst;
3500 }
3501
sewardj137015d2005-03-27 04:01:15 +00003502 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
sewardj8d965312005-02-25 02:48:47 +00003503 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
sewardj137015d2005-03-27 04:01:15 +00003504 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
sewardjb9282632005-11-05 02:33:25 +00003505 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003506 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3507 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
sewardj1a01e652005-02-23 11:39:21 +00003508 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
sewardjc49ce232005-02-25 13:03:03 +00003509 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003510 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3511 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
3512 do_64F0x2: {
3513 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3514 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3515 HReg dst = newVRegV(env);
3516 addInstr(env, mk_vMOVsd_RR(argL, dst));
3517 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3518 return dst;
3519 }
3520
sewardj97628592005-05-10 22:42:54 +00003521 case Iop_QNarrow32Sx4:
3522 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3523 case Iop_QNarrow16Sx8:
3524 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3525 case Iop_QNarrow16Ux8:
3526 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3527
3528 case Iop_InterleaveHI8x16:
3529 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3530 case Iop_InterleaveHI16x8:
3531 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3532 case Iop_InterleaveHI32x4:
3533 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3534 case Iop_InterleaveHI64x2:
3535 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3536
3537 case Iop_InterleaveLO8x16:
3538 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3539 case Iop_InterleaveLO16x8:
3540 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3541 case Iop_InterleaveLO32x4:
3542 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3543 case Iop_InterleaveLO64x2:
3544 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3545
sewardj1a01e652005-02-23 11:39:21 +00003546 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
sewardj8d965312005-02-25 02:48:47 +00003547 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003548 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003549 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003550 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003551 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003552 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
sewardj5992bd02005-05-11 02:13:42 +00003553 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
3554 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
3555 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
3556 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
3557 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
3558 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
3559 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
3560 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
3561 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
3562 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
3563 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3564 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
sewardjadffcef2005-05-11 00:03:06 +00003565 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
3566 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
3567 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
3568 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
3569 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3570 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3571 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003572 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
3573 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
3574 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
sewardj09717342005-05-05 21:34:02 +00003575 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
sewardj97628592005-05-10 22:42:54 +00003576 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
3577 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
3578 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
3579 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003580 do_SseReRg: {
3581 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3582 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3583 HReg dst = newVRegV(env);
3584 if (arg1isEReg) {
sewardj9da16972005-02-21 13:58:26 +00003585 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3586 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3587 } else {
3588 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3589 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3590 }
3591 return dst;
3592 }
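      /* Note on arg1isEReg (an explanatory gloss, not from the
         source): SseReRg computes dst = op(dst, src), and the ISA
         fixes which operand supplies which lanes of the result.
         For the commutative ops the order is immaterial, so arg1
         seeds dst; but the pack/unpack/interleave ops above are not
         commutative and their hardware operand roles are the
         reverse of the IR's argument order, so there arg2 seeds dst
         and arg1 is fed as the E (source) operand. */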
3593
sewardjadffcef2005-05-11 00:03:06 +00003594 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3595 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3596 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3597 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3598 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3599 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3600 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
sewardj09717342005-05-05 21:34:02 +00003601 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
3602 do_SseShift: {
3603 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3604 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3605 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3606 HReg ereg = newVRegV(env);
3607 HReg dst = newVRegV(env);
3608 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3609 addInstr(env, AMD64Instr_Push(rmi));
3610 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3611 addInstr(env, mk_vMOVsd_RR(greg, dst));
3612 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
3613 add_to_rsp(env, 16);
3614 return dst;
3615 }
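      /* Illustrative expansion (a sketch, mnemonics approximate):
         for Iop_ShlN32x4(v, 7) the count must sit in the low 64
         bits of an xmm register, so the code builds the 128-bit
         value {7, 0} on the stack:
            pushq  $0               ; upper 64 bits
            pushq  $7               ; lower 64 bits = shift count
            movups 0(%rsp), %ereg
            <dst := greg>
            pslld  %ereg, %dst
            addq   $16, %rsp
      */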
sewardj0852a132005-02-21 08:28:46 +00003616
3617 default:
3618 break;
3619 } /* switch (e->Iex.Binop.op) */
3620 } /* if (e->tag == Iex_Binop) */
3621
sewardjadffcef2005-05-11 00:03:06 +00003622 if (e->tag == Iex_Mux0X) {
3623 HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3624 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3625 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3626 HReg dst = newVRegV(env);
3627 addInstr(env, mk_vMOVsd_RR(rX,dst));
sewardj501a3392005-05-11 15:37:50 +00003628 addInstr(env, AMD64Instr_Test64(0xFF, r8));
sewardjadffcef2005-05-11 00:03:06 +00003629 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
3630 return dst;
3631 }
3632
sewardj9da16972005-02-21 13:58:26 +00003633 vec_fail:
sewardj0852a132005-02-21 08:28:46 +00003634 vex_printf("iselVecExpr (amd64, hwcaps = %s): can't reduce\n",
sewardj5117ce12006-01-27 21:20:15 +00003635 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
sewardj0852a132005-02-21 08:28:46 +00003636 ppIRExpr(e);
3637 vpanic("iselVecExpr_wrk");
3638}
sewardjc33671d2005-02-01 20:30:00 +00003639
3640
3641/*---------------------------------------------------------*/
3642/*--- ISEL: Statements ---*/
3643/*---------------------------------------------------------*/
3644
3645static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3646{
3647 if (vex_traceflags & VEX_TRACE_VCODE) {
3648 vex_printf("\n-- ");
3649 ppIRStmt(stmt);
3650 vex_printf("\n");
3651 }
3652
3653 switch (stmt->tag) {
3654
sewardj05b3b6a2005-02-04 01:44:33 +00003655 /* --------- STORE --------- */
sewardjaf1ceca2005-06-30 23:31:27 +00003656 case Ist_Store: {
sewardje9d8a262009-07-01 08:06:34 +00003657 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3658 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3659 IREndness end = stmt->Ist.Store.end;
sewardjaf1ceca2005-06-30 23:31:27 +00003660
sewardje768e922009-11-26 17:17:37 +00003661 if (tya != Ity_I64 || end != Iend_LE)
sewardjaf1ceca2005-06-30 23:31:27 +00003662 goto stmt_fail;
3663
sewardj31191072005-02-05 18:24:47 +00003664 if (tyd == Ity_I64) {
sewardjbf0d86c2007-11-26 23:18:52 +00003665 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
sewardjaf1ceca2005-06-30 23:31:27 +00003666 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
sewardj31191072005-02-05 18:24:47 +00003667 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
3668 return;
3669 }
sewardj05b3b6a2005-02-04 01:44:33 +00003670 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
sewardjbf0d86c2007-11-26 23:18:52 +00003671 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
sewardjaf1ceca2005-06-30 23:31:27 +00003672 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
sewardj428fabd2005-03-21 03:11:17 +00003673 addInstr(env, AMD64Instr_Store(
3674 toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
3675 r,am));
sewardj05b3b6a2005-02-04 01:44:33 +00003676 return;
3677 }
sewardj8d965312005-02-25 02:48:47 +00003678 if (tyd == Ity_F64) {
sewardjbf0d86c2007-11-26 23:18:52 +00003679 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
sewardjaf1ceca2005-06-30 23:31:27 +00003680 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
sewardj8d965312005-02-25 02:48:47 +00003681 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
3682 return;
3683 }
sewardjc49ce232005-02-25 13:03:03 +00003684 if (tyd == Ity_F32) {
sewardjbf0d86c2007-11-26 23:18:52 +00003685 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
sewardjaf1ceca2005-06-30 23:31:27 +00003686 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
sewardjc49ce232005-02-25 13:03:03 +00003687 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
3688 return;
3689 }
sewardj0852a132005-02-21 08:28:46 +00003690 if (tyd == Ity_V128) {
sewardjbf0d86c2007-11-26 23:18:52 +00003691 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
sewardjaf1ceca2005-06-30 23:31:27 +00003692 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
sewardj18303862005-02-21 12:36:54 +00003693 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
sewardj0852a132005-02-21 08:28:46 +00003694 return;
3695 }
3696 break;
sewardj05b3b6a2005-02-04 01:44:33 +00003697 }
sewardjf67eadf2005-02-03 03:53:52 +00003698
3699 /* --------- PUT --------- */
3700 case Ist_Put: {
3701 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3702 if (ty == Ity_I64) {
3703 /* We're going to write to memory, so compute the RHS into an
3704 AMD64RI. */
3705 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3706 addInstr(env,
3707 AMD64Instr_Alu64M(
3708 Aalu_MOV,
3709 ri,
3710 AMD64AMode_IR(stmt->Ist.Put.offset,
3711 hregAMD64_RBP())
3712 ));
3713 return;
3714 }
sewardjf67eadf2005-02-03 03:53:52 +00003715 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
3716 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3717 addInstr(env, AMD64Instr_Store(
sewardj428fabd2005-03-21 03:11:17 +00003718 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
sewardjf67eadf2005-02-03 03:53:52 +00003719 r,
3720 AMD64AMode_IR(stmt->Ist.Put.offset,
3721 hregAMD64_RBP())));
3722 return;
3723 }
sewardj0852a132005-02-21 08:28:46 +00003724 if (ty == Ity_V128) {
3725 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3726 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
3727 hregAMD64_RBP());
sewardj18303862005-02-21 12:36:54 +00003728 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
sewardj0852a132005-02-21 08:28:46 +00003729 return;
3730 }
sewardj8d965312005-02-25 02:48:47 +00003731 if (ty == Ity_F32) {
3732 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3733 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
3734 set_SSE_rounding_default(env); /* paranoia */
3735 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
3736 return;
3737 }
sewardj1a01e652005-02-23 11:39:21 +00003738 if (ty == Ity_F64) {
3739 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3740 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
3741 hregAMD64_RBP() );
3742 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
3743 return;
3744 }
sewardjf67eadf2005-02-03 03:53:52 +00003745 break;
3746 }
3747
sewardj8d965312005-02-25 02:48:47 +00003748 /* --------- Indexed PUT --------- */
3749 case Ist_PutI: {
3750 AMD64AMode* am
3751 = genGuestArrayOffset(
3752 env, stmt->Ist.PutI.descr,
3753 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3754
3755 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3756 if (ty == Ity_F64) {
3757 HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3758 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
3759 return;
3760 }
3761 if (ty == Ity_I8) {
3762 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3763 addInstr(env, AMD64Instr_Store( 1, r, am ));
3764 return;
3765 }
sewardj1e015d82005-04-23 23:41:46 +00003766 if (ty == Ity_I64) {
3767 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
3768 addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
3769 return;
3770 }
sewardj8d965312005-02-25 02:48:47 +00003771 break;
3772 }
sewardj614b3fb2005-02-02 02:16:03 +00003773
3774 /* --------- TMP --------- */
sewardjdd40fdf2006-12-24 02:20:24 +00003775 case Ist_WrTmp: {
3776 IRTemp tmp = stmt->Ist.WrTmp.tmp;
sewardj614b3fb2005-02-02 02:16:03 +00003777 IRType ty = typeOfIRTemp(env->type_env, tmp);
sewardj6ce1a232007-03-31 19:12:38 +00003778
3779 /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
3780 compute it into an AMode and then use LEA. This usually
3781 produces fewer instructions, often because (for memcheck
3782 created IR) we get t = address-expression (t is later used
3783 twice), and so doing this naturally turns address-expression
3784 back into an AMD64 amode. */
3785 if (ty == Ity_I64
3786 && stmt->Ist.WrTmp.data->tag == Iex_Binop
3787 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
3788 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3789 HReg dst = lookupIRTemp(env, tmp);
3790 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
3791 /* Hmm, iselIntExpr_AMode wimped out and just computed the
3792 value into a register. Just emit a normal reg-reg move
3793 so reg-alloc can coalesce it away in the usual way. */
3794 HReg src = am->Aam.IR.reg;
3795 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
3796 } else {
3797 addInstr(env, AMD64Instr_Lea64(am,dst));
3798 }
3799 return;
3800 }
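         /* Hypothetical illustration: for t = Add64(t1, 0x20) this
            path emits a single 'leaq 0x20(%r_t1), %r_t' instead of
            a move-then-add pair, and for t = Add64(t1, Shl64(t2, 3))
            the amode matcher can fold the whole expression into
            'leaq 0(%r_t1,%r_t2,8), %r_t'. */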
3801
sewardj9b967672005-02-08 11:13:09 +00003802 if (ty == Ity_I64 || ty == Ity_I32
3803 || ty == Ity_I16 || ty == Ity_I8) {
sewardjdd40fdf2006-12-24 02:20:24 +00003804 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
sewardj614b3fb2005-02-02 02:16:03 +00003805 HReg dst = lookupIRTemp(env, tmp);
3806 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
3807 return;
3808 }
sewardj9b967672005-02-08 11:13:09 +00003809 if (ty == Ity_I128) {
3810 HReg rHi, rLo, dstHi, dstLo;
sewardjdd40fdf2006-12-24 02:20:24 +00003811 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
sewardj9b967672005-02-08 11:13:09 +00003812 lookupIRTemp128( &dstHi, &dstLo, env, tmp);
3813 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3814 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3815 return;
3816 }
sewardja5bd0af2005-03-24 20:40:12 +00003817 if (ty == Ity_I1) {
sewardjdd40fdf2006-12-24 02:20:24 +00003818 AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
sewardja5bd0af2005-03-24 20:40:12 +00003819 HReg dst = lookupIRTemp(env, tmp);
3820 addInstr(env, AMD64Instr_Set64(cond, dst));
3821 return;
3822 }
sewardj18303862005-02-21 12:36:54 +00003823 if (ty == Ity_F64) {
3824 HReg dst = lookupIRTemp(env, tmp);
sewardjdd40fdf2006-12-24 02:20:24 +00003825 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
sewardj18303862005-02-21 12:36:54 +00003826 addInstr(env, mk_vMOVsd_RR(src, dst));
3827 return;
3828 }
sewardjc49ce232005-02-25 13:03:03 +00003829 if (ty == Ity_F32) {
3830 HReg dst = lookupIRTemp(env, tmp);
sewardjdd40fdf2006-12-24 02:20:24 +00003831 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
sewardjc49ce232005-02-25 13:03:03 +00003832 addInstr(env, mk_vMOVsd_RR(src, dst));
3833 return;
3834 }
sewardj0852a132005-02-21 08:28:46 +00003835 if (ty == Ity_V128) {
3836 HReg dst = lookupIRTemp(env, tmp);
sewardjdd40fdf2006-12-24 02:20:24 +00003837 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
sewardj18303862005-02-21 12:36:54 +00003838 addInstr(env, mk_vMOVsd_RR(src, dst));
sewardj0852a132005-02-21 08:28:46 +00003839 return;
3840 }
sewardj614b3fb2005-02-02 02:16:03 +00003841 break;
3842 }
3843
sewardjd0a12df2005-02-10 02:07:43 +00003844 /* --------- Call to DIRTY helper --------- */
3845 case Ist_Dirty: {
3846 IRType retty;
3847 IRDirty* d = stmt->Ist.Dirty.details;
3848 Bool passBBP = False;
3849
3850 if (d->nFxState == 0)
3851 vassert(!d->needsBBP);
sewardj428fabd2005-03-21 03:11:17 +00003852
3853 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
sewardjd0a12df2005-02-10 02:07:43 +00003854
3855 /* Marshal args, do the call, clear stack. */
3856 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3857
3858 /* Now figure out what to do with the returned value, if any. */
3859 if (d->tmp == IRTemp_INVALID)
3860 /* No return value. Nothing to do. */
3861 return;
3862
3863 retty = typeOfIRTemp(env->type_env, d->tmp);
sewardj478fe702005-04-23 01:15:47 +00003864 if (retty == Ity_I64 || retty == Ity_I32
3865 || retty == Ity_I16 || retty == Ity_I8) {
sewardjd0a12df2005-02-10 02:07:43 +00003866 /* The returned value is in %rax. Park it in the register
3867 associated with tmp. */
3868 HReg dst = lookupIRTemp(env, d->tmp);
3869 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
3870 return;
3871 }
3872 break;
3873 }
3874
3875 /* --------- MEM FENCE --------- */
sewardjc4356f02007-11-09 21:15:04 +00003876 case Ist_MBE:
3877 switch (stmt->Ist.MBE.event) {
3878 case Imbe_Fence:
3879 addInstr(env, AMD64Instr_MFence());
3880 return;
sewardjc4356f02007-11-09 21:15:04 +00003881 default:
3882 break;
3883 }
3884 break;
sewardjf8c37f72005-02-07 18:55:29 +00003885
sewardje9d8a262009-07-01 08:06:34 +00003886 /* --------- ACAS --------- */
3887 case Ist_CAS:
3888 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3889 /* "normal" singleton CAS */
3890 UChar sz;
3891 IRCAS* cas = stmt->Ist.CAS.details;
3892 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3893 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
3894 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
3895 HReg rData = iselIntExpr_R(env, cas->dataLo);
3896 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
3897 HReg rOld = lookupIRTemp(env, cas->oldLo);
3898 vassert(cas->expdHi == NULL);
3899 vassert(cas->dataHi == NULL);
3900 addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
3901 addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
3902 addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
3903 switch (ty) {
3904 case Ity_I64: sz = 8; break;
3905 case Ity_I32: sz = 4; break;
3906 case Ity_I16: sz = 2; break;
3907 case Ity_I8: sz = 1; break;
3908 default: goto unhandled_cas;
3909 }
3910 addInstr(env, AMD64Instr_ACAS(am, sz));
3911 addInstr(env, AMD64Instr_CMov64(
3912 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
3913 return;
3914 } else {
3915 /* double CAS */
3916 UChar sz;
3917 IRCAS* cas = stmt->Ist.CAS.details;
3918 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3919 /* only 32-bit and 64-bit allowed in this case */
3920 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
3921 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
3922 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
3923 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
3924 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
3925 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
3926 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
3927 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
3928 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
3929 switch (ty) {
3930 case Ity_I64:
3931 if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
3932 goto unhandled_cas; /* we'd have to generate
3933 cmpxchg16b, but the host
3934 doesn't support that */
3935 sz = 8;
3936 break;
3937 case Ity_I32:
3938 sz = 4;
3939 break;
3940 default:
3941 goto unhandled_cas;
3942 }
3943 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
3944 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
3945 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
3946 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
3947 addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
3948 addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
3949 addInstr(env, AMD64Instr_DACAS(am, sz));
3950 addInstr(env,
3951 AMD64Instr_CMov64(
3952 Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
3953 addInstr(env,
3954 AMD64Instr_CMov64(
3955 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
3956 return;
3957 }
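      /* Hedged sketch of the singleton case for a 64-bit CAS
         (%rax/%rbx are the fixed registers cmpxchg requires):
            movq  %r_expd, %r_old
            movq  %r_expd, %rax
            movq  %r_data, %rbx
            lock; cmpxchgq %rbx, (mem)
            cmovnz %rax, %r_old    ; on failure, take the value seen
         so oldLo always ends up holding the value observed in
         memory, whether or not the swap happened.  The double case
         is the same dance with cmpxchg8b/16b and the rdx:rax and
         rcx:rbx pairs. */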
3958 unhandled_cas:
3959 break;
3960
sewardjd20b2902005-03-22 00:15:00 +00003961 /* --------- INSTR MARK --------- */
3962 /* Doesn't generate any executable code ... */
3963 case Ist_IMark:
3964 return;
3965
sewardj5a9ffab2005-05-12 17:55:01 +00003966 /* --------- ABI HINT --------- */
3967 /* These have no meaning (denotation in the IR) and so we ignore
3968 them ... if any actually made it this far. */
3969 case Ist_AbiHint:
3970 return;
3971
sewardjd20b2902005-03-22 00:15:00 +00003972 /* --------- NO-OP --------- */
3973 case Ist_NoOp:
3974 return;
3975
sewardjf8c37f72005-02-07 18:55:29 +00003976 /* --------- EXIT --------- */
3977 case Ist_Exit: {
3978 AMD64RI* dst;
3979 AMD64CondCode cc;
3980 if (stmt->Ist.Exit.dst->tag != Ico_U64)
3981 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
3982 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
3983 cc = iselCondCode(env,stmt->Ist.Exit.guard);
3984 addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
3985 return;
3986 }
sewardjc33671d2005-02-01 20:30:00 +00003987
3988 default: break;
3989 }
sewardjaf1ceca2005-06-30 23:31:27 +00003990 stmt_fail:
sewardjc33671d2005-02-01 20:30:00 +00003991 ppIRStmt(stmt);
3992 vpanic("iselStmt(amd64)");
3993}
3994
3995
3996/*---------------------------------------------------------*/
3997/*--- ISEL: Basic block terminators (Nexts) ---*/
3998/*---------------------------------------------------------*/
3999
4000static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
sewardjf67eadf2005-02-03 03:53:52 +00004001{
4002 AMD64RI* ri;
4003 if (vex_traceflags & VEX_TRACE_VCODE) {
4004 vex_printf("\n-- goto {");
4005 ppIRJumpKind(jk);
4006 vex_printf("} ");
4007 ppIRExpr(next);
4008 vex_printf("\n");
4009 }
4010 ri = iselIntExpr_RI(env, next);
4011 addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
sewardjc33671d2005-02-01 20:30:00 +00004012}
4013
4014
4015/*---------------------------------------------------------*/
4016/*--- Insn selector top-level ---*/
4017/*---------------------------------------------------------*/
4018
sewardjdd40fdf2006-12-24 02:20:24 +00004019/* Translate an entire SB to amd64 code. */
sewardjc33671d2005-02-01 20:30:00 +00004020
sewardjdd40fdf2006-12-24 02:20:24 +00004021HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host,
sewardjaca070a2006-10-17 00:28:22 +00004022 VexArchInfo* archinfo_host,
sewardjdd40fdf2006-12-24 02:20:24 +00004023 VexAbiInfo* vbi/*UNUSED*/ )
sewardjc33671d2005-02-01 20:30:00 +00004024{
sewardj5117ce12006-01-27 21:20:15 +00004025 Int i, j;
4026 HReg hreg, hregHI;
4027 ISelEnv* env;
4028 UInt hwcaps_host = archinfo_host->hwcaps;
sewardjc33671d2005-02-01 20:30:00 +00004029
4030 /* sanity ... */
sewardj8f073592006-05-01 02:14:17 +00004031 vassert(arch_host == VexArchAMD64);
sewardje9d8a262009-07-01 08:06:34 +00004032 vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3
4033 |VEX_HWCAPS_AMD64_CX16)));
sewardjc33671d2005-02-01 20:30:00 +00004034
4035 /* Make up an initial environment to use. */
sewardj9a036bf2005-03-14 18:19:08 +00004036 env = LibVEX_Alloc(sizeof(ISelEnv));
sewardjc33671d2005-02-01 20:30:00 +00004037 env->vreg_ctr = 0;
4038
4039 /* Set up output code array. */
4040 env->code = newHInstrArray();
4041
4042 /* Copy BB's type env. */
4043 env->type_env = bb->tyenv;
4044
4045 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4046 change as we go along. */
4047 env->n_vregmap = bb->tyenv->types_used;
4048 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
sewardj9b967672005-02-08 11:13:09 +00004049 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
sewardjc33671d2005-02-01 20:30:00 +00004050
4051 /* and finally ... */
sewardj5117ce12006-01-27 21:20:15 +00004052 env->hwcaps = hwcaps_host;
sewardjc33671d2005-02-01 20:30:00 +00004053
4054 /* For each IR temporary, allocate a suitably-kinded virtual
4055 register. */
4056 j = 0;
4057 for (i = 0; i < env->n_vregmap; i++) {
sewardj9b967672005-02-08 11:13:09 +00004058 hregHI = hreg = INVALID_HREG;
sewardjc33671d2005-02-01 20:30:00 +00004059 switch (bb->tyenv->types[i]) {
4060 case Ity_I1:
4061 case Ity_I8:
4062 case Ity_I16:
4063 case Ity_I32:
sewardj9b967672005-02-08 11:13:09 +00004064 case Ity_I64: hreg = mkHReg(j++, HRcInt64, True); break;
4065 case Ity_I128: hreg = mkHReg(j++, HRcInt64, True);
4066 hregHI = mkHReg(j++, HRcInt64, True); break;
sewardjc33671d2005-02-01 20:30:00 +00004067 case Ity_F32:
sewardj18303862005-02-21 12:36:54 +00004068 case Ity_F64:
sewardj9b967672005-02-08 11:13:09 +00004069 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardjc33671d2005-02-01 20:30:00 +00004070 default: ppIRType(bb->tyenv->types[i]);
4071 vpanic("iselBB(amd64): IRTemp type");
4072 }
4073 env->vregmap[i] = hreg;
sewardj9b967672005-02-08 11:13:09 +00004074 env->vregmapHI[i] = hregHI;
sewardjc33671d2005-02-01 20:30:00 +00004075 }
4076 env->vreg_ctr = j;
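   /* Illustrative example (not in the source): for a tyenv of
      { t0:Ity_I64, t1:Ity_I128, t2:Ity_V128 } the loop above yields
      vregmap = {v0, v1, v3}, vregmapHI = {-, v2, -} and
      vreg_ctr == 4: I128 temps get a pair of 64-bit vregs, and
      F32/F64/V128 temps all live in 128-bit vector vregs. */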
4077
4078 /* Ok, finally we can iterate over the statements. */
4079 for (i = 0; i < bb->stmts_used; i++)
4080 if (bb->stmts[i])
4081 iselStmt(env,bb->stmts[i]);
4082
4083 iselNext(env,bb->next,bb->jumpkind);
4084
4085 /* record the number of vregs we used. */
4086 env->code->n_vregs = env->vreg_ctr;
4087 return env->code;
4088}
sewardja3e98302005-02-01 15:55:05 +00004089
4090
4091/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00004092/*--- end host_amd64_isel.c ---*/
sewardja3e98302005-02-01 15:55:05 +00004093/*---------------------------------------------------------------*/