
/*---------------------------------------------------------------*/
/*---                                                         ---*/
/*--- This file (host-amd64/isel.c) is                        ---*/
/*--- Copyright (c) OpenWorks LLP.  All rights reserved.      ---*/
/*---                                                         ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of LibVEX, a library for dynamic binary
   instrumentation and translation.

   Copyright (C) 2004-2005 OpenWorks LLP.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; Version 2 dated June 1991 of the
   license.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or liability
   for damages.  See the GNU General Public License for more details.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir/irmatch.h"
#include "main/vex_util.h"
#include "main/vex_globals.h"
#include "host-generic/h_generic_regs.h"
//.. #include "host-generic/h_generic_simd64.h"
#include "host-amd64/hdefs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged
   at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

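/* For reference, decoding those values against the standard x87/SSE
   control-register layouts:
     0x027F: x87 CW -- exception mask bits 5:0 all set (IM DM ZM OM
             UM PM, plus reserved bit 6), PC (bits 9:8) = 10b
             (53-bit precision), RC (bits 11:10) = 00b (nearest).
     0x1F80: MXCSR -- exception mask bits 12:7 all set,
             RC (bits 14:13) = 00b (nearest), FZ (bit 15) = 0.
*/
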
/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

//.. static IRExpr* mkU64 ( ULong i )
//.. {
//..    return IRExpr_Const(IRConst_U64(i));
//.. }

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 128-bit integer-typed
             IRTemps.  It holds the identity of a second
             64-bit virtual HReg, which holds the high half
             of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      VexSubArch   subarch;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO, 
                              ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//.. static HReg newVRegF ( ISelEnv* env )
//.. {
//..    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
//..    env->vreg_ctr++;
//..    return reg;
//.. }

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( HReg* rHi, HReg* rLo, 
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode      ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr       ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr       ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr       ( ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return 
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || am->Aam.IR.reg == hregAMD64_RBP()) );
      case Aam_IRRS:
         return 
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
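
/* Example: 0x000000007FFFFFFFULL fits (top 33 bits all zero), but
   0x0000000080000000ULL does not -- sign-extending its low 32 bits
   gives 0xFFFFFFFF80000000, which differs from the original. */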

//.. /* Is this a 32-bit zero expression? */
//.. 
//.. static Bool isZero32 ( IRExpr* e )
//.. {
//..    return e->tag == Iex_Const
//..           && e->Iex.Const.con->tag == Ico_U32
//..           && e->Iex.Const.con->Ico.U32 == 0;
//.. }

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env, 
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n), 
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env, 
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n), 
                              hregAMD64_RSP()));
}


//.. /* Given an amode, return one which references 4 bytes further
//..    along. */
//.. 
//.. static X86AMode* advance4 ( X86AMode* am )
//.. {
//..    X86AMode* am4 = dopyX86AMode(am);
//..    switch (am4->tag) {
//..       case Xam_IRRS:
//..          am4->Xam.IRRS.imm += 4; break;
//..       case Xam_IR:
//..          am4->Xam.IR.imm += 4; break;
//..       default:
//..          vpanic("advance4(x86,host)");
//..    }
//..    return am4;
//.. }
//.. 
//.. 
//.. /* Push an arg onto the host stack, in preparation for a call to a
//..    helper function of some kind.  Returns the number of 32-bit words
//..    pushed. */
//.. 
//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
//.. {
//..    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
//..    if (arg_ty == Ity_I32) {
//..       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
//..       return 1;
//..    } else 
//..    if (arg_ty == Ity_I64) {
//..       HReg rHi, rLo;
//..       iselInt64Expr(&rHi, &rLo, env, arg);
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
//..       return 2;
//..    }
//..    ppIRExpr(arg);
//..    vpanic("pushArg(x86): can't handle arg of this type");
//.. }


/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   switch (e->tag) {
      case Iex_Tmp: case Iex_Const: case Iex_Get: 
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env, 
                    Bool passBBP, 
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   Bool          go_fast;
   Int           n_args, i, argreg;

   /* Marshal args for a call and do the call.

      If passBBP is True, %rbp (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

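   /* To illustrate the hazard (hypothetical example): a shift with a
      non-constant amount is generated through %cl, and a DivMod
      through %rax:%rdx -- so evaluating such an expression for one
      argument could trash %rcx or %rdx, which may already hold an
      earlier argument.  Tmp/Const/Get expressions can never do
      that. */
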
   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (6 < n_args + (passBBP ? 1 : 0))
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  Start
      by assuming the fast scheme, and select slow if any
      contraindications (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const 
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs. */

   if (go_fast) {

      /* FAST SCHEME */
      argreg = 0;
      if (passBBP) {
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]));
         argreg++;
      }

      for (i = 0; i < n_args; i++) {
         vassert(argreg < 6);
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV, 
                          iselIntExpr_RMI(env, args[i]),
                          argregs[argreg]
                       )
                 );
         argreg++;
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = Acc_ALWAYS;

   } else {

      /* SLOW SCHEME; move via temporaries */
      argreg = 0;

      if (passBBP) {
         /* This is pretty stupid; better to move directly to rdi
            after the rest of the args are done. */
         tmpregs[argreg] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
         argreg++;
      }

      for (i = 0; i < n_args; i++) {
         vassert(argreg < 6);
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[argreg] = iselIntExpr_R(env, args[i]);
         argreg++;
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = Acc_ALWAYS;
      if (guard) {
         if (guard->tag == Iex_Const 
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < argreg; i++) {
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
      }

   }

   /* Finally, the call itself. */
   addInstr(env, AMD64Instr_Call( 
                    cc, 
                    Ptr_to_ULong(cee->addr), 
                    n_args + (passBBP ? 1 : 0) 
                 )
   );
}


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRArray* descr, 
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp       (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env, 
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env, 
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}

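/* (For the amd64 guest, such descriptors describe the x87 register
   stack in the guest state: a tag array of 8 one-byte elements and
   an FP register array of 8 eight-byte elements -- hence the
   nElems==8, elemSz 1-or-8 restriction above.) */
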

/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR 
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

//.. /* Mess with the FPU's rounding mode: set to the default rounding mode
//..    (DEFAULT_FPUCW). */
//.. static 
//.. void set_FPU_rounding_default ( ISelEnv* env )
//.. {
//..    /* pushl $DEFAULT_FPUCW
//..       fldcw 0(%esp)
//..       addl $4, %esp 
//..    */
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..    addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
//..    addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
//..    add_to_esp(env, 4);
//.. }


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has 
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, AMD64RM_Reg(reg)));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}


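/* (This works because the IRRoundingMode encoding -- 0 = nearest,
   1 = -infinity, 2 = +infinity, 3 = toward zero -- matches the
   hardware RC encoding, and MXCSR keeps RC in bits 14:13, hence the
   shift by 13.) */
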
//.. /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
//..    expression denoting a value in the range 0 .. 3, indicating a round
//..    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
//..    the same rounding.
//.. */
//.. static
//.. void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
//.. {
//..    HReg rrm  = iselIntExpr_R(env, mode);
//..    HReg rrm2 = newVRegI(env);
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//.. 
//..    /* movl  %rrm, %rrm2
//..       andl  $3, %rrm2   -- shouldn't be needed; paranoia
//..       shll  $10, %rrm2
//..       orl   $DEFAULT_FPUCW, %rrm2
//..       pushl %rrm2
//..       fldcw 0(%esp)
//..       addl  $4, %esp
//..    */
//..    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
//..    addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
//..    addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, X86RM_Reg(rrm2)));
//..    addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
//..    addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
//..    addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
//..    add_to_esp(env, 4);
//.. }


/* Generate !src into a new vector register, and be sure that the code
   is SSE1 compatible.  Amazing that Intel doesn't offer a less crappy
   way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegV(env);
   /* Set dst to zero.  Not strictly necessary, but the idea of doing
      a FP comparison on whatever junk happens to be floating around
      in it is just too scary. */
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   /* And now make it all 1s ... */
   addInstr(env, AMD64Instr_Sse32Fx4(Asse_CMPEQF, dst, dst));
   /* Finally, xor 'src' into it. */
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}

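/* (The cmpeqps trick: since dst was just zeroed, every lane compares
   0.0 == 0.0 and so becomes all 1s; xoring 'src' into that yields
   the bitwise complement.  A junk NaN lane, by contrast, would
   compare unequal to itself and give zeros -- presumably the 'scary'
   case the comment above alludes to.) */
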

//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
//..    after most non-simple FPU operations (simple = +, -, *, / and
//..    sqrt).
//.. 
//..    This could be done a lot more efficiently if needed, by loading
//..    zero and adding it to the value to be rounded (fldz ; faddp?).
//.. */
//.. static void roundToF64 ( ISelEnv* env, HReg reg )
//.. {
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..    sub_from_esp(env, 8);
//..    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
//..    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
//..    add_to_esp(env, 8);
//.. }


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/16/24 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_8Uto64);
   DECLARE_PATTERN(p_16Uto64);
   DECLARE_PATTERN(p_1Uto8_64to1);
//.. DECLARE_PATTERN(p_32to1_then_1Uto8);

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_Tmp: {
      return lookupIRTemp(env, e->Iex.Tmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_LDle: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.LDle.addr );
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;
//.. 
//..       /* Pattern: Sub32(0,x) */
//..       if (e->Iex.Binop.op == Iop_Sub32 && isZero32(e->Iex.Binop.arg1)) {
//..          HReg dst = newVRegI(env);
//..          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
//..          addInstr(env, mk_iMOVsd_RR(reg,dst));
//..          addInstr(env, X86Instr_Unary32(Xun_NEG,X86RM_Reg(dst)));
//..          return dst;
//..       }
//.. 
      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64: 
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64: 
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64: 
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64: 
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8: 
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8: 
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64: 
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovZLQ(dst,dst));
               break;
//..          case Iop_Sar8:
//..             addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, X86RM_Reg(dst)));
//..             addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, X86RM_Reg(dst)));
//..             break;
//..          case Iop_Sar16:
//..             addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, X86RM_Reg(dst)));
//..             addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, X86RM_Reg(dst)));
//..             break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(dst)));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 32, AMD64RM_Reg(dst)));
               break;
            default: 
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(
                                shOp, 
                                nshift,
                                AMD64RM_Reg(dst)));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, AMD64RM_Reg(dst)));
         }
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then 
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, AMD64RM_Reg(rdx)));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovZLQ(rdx,rdx));
         addInstr(env, AMD64Instr_MovZLQ(rax,rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(rdx)));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }
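
      /* (The I64 result thus carries the 32-bit remainder in its
         high half and the 32-bit quotient in its low half, matching
         the DivMod{S,U}64to32 result layout.) */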

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(hi32)));
         addInstr(env, AMD64Instr_MovZLQ(lo32,lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, AMD64RM_Reg(hi16)));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, AMD64RM_Reg(hi8)));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32  = newVRegI(env);
         HReg b32  = newVRegI(env);
         HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, AMD64RM_Reg(a32)));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, AMD64RM_Reg(b32)));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, AMD64RM_Reg(a32)));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, AMD64RM_Reg(b32)));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
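
      /* (Each operand is first sign- or zero-extended in place from
         N to 64 bits via a shift-up/shift-down pair, so a single
         64-bit imul then leaves the full 2N-bit product in the low
         bits of b32.) */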

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL  = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_F64toI32
          || e->Iex.Binop.op == Iop_F64toI64) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32 ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

//..    if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
//..       Int  sz  = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
//..       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
//..       HReg dst = newVRegI(env);
//.. 
//..       /* Used several times ... */
//..       X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//.. 
//..       /* rf now holds the value to be converted, and rrm holds the
//..          rounding mode value, encoded as per the IRRoundingMode
//..          enum.  The first thing to do is set the FPU's rounding
//..          mode accordingly. */
//.. 
//..       /* Create a space for the format conversion. */
//..       /* subl $4, %esp */
//..       sub_from_esp(env, 4);
//.. 
//..       /* Set host rounding mode */
//..       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
//.. 
//..       /* gistw/l %rf, 0(%esp) */
//..       addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
//.. 
//..       if (sz == 2) {
//..          /* movzwl 0(%esp), %dst */
//..          addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
//..       } else {
//..          /* movl 0(%esp), %dst */
//..          vassert(sz == 4);
//..          addInstr(env, X86Instr_Alu32R(
//..                           Xalu_MOV, X86RMI_Mem(zero_esp), dst));
//..       }
//.. 
//..       /* Restore default FPU rounding. */
//..       set_FPU_rounding_default( env );
//.. 
//..       /* addl $4, %esp */
//..       add_to_esp(env, 4);
//..       return dst;
//..    }
//.. 
//..    /* C3210 flags following FPU partial remainder (fprem), both
//..       IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
//..    if (e->Iex.Binop.op == Iop_PRemC3210F64
//..        || e->Iex.Binop.op == Iop_PRem1C3210F64) {
//..       HReg junk = newVRegF(env);
//..       HReg dst  = newVRegI(env);
//..       HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
//..       HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
//..       addInstr(env, X86Instr_FpBinary(
//..                        e->Iex.Binop.op==Iop_PRemC3210F64 
//..                           ? Xfp_PREM : Xfp_PREM1,
//..                        srcL,srcR,junk
//..             ));
//..       /* The previous pseudo-insn will have left the FPU's C3210
//..          flags set correctly.  So bag them. */
//..       addInstr(env, X86Instr_FpStSW_AX());
//..       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
//..       addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
//..       return dst;
//..    }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {
      /* 32Uto64(8Uto32(expr8)) */
      DEFINE_PATTERN(p_8Uto64,
                     unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0)) ) );
      if (matchIRExpr(&mi,p_8Uto64,e)) {
         IRExpr* expr8 = mi.bindee[0];
         HReg    dst   = newVRegI(env);
         HReg    src   = iselIntExpr_R(env, expr8);
         addInstr(env, mk_iMOVsd_RR(src,dst) );
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, AMD64RM_Reg(dst)));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 56, AMD64RM_Reg(dst)));
         return dst;
      }

      /* 1Uto8(64to1(expr64)) */
      DEFINE_PATTERN( p_1Uto8_64to1,
                      unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
      if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
         IRExpr* expr64 = mi.bindee[0];
         HReg    dst    = newVRegI(env);
         HReg    src    = iselIntExpr_R(env, expr64);
         addInstr(env, mk_iMOVsd_RR(src,dst) );
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm(1), dst));
         return dst;
      }

//..       /* 16Uto32(LDle(expr32)) */
//..       {
//..          DECLARE_PATTERN(p_LDle16_then_16Uto32);
//..          DEFINE_PATTERN(p_LDle16_then_16Uto32,
//..             unop(Iop_16Uto32,IRExpr_LDle(Ity_I16,bind(0))) );
//..          if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
//..             HReg dst = newVRegI(env);
//..             X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
//..             addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
//..             return dst;
//..          }
//..       }

      switch (e->Iex.Unop.op) {
         case Iop_32Uto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovZLQ(src,dst) );
            return dst;
         }
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            UInt amt = 32;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, AMD64RM_Reg(dst)));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, AMD64RM_Reg(dst)));
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
//       case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = e->Iex.Unop.op==Iop_16Uto32
                           || e->Iex.Unop.op==Iop_16Uto64;
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = e->Iex.Unop.op==Iop_16Sto32
                           || e->Iex.Unop.op==Iop_16Sto64;
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, AMD64RM_Reg(dst)));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, AMD64RM_Reg(dst)));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,AMD64RM_Reg(dst)));
            return dst;
         }
//..          case Iop_64HIto32: {
//..             HReg rHi, rLo;
//..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..             return rHi; /* and abandon rLo .. poor wee thing :-) */
//..          }
//..          case Iop_64to32: {
//..             HReg rHi, rLo;
//..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..             return rLo; /* similar stupid comment to the above ... */
//..          }
//..          case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(
                             Ash_SHR, shift, AMD64RM_Reg(dst)));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, AMD64RM_Reg(dst)));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, AMD64RM_Reg(dst)));
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, 
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }
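         /* (i.e. clz64(x) = 63 - bsrq(x); as with the hardware bsrq,
            the result is undefined for x == 0.) */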
         case Iop_Neg8:
         case Iop_Neg16:
         case Iop_Neg32:
         case Iop_Neg64: {
            HReg dst = newVRegI(env);
            HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,AMD64RM_Reg(dst)));
            return dst;
         }

         case Iop_V128to32: {
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp0 = AMD64AMode_IR(0,   hregAMD64_RSP());
            AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
            sub_from_rsp(env, 16);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 
                                             AMD64RMI_Mem(rspN), dst ));
            add_to_rsp(env, 16);
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         case Iop_16to8:
         case Iop_32to8:
         case Iop_64to8:
         case Iop_32to16:
         case Iop_64to16:
         case Iop_64to32:
            /* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         default: 
            break;
      }
      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV, 
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am 
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr, 
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed
         for args, and 64 bits for return type. */
      if (e->Iex.CCall.retty != Ity_I64)
         goto irreducible;

      /* Marshal args, do the call. */
      doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );

      addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
      return dst;
   }

   /* --------- LITERAL --------- */
   /* 64/32/16/8-bit literals */
   case Iex_Const:
      if (ty == Ity_I64) {
         HReg r = newVRegI(env);
         addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
         return r;
      } else {
         AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg      r   = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
         return r;
      }

   /* --------- MULTIPLEX --------- */
   case Iex_Mux0X: {
      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
         HReg     r8;
         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
         AMD64RM* r0  = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
         HReg     dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(rX,dst));
         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
         addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(0xFF), AMD64RM_Reg(r8)));
         addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
         return dst;
      }
      break;
   }
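
   /* (Mux0X(cond,expr0,exprX) selects expr0 when the low byte of
      cond is zero, else exprX: dst starts out as exprX, and the
      test/cmovz pair overwrites it with expr0 exactly when cond's
      low 8 bits are all zero.) */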

   default: 
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries              ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/
1478
sewardj8258a8c2005-02-02 03:11:24 +00001479static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1480{
1481 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1482 vassert(sane_AMode(am));
1483 return am;
1484}
1485
1486/* DO NOT CALL THIS DIRECTLY ! */
1487static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1488{
sewardj05b3b6a2005-02-04 01:44:33 +00001489 MatchInfo mi;
1490 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001491 IRType ty = typeOfIRExpr(env->type_env,e);
1492 vassert(ty == Ity_I64);
1493
sewardj05b3b6a2005-02-04 01:44:33 +00001494 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1495 /* bind0 bind1 bind2 bind3 */
1496 DEFINE_PATTERN(p_complex,
1497 binop( Iop_Add64,
1498 binop( Iop_Add64,
1499 bind(0),
1500 binop(Iop_Shl64, bind(1), bind(2))
1501 ),
1502 bind(3)
1503 )
1504 );
1505 if (matchIRExpr(&mi, p_complex, e)) {
1506 IRExpr* expr1 = mi.bindee[0];
1507 IRExpr* expr2 = mi.bindee[1];
1508 IRExpr* imm8 = mi.bindee[2];
1509 IRExpr* simm32 = mi.bindee[3];
1510 if (imm8->tag == Iex_Const
1511 && imm8->Iex.Const.con->tag == Ico_U8
1512 && imm8->Iex.Const.con->Ico.U8 < 4
1513 /* imm8 is OK, now check simm32 */
1514 && simm32->tag == Iex_Const
1515 && simm32->Iex.Const.con->tag == Ico_U64
1516 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1517 UInt shift = imm8->Iex.Const.con->Ico.U8;
sewardj428fabd2005-03-21 03:11:17 +00001518 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
sewardj05b3b6a2005-02-04 01:44:33 +00001519 HReg r1 = iselIntExpr_R(env, expr1);
1520 HReg r2 = iselIntExpr_R(env, expr2);
1521 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1522 return AMD64AMode_IRRS(offset, r1, r2, shift);
1523 }
1524 }
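
   /* For example, a guest address computed as base + 8*index + 0x50
      arrives here as

         Add64( Add64( t_base, Shl64(t_index, 3:I8) ), 0x50:I64 )

      (names illustrative) and folds into the single amode
      0x50(t_base,t_index,8), saving separate shift and add
      instructions. */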

   /* Add64(expr1, Shl64(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return AMD64AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add64(expr,i) */
   if (e->tag == Iex_Binop 
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
       && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return AMD64AMode_IR(
                toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
                r1
             );
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return AMD64AMode_IR(0, r1);
   }
}


/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an AMD64RMI operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Armi_Imm:
         return rmi;
      case Armi_Reg:
         vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rmi->Armi.Reg.reg));
         return rmi;
      case Armi_Mem:
         vassert(sane_AMode(rmi->Armi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate 64/32/16/8 */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
      }
   }

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                        hregAMD64_RBP()));
   }

   /* special case: 64-bit load from memory */
   if (e->tag == Iex_LDle && ty == Ity_I64) {
      AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
      return AMD64RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RMI_Reg(r);
   }
}
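
/* The point of the RMI form: a caller selecting, say, a 64-bit ALU
   operation can fold Add64(t1, LDle:I64(addr)) straight into an
   add-with-memory-operand via Armi_Mem, and a small literal into the
   instruction's immediate field via Armi_Imm, with Armi_Reg as the
   general fallback.  Note that only full 64-bit GETs and loads are
   folded: a narrower memory operand under a 64-bit instruction would
   read too many bytes. */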


/* --------------------- RIs --------------------- */

/* Calculate an expression into an AMD64RI operand.  As with
   iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Ari_Imm:
         return ri;
      case Ari_Reg:
         vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->Ari.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown amd64 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}


/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an AMD64RM operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Arm_Reg:
         vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rm->Arm.Reg.reg));
         return rm;
      case Arm_Mem:
         vassert(sane_AMode(rm->Arm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown amd64 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                       hregAMD64_RBP()));
   }

   /* special case: load from memory -- not handled yet; such
      expressions currently take the default route below. */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed (Ity_I1) expression, and
   return the condition code which is set when the expression
   notionally evaluates to 1. */

static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_32to1_64to32);
//..    DECLARE_PATTERN(p_1Uto32_then_32to1);
//..    DECLARE_PATTERN(p_1Sto32_then_32to1);

   DECLARE_PATTERN(p_1Uto64_then_64to1);

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_Tmp) {
      HReg r64 = lookupIRTemp(env, e->Iex.Tmp.tmp);
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r64,dst));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
      return Acc_NZ;
   }

//..    /* Constant 1:Bit */
//..    if (e->tag == Iex_Const && e->Iex.Const.con->Ico.U1 == True) {
//..       HReg r;
//..       vassert(e->Iex.Const.con->tag == Ico_U1);
//..       r = newVRegI(env);
//..       addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
//..       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
//..       return Xcc_Z;
//..    }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }
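
   /* The 1 ^ <cc> negation above relies on the AMD64CondCode values
      following the hardware condition-code encoding, in which each
      condition and its complement differ only in the bottom bit
      (Z/NZ, B/NB, L/NL, etc).  Flipping bit 0 therefore yields the
      negated condition without emitting any code. */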

   /* --- patterns rooted at: 64to1 --- */

   /* 64to1(1Uto64(expr1)) ==> expr1 */
   DEFINE_PATTERN( p_1Uto64_then_64to1,
                   unop(Iop_64to1, unop(Iop_1Uto64, bind(0))) );
   if (matchIRExpr(&mi,p_1Uto64_then_64to1,e)) {
      IRExpr* expr1 = mi.bindee[0];
      return iselCondCode(env, expr1);
   }

//..    /* 32to1(1Uto32(expr1)) -- the casts are pointless, ignore them */
//..    DEFINE_PATTERN(p_1Uto32_then_32to1,
//..                   unop(Iop_32to1,unop(Iop_1Uto32,bind(0))));
//..    if (matchIRExpr(&mi,p_1Uto32_then_32to1,e)) {
//..       IRExpr* expr1 = mi.bindee[0];
//..       return iselCondCode(env, expr1);
//..    }
//.. 
//..    /* 32to1(1Sto32(expr1)) -- the casts are pointless, ignore them */
//..    DEFINE_PATTERN(p_1Sto32_then_32to1,
//..                   unop(Iop_32to1,unop(Iop_1Sto32,bind(0))));
//..    if (matchIRExpr(&mi,p_1Sto32_then_32to1,e)) {
//..       IRExpr* expr1 = mi.bindee[0];
//..       return iselCondCode(env, expr1);
//..    }

   /* 64to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
      AMD64RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(1),rm));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(0xFF),AMD64RM_Reg(r)));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      HReg      tmp  = newVRegI(env);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_MovZLQ(r1,tmp));
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(x) */
   if (e->tag == Iex_Unop 
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg      r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ8:  return Acc_Z;
         case Iop_CmpNE8:  return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX8");
      }
   }

   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg      r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16:  return Acc_Z;
         case Iop_CmpNE16:  return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX16");
      }
   }

   /* CmpEQ32 / CmpNE32 */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg      r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(r)));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  return Acc_Z;
         case Iop_CmpNE32:  return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX32");
      }
   }
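
   /* Why the SHL by 32 in the 32-bit case: after the XOR, the low 32
      bits of r are zero iff the operands' low halves were equal, but
      bits 63:32 may hold junk from however the values were widened to
      64 bits.  Shifting left by 32 discards the junk and makes Z
      reflect only the meaningful half; the 8- and 16-bit cases above
      achieve the same effect with an AND mask instead. */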

   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop 
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           //|| e->Iex.Binop.op == Iop_CmpLE64U
          )) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64:  return Acc_Z;
         case Iop_CmpNE64:  return Acc_NZ;
         case Iop_CmpLT64S: return Acc_L;
         case Iop_CmpLT64U: return Acc_B;
         case Iop_CmpLE64S: return Acc_LE;
         //case Iop_CmpLE64U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX64");
      }
   }

//..    /* CmpNE64(1Sto64(b), 0) ==> b */
//..    {
//..       DECLARE_PATTERN(p_CmpNE64_1Sto64);
//..       DEFINE_PATTERN(
//..          p_CmpNE64_1Sto64,
//..          binop(Iop_CmpNE64, unop(Iop_1Sto64,bind(0)), mkU64(0)));
//..       if (matchIRExpr(&mi, p_CmpNE64_1Sto64, e)) {
//..          return iselCondCode(env, mi.bindee[0]);
//..       }
//..    }
//.. 
//..    /* CmpNE64(x, 0) */
//..    {
//..       DECLARE_PATTERN(p_CmpNE64_x_zero);
//..       DEFINE_PATTERN(
//..          p_CmpNE64_x_zero,
//..          binop(Iop_CmpNE64, bind(0), mkU64(0)) );
//..       if (matchIRExpr(&mi, p_CmpNE64_x_zero, e)) {
//..          HReg hi, lo;
//..          IRExpr* x   = mi.bindee[0];
//..          HReg    tmp = newVRegI(env);
//..          iselInt64Expr( &hi, &lo, env, x );
//..          addInstr(env, mk_iMOVsd_RR(hi, tmp));
//..          addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
//..          return Xcc_NZ;
//..       }
//..    }
//.. 
//..    /* CmpNE64 */
//..    if (e->tag == Iex_Binop 
//..        && e->Iex.Binop.op == Iop_CmpNE64) {
//..       HReg hi1, hi2, lo1, lo2;
//..       HReg tHi = newVRegI(env);
//..       HReg tLo = newVRegI(env);
//..       iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
//..       iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
//..       addInstr(env, mk_iMOVsd_RR(hi1, tHi));
//..       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
//..       addInstr(env, mk_iMOVsd_RR(lo1, tLo));
//..       addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
//..       addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
//..       switch (e->Iex.Binop.op) {
//..          case Iop_CmpNE64:  return Xcc_NZ;
//..          default: vpanic("iselCondCode(x86): CmpXX64");
//..       }
//..    }

   ppIRExpr(e);
   vpanic("iselCondCode(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit)               ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller. */

static void iselInt128Expr ( HReg* rHi, HReg* rLo, 
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 
                                 ISelEnv* env, IRExpr* e )
{
//..    HWord fn = 0; /* helper fn for most SIMD64 stuff */
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

//..    /* 64-bit literal */
//..    if (e->tag == Iex_Const) {
//..       ULong w64 = e->Iex.Const.con->Ico.U64;
//..       UInt  wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
//..       UInt  wLo = ((UInt)w64) & 0xFFFFFFFF;
//..       HReg  tLo = newVRegI(env);
//..       HReg  tHi = newVRegI(env);
//..       vassert(e->Iex.Const.con->tag == Ico_U64);
//..       addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
//..       addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
//..       *rHi = tHi;
//..       *rLo = tLo;
//..       return;
//..    }

   /* read 128-bit IRTemp */
   if (e->tag == Iex_Tmp) {
      lookupIRTemp128( rHi, rLo, env, e->Iex.Tmp.tmp);
      return;
   }

//..    /* 64-bit load */
//..    if (e->tag == Iex_LDle) {
//..       HReg     tLo, tHi;
//..       X86AMode *am0, *am4;
//..       vassert(e->Iex.LDle.ty == Ity_I64);
//..       tLo = newVRegI(env);
//..       tHi = newVRegI(env);
//..       am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
//..       am4 = advance4(am0);
//..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
//..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
//..       *rHi = tHi;
//..       *rLo = tLo;
//..       return;
//..    }
//.. 
//..    /* 64-bit GET */
//..    if (e->tag == Iex_Get) {
//..       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
//..       X86AMode* am4 = advance4(am);
//..       HReg tLo = newVRegI(env);
//..       HReg tHi = newVRegI(env);
//..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
//..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
//..       *rHi = tHi;
//..       *rLo = tLo;
//..       return;
//..    }
//.. 
//..    /* 64-bit GETI */
//..    if (e->tag == Iex_GetI) {
//..       X86AMode* am 
//..          = genGuestArrayOffset( env, e->Iex.GetI.descr, 
//..                                 e->Iex.GetI.ix, e->Iex.GetI.bias );
//..       X86AMode* am4 = advance4(am);
//..       HReg tLo = newVRegI(env);
//..       HReg tHi = newVRegI(env);
//..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
//..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
//..       *rHi = tHi;
//..       *rLo = tLo;
//..       return;
//..    }
//.. 
//..    /* 64-bit Mux0X */
//..    if (e->tag == Iex_Mux0X) {
//..       HReg e0Lo, e0Hi, eXLo, eXHi, r8;
//..       HReg tLo = newVRegI(env);
//..       HReg tHi = newVRegI(env);
//..       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
//..       iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
//..       addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
//..       addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
//..       r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
//..       addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
//..       /* This assumes the first cmov32 doesn't trash the condition
//..          codes, so they are still available for the second cmov32 */
//..       addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
//..       addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
//..       *rHi = tHi;
//..       *rLo = tLo;
//..       return;
//..    }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            /* get one operand into %rax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which. */
            HReg     tLo    = newVRegI(env);
            HReg     tHi    = newVRegI(env);
            Bool     syned  = toBool(e->Iex.Binop.op == Iop_MullS64);
            AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg     rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_MulL(syned, 8, rmLeft));
            /* Result is now in RDX:RAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
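
         /* For reference, MulL is the one-operand hardware multiply,
            so the sequence above is roughly

               movq  rRight, %rax
               imulq rmLeft          -- mulq for the unsigned case
               movq  %rdx, tHi       -- 128-bit product arrives
               movq  %rax, tLo       -- in %rdx:%rax

            which is why one operand is forced into %rax and both
            halves are copied out of fixed registers afterwards. */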

         /* 128 x 64 -> (64(rem),64(div)) division */
         case Iop_DivModU128to64:
         case Iop_DivModS128to64: {
            /* Get the 128-bit operand into rdx:rax, and the other into
               any old R/M. */
            HReg sHi, sLo;
            HReg     tLo     = newVRegI(env);
            HReg     tHi     = newVRegI(env);
            Bool     syned   = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
            AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }
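
         /* Div likewise maps onto the one-operand idivq/divq, which
            divides the 128-bit value in %rdx:%rax by the r/m64
            operand, leaving the quotient in %rax and the remainder in
            %rdx.  Note the hardware raises #DE on division by zero or
            if the quotient overflows 64 bits. */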

         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;

//..       /* Or64/And64/Xor64 */
//..       case Iop_Or64:
//..       case Iop_And64:
//..       case Iop_Xor64: {
//..          HReg xLo, xHi, yLo, yHi;
//..          HReg tLo = newVRegI(env);
//..          HReg tHi = newVRegI(env);
//..          X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
//..                        : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
//..                        : Xalu_XOR;
//..          iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
//..          addInstr(env, mk_iMOVsd_RR(xHi, tHi));
//..          addInstr(env, mk_iMOVsd_RR(xLo, tLo));
//..          iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
//..          addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
//..          addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }
//.. 
//..       /* Add64/Sub64 */
//..       case Iop_Add64:
//..       case Iop_Sub64: {
//..          HReg xLo, xHi, yLo, yHi;
//..          HReg tLo = newVRegI(env);
//..          HReg tHi = newVRegI(env);
//..          iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
//..          addInstr(env, mk_iMOVsd_RR(xHi, tHi));
//..          addInstr(env, mk_iMOVsd_RR(xLo, tLo));
//..          iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
//..          if (e->Iex.Binop.op==Iop_Add64) {
//..             addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
//..             addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
//..          } else {
//..             addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
//..             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
//..          }
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }
//.. 
//..       /* 32HLto64(e1,e2) */
//..       case Iop_32HLto64:
//..          *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
//..          *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
//..          return;
//.. 
//..       /* 64-bit shifts */
//..       case Iop_Shl64: {
//..          /* We use the same ingenious scheme as gcc.  Put the value
//..             to be shifted into %hi:%lo, and the shift amount into
//..             %cl.  Then (dsts on right, a la ATT syntax):
//.. 
//..             shldl %cl, %lo, %hi   -- make %hi be right for the
//..                                   -- shift amt %cl % 32
//..             shll  %cl, %lo        -- make %lo be right for the
//..                                   -- shift amt %cl % 32
//.. 
//..             Now, if (shift amount % 64) is in the range 32 .. 63,
//..             we have to do a fixup, which puts the result low half
//..             into the result high half, and zeroes the low half:
//.. 
//..             testl $32, %ecx
//.. 
//..             cmovnz %lo, %hi
//..             movl $0, %tmp         -- sigh; need yet another reg
//..             cmovnz %tmp, %lo 
//..          */
//..          HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
//..          tLo = newVRegI(env);
//..          tHi = newVRegI(env);
//..          tTemp = newVRegI(env);
//..          rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
//..          iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
//..          addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
//..          addInstr(env, mk_iMOVsd_RR(sHi, tHi));
//..          addInstr(env, mk_iMOVsd_RR(sLo, tLo));
//..          /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
//..             and those regs are legitimately modifiable. */
//..          addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
//..          addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
//..          addInstr(env, X86Instr_Test32(X86RI_Imm(32), 
//..                        X86RM_Reg(hregX86_ECX())));
//..          addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
//..          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
//..          addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }
//.. 
//..       case Iop_Shr64: {
//..          /* We use the same ingenious scheme as gcc.  Put the value
//..             to be shifted into %hi:%lo, and the shift amount into
//..             %cl.  Then:
//.. 
//..             shrdl %cl, %hi, %lo   -- make %lo be right for the
//..                                   -- shift amt %cl % 32
//..             shrl  %cl, %hi        -- make %hi be right for the
//..                                   -- shift amt %cl % 32
//.. 
//..             Now, if (shift amount % 64) is in the range 32 .. 63,
//..             we have to do a fixup, which puts the result high half
//..             into the result low half, and zeroes the high half:
//.. 
//..             testl $32, %ecx
//.. 
//..             cmovnz %hi, %lo
//..             movl $0, %tmp         -- sigh; need yet another reg
//..             cmovnz %tmp, %hi
//..          */
//..          HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
//..          tLo = newVRegI(env);
//..          tHi = newVRegI(env);
//..          tTemp = newVRegI(env);
//..          rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
//..          iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
//..          addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
//..          addInstr(env, mk_iMOVsd_RR(sHi, tHi));
//..          addInstr(env, mk_iMOVsd_RR(sLo, tLo));
//..          /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
//..             and those regs are legitimately modifiable. */
//..          addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
//..          addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
//..          addInstr(env, X86Instr_Test32(X86RI_Imm(32), 
//..                        X86RM_Reg(hregX86_ECX())));
//..          addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
//..          addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
//..          addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }
//.. 
//..       /* F64 -> I64 */
//..       /* Sigh, this is an almost exact copy of the F64 -> I32/I16
//..          case.  Unfortunately I see no easy way to avoid the
//..          duplication. */
//..       case Iop_F64toI64: {
//..          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
//..          HReg tLo = newVRegI(env);
//..          HReg tHi = newVRegI(env);
//.. 
//..          /* Used several times ... */
//..          /* Careful ... this sharing is only safe because
//..             zero_esp/four_esp do not hold any registers which the
//..             register allocator could attempt to swizzle later. */
//..          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..          X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
//.. 
//..          /* rf now holds the value to be converted, and rrm holds
//..             the rounding mode value, encoded as per the
//..             IRRoundingMode enum.  The first thing to do is set the
//..             FPU's rounding mode accordingly. */
//.. 
//..          /* Create a space for the format conversion. */
//..          /* subl $8, %esp */
//..          sub_from_esp(env, 8);
//.. 
//..          /* Set host rounding mode */
//..          set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
//.. 
//..          /* gistll %rf, 0(%esp) */
//..          addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
//.. 
//..          /* movl 0(%esp), %dstLo */
//..          /* movl 4(%esp), %dstHi */
//..          addInstr(env, X86Instr_Alu32R(
//..                           Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
//..          addInstr(env, X86Instr_Alu32R(
//..                           Xalu_MOV, X86RMI_Mem(four_esp), tHi));
//.. 
//..          /* Restore default FPU rounding. */
//..          set_FPU_rounding_default( env );
//.. 
//..          /* addl $8, %esp */
//..          add_to_esp(env, 8);
//.. 
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }
//.. 
//..       case Iop_Add8x8:
//..          fn = (HWord)h_generic_calc_Add8x8; goto binnish;
//..       case Iop_Add16x4:
//..          fn = (HWord)h_generic_calc_Add16x4; goto binnish;
//..       case Iop_Add32x2:
//..          fn = (HWord)h_generic_calc_Add32x2; goto binnish;
//.. 
//..       case Iop_Avg8Ux8:
//..          fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
//..       case Iop_Avg16Ux4:
//..          fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
//.. 
//..       case Iop_CmpEQ8x8:
//..          fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
//..       case Iop_CmpEQ16x4:
//..          fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
//..       case Iop_CmpEQ32x2:
//..          fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
//.. 
//..       case Iop_CmpGT8Sx8:
//..          fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
//..       case Iop_CmpGT16Sx4:
//..          fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
//..       case Iop_CmpGT32Sx2:
//..          fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
//.. 
//..       case Iop_InterleaveHI8x8:
//..          fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
//..       case Iop_InterleaveLO8x8:
//..          fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
//..       case Iop_InterleaveHI16x4:
//..          fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
//..       case Iop_InterleaveLO16x4:
//..          fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
//..       case Iop_InterleaveHI32x2:
//..          fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
//..       case Iop_InterleaveLO32x2:
//..          fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
//.. 
//..       case Iop_Max8Ux8:
//..          fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
//..       case Iop_Max16Sx4:
//..          fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
//..       case Iop_Min8Ux8:
//..          fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
//..       case Iop_Min16Sx4:
//..          fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
//.. 
//..       case Iop_Mul16x4:
//..          fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
//..       case Iop_MulHi16Sx4:
//..          fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
//..       case Iop_MulHi16Ux4:
//..          fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
//.. 
//..       case Iop_QAdd8Sx8:
//..          fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
//..       case Iop_QAdd16Sx4:
//..          fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
//..       case Iop_QAdd8Ux8:
//..          fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
//..       case Iop_QAdd16Ux4:
//..          fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
//.. 
//..       case Iop_QNarrow32Sx2:
//..          fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
//..       case Iop_QNarrow16Sx4:
//..          fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
//..       case Iop_QNarrow16Ux4:
//..          fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;
//.. 
//..       case Iop_QSub8Sx8:
//..          fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
//..       case Iop_QSub16Sx4:
//..          fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
//..       case Iop_QSub8Ux8:
//..          fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
//..       case Iop_QSub16Ux4:
//..          fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
//.. 
//..       case Iop_Sub8x8:
//..          fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
//..       case Iop_Sub16x4:
//..          fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
//..       case Iop_Sub32x2:
//..          fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
//.. 
//..       binnish: {
//..          /* Note: the following assumes all helpers are of
//..             signature 
//..                ULong fn ( ULong, ULong ), and they are
//..             not marked as regparm functions. 
//..          */
//..          HReg xLo, xHi, yLo, yHi;
//..          HReg tLo = newVRegI(env);
//..          HReg tHi = newVRegI(env);
//..          iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
//..          iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
//..          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
//..          add_to_esp(env, 4*4);
//..          addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
//..          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }
//.. 
//..       case Iop_ShlN32x2:
//..          fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
//..       case Iop_ShlN16x4:
//..          fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
//..       case Iop_ShrN32x2:
//..          fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
//..       case Iop_ShrN16x4:
//..          fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
//..       case Iop_SarN32x2:
//..          fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
//..       case Iop_SarN16x4:
//..          fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
//..       shifty: {
//..          /* Note: the following assumes all helpers are of
//..             signature 
//..                ULong fn ( ULong, UInt ), and they are
//..             not marked as regparm functions. 
//..          */
//..          HReg xLo, xHi;
//..          HReg tLo = newVRegI(env);
//..          HReg tHi = newVRegI(env);
//..          X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
//..          addInstr(env, X86Instr_Push(y));
//..          iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
//..          addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
//..          add_to_esp(env, 3*4);
//..          addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
//..          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
//..          *rHi = tHi;
//..          *rLo = tLo;
//..          return;
//..       }

         default: 
            break;
      }
   } /* if (e->tag == Iex_Binop) */


//..    /* --------- UNARY ops --------- */
//..    if (e->tag == Iex_Unop) {
//..       switch (e->Iex.Unop.op) {
//.. 
//..          /* 32Sto64(e) */
//..          case Iop_32Sto64: {
//..             HReg tLo = newVRegI(env);
//..             HReg tHi = newVRegI(env);
//..             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//..             addInstr(env, mk_iMOVsd_RR(src,tHi));
//..             addInstr(env, mk_iMOVsd_RR(src,tLo));
//..             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
//..             *rHi = tHi;
//..             *rLo = tLo;
//..             return;
//..          }
//.. 
//..          /* 32Uto64(e) */
//..          case Iop_32Uto64: {
//..             HReg tLo = newVRegI(env);
//..             HReg tHi = newVRegI(env);
//..             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//..             addInstr(env, mk_iMOVsd_RR(src,tLo));
//..             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
//..             *rHi = tHi;
//..             *rLo = tLo;
//..             return;
//..          }

//..          /* could do better than this, but for now ... */
//..          case Iop_1Sto64: {
//..             HReg tLo = newVRegI(env);
//..             HReg tHi = newVRegI(env);
//..             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
//..             addInstr(env, X86Instr_Set32(cond,tLo));
//..             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
//..             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
//..             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
//..             *rHi = tHi;
//..             *rLo = tLo;
//..             return;
//..          }
//.. 
//..          /* Not64(e) */
//..          case Iop_Not64: {
//..             HReg tLo = newVRegI(env);
//..             HReg tHi = newVRegI(env);
//..             HReg sHi, sLo;
//..             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
//..             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
//..             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
//..             addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
//..             addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
//..             *rHi = tHi;
//..             *rLo = tLo;
//..             return;
//..          }
//..
//..          case Iop_CmpNEZ32x2:
//..             fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
//..          case Iop_CmpNEZ16x4:
//..             fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
//..          case Iop_CmpNEZ8x8:
//..             fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
//..          unish: {
//..             /* Note: the following assumes all helpers are of
//..                signature 
//..                   ULong fn ( ULong ), and they are
//..                not marked as regparm functions. 
//..             */
//..             HReg xLo, xHi;
//..             HReg tLo = newVRegI(env);
//..             HReg tHi = newVRegI(env);
//..             iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
//..             addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
//..             addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
//..             addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
//..             add_to_esp(env, 2*4);
//..             addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
//..             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
//..             *rHi = tHi;
//..             *rLo = tLo;
//..             return;
//..          }
//.. 
//..          default: 
//..             break;
//..       }
//..    } /* if (e->tag == Iex_Unop) */
//.. 
//.. 
//..    /* --------- CCALL --------- */
//..    if (e->tag == Iex_CCall) {
//..       HReg tLo = newVRegI(env);
//..       HReg tHi = newVRegI(env);
//.. 
//..       /* Marshal args, do the call, clear stack. */
//..       doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
//.. 
//..       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
//..       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
//..       *rHi = tHi;
//..       *rLo = tLo;
//..       return;
//..    }

   ppIRExpr(e);
   vpanic("iselInt128Expr");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_Tmp) {
      return lookupIRTemp(env, e->Iex.Tmp.tmp);
   }

   if (e->tag == Iex_LDle) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.LDle.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard SSE register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
      return res;
   }

//..    if (e->tag == Iex_Unop
//..        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
//..       /* Given an I32, produce an IEEE754 float with the same bit
//..          pattern. */
//..       HReg dst = newVRegF(env);
//..       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
//..       /* paranoia */
//..       addInstr(env, X86Instr_Push(rmi));
//..       addInstr(env, X86Instr_FpLdSt(
//..                        True/*load*/, 4, dst, 
//..                        X86AMode_IR(0, hregX86_ESP())));
//..       add_to_esp(env, 4);
//..       return dst;
//..    }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into the lower half of an xmm
   register, the identity of which is returned.  As with
   iselIntExpr_R, the returned reg will be virtual, and it must not be
   changed by subsequent code emitted by the caller.
*/

/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/
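
/* A concrete instance of the table above: for normalised numbers the
   value denoted is (-1)^S * 2^(E-1023) * 1.F, so 1.0 is S=0, E=1023,
   F=0, giving the bit pattern 0x3FF0000000000000, and -2.0 is S=1,
   E=1024, F=0, giving 0xC000000000000000. */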

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_Tmp) {
      return lookupIRTemp(env, e->Iex.Tmp.tmp);
   }

   if (e->tag == Iex_Const) {
      union { ULong u64; Double f64; } u;
      HReg res = newVRegV(env);
      HReg tmp = newVRegI(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(amd64): const");

      addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/, 8, res, 
                       AMD64AMode_IR(0, hregAMD64_RSP())
                    ));
      add_to_rsp(env, 8);
      return res;
   }
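
   /* The round trip through the stack above is because there is no
      immediate-to-XMM move: the literal is built in an integer
      register, pushed, and reloaded with the same SseLdSt used for
      ordinary 8-byte loads.  An SSE2 movq-style GPR-to-XMM transfer
      would also do, but (presumably) the push/load pair keeps things
      within the instruction kinds already defined. */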

   if (e->tag == Iex_LDle) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.LDle.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      AMD64AMode* am 
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr, 
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Binop) {
      AMD64SseOp op = Asse_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_AddF64: op = Asse_ADDF; break;
         case Iop_SubF64: op = Asse_SUBF; break;
         case Iop_MulF64: op = Asse_MULF; break;
         case Iop_DivF64: op = Asse_DIVF; break;
         default: break;
      }
      if (op != Asse_INVALID) {
         HReg dst  = newVRegV(env);
         HReg argL = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselDblExpr(env, e->Iex.Binop.arg2);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
         return dst;
      }
   }

//..    if (e->tag == Iex_Binop) {
//..       X86FpOp fpop = Xfp_INVALID;
//..       switch (e->Iex.Binop.op) {
//..          case Iop_AddF64:    fpop = Xfp_ADD; break;
//..          case Iop_SubF64:    fpop = Xfp_SUB; break;
//..          case Iop_MulF64:    fpop = Xfp_MUL; break;
//..          case Iop_DivF64:    fpop = Xfp_DIV; break;
//..          case Iop_ScaleF64:  fpop = Xfp_SCALE; break;
//..          case Iop_AtanF64:   fpop = Xfp_ATAN; break;
//..          case Iop_Yl2xF64:   fpop = Xfp_YL2X; break;
//..          case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
//..          case Iop_PRemF64:   fpop = Xfp_PREM; break;
//..          case Iop_PRem1F64:  fpop = Xfp_PREM1; break;
//..          default: break;
//..       }
//..       if (fpop != Xfp_INVALID) {
//..          HReg res  = newVRegF(env);
//..          HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
//..          HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
//..          addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
//..          if (fpop != Xfp_ADD && fpop != Xfp_SUB 
//..              && fpop != Xfp_MUL && fpop != Xfp_DIV)
//..             roundToF64(env, res);
//..          return res;
//..       }
//..    }
//.. 
//..    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
//..       HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
//..       HReg dst = newVRegF(env);
//.. 
//..       /* rf now holds the value to be rounded.  The first thing to do
//..          is set the FPU's rounding mode accordingly. */
//.. 
//..       /* Set host rounding mode */
//..       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
//.. 
//..       /* grndint %rf, %dst */
//..       addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
//.. 
//..       /* Restore default FPU rounding. */
//..       set_FPU_rounding_default( env );
//.. 
//..       return dst;
//..    }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32toF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
      set_SSE_rounding_default( env );
      addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
      return dst;
   }

   if (e->tag == Iex_Unop 
       && (e->Iex.Unop.op == Iop_NegF64
           || e->Iex.Unop.op == Iop_AbsF64)) {
      /* Sigh ... very rough code.  Could do much better. */
      /* Get the 128-bit literal 00---0 10---0 onto the stack, load
         it into a vreg, and xor/andn it with the value to be
         negated. */
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));

      if (e->Iex.Unop.op == Iop_NegF64)
         addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      else
         addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));

      add_to_rsp(env, 16);
      return dst;
   }
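
   /* The 16-byte constant loaded above has just the sign bit of the
      low (F64-carrying) lane set: 0x8000000000000000 in the low 8
      bytes, zero in the high 8.  XOR with it flips the sign, e.g.
      turning -1.25 (0xBFF4000000000000) into 1.25
      (0x3FF4000000000000); ANDN (andnpd: dst := ~dst & src), with the
      mask sitting in dst, instead clears the sign bit, giving
      AbsF64. */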

//..    if (e->tag == Iex_Unop) {
//..       X86FpOp fpop = Xfp_INVALID;
//..       switch (e->Iex.Unop.op) {
//..          case Iop_NegF64:  fpop = Xfp_NEG; break;
//..          case Iop_AbsF64:  fpop = Xfp_ABS; break;
//..          case Iop_SqrtF64: fpop = Xfp_SQRT; break;
//..          case Iop_SinF64:  fpop = Xfp_SIN; break;
//..          case Iop_CosF64:  fpop = Xfp_COS; break;
//..          case Iop_TanF64:  fpop = Xfp_TAN; break;
//..          case Iop_2xm1F64: fpop = Xfp_2XM1; break;
//..          default: break;
//..       }
//..       if (fpop != Xfp_INVALID) {
//..          HReg res = newVRegF(env);
//..          HReg src = iselDblExpr(env, e->Iex.Unop.arg);
//..          addInstr(env, X86Instr_FpUnary(fpop,src,res));
//..          if (fpop != Xfp_SQRT
//..              && fpop != Xfp_NEG && fpop != Xfp_ABS)
//..             roundToF64(env, res);
//..          return res;
//..       }
//..    }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//..          case Iop_I32toF64: {
//..             HReg dst = newVRegF(env);
//..             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
//..             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
//..             set_FPU_rounding_default(env);
//..             addInstr(env, X86Instr_FpLdStI(
//..                              True/*load*/, 4, dst, 
//..                              X86AMode_IR(0, hregX86_ESP())));
//..             add_to_esp(env, 4);
//..             return dst;
//..          }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegV(env);
            AMD64RI*    src    = iselIntExpr_RI(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
            return dst;
         }
         case Iop_F32toF64: {
            HReg f32;
            HReg f64 = newVRegV(env);
            /* this shouldn't be necessary, but be paranoid ... */
            set_SSE_rounding_default(env);
            f32 = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
            return f64;
         }
         default: 
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_Mux0X) {
      HReg r8, rX, r0, dst;
      vassert(ty == Ity_F64);
      vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
      r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
      rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
      r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
      dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(rX,dst));
      addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(0xFF), AMD64RM_Reg(r8)));
      addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}


/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   Bool       arg1isEReg = False;
   AMD64SseOp op         = Asse_INVALID;
   IRType     ty         = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_Tmp) {
      return lookupIRTemp(env, e->Iex.Tmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/, 
                       16,
                       dst,
                       AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
                    )
              );
      return dst;
   }

   if (e->tag == Iex_LDle) {
      HReg        dst = newVRegV(env);
      AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.LDle.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
      if (e->Iex.Const.con->Ico.V128 == 0x0000) {
         addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
         return dst;
      } else 
      if (e->Iex.Const.con->Ico.V128 == 0x00FF) {
         AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
         /* Both of these literals are sign-extended to 64 bits. */
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0xFFFFFFFF)));
         addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
         add_to_rsp(env, 16);
         return dst;
      } else 
      if (e->Iex.Const.con->Ico.V128 == 0x000F) {
         HReg tmp = newVRegI(env);
         AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
         addInstr(env, AMD64Instr_Imm64(0xFFFFFFFFULL, tmp));
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
         addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
         add_to_rsp(env, 16);
         return dst;
      } else {
         goto vec_fail;
#        if 0
         addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
         return dst;
#        endif
      }
   }
sewardj0852a132005-02-21 08:28:46 +00003046
   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

      case Iop_NotV128: {
         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
         return do_sse_NotV128(env, arg);
      }

//..       case Iop_CmpNEZ64x2: {
//..          /* We can use SSE2 instructions for this. */
//..          /* Ideally, we want to do a 64Ix2 comparison against zero of
//..             the operand.  Problem is no such insn exists.  Solution
//..             therefore is to do a 32Ix4 comparison instead, and bitwise-
//..             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
//..             let the not'd result of this initial comparison be a:b:c:d.
//..             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
//..             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
//..             giving the required result.
//..
//..             The required selection sequence is 2,3,0,1, which
//..             according to Intel's documentation means the pshufd
//..             literal value is 0xB1, that is,
//..             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
//..          */
//..          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
//..          HReg tmp = newVRegV(env);
//..          HReg dst = newVRegV(env);
//..          REQUIRE_SSE2;
//..          addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
//..          addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
//..          tmp = do_sse_Not128(env, tmp);
//..          addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
//..          addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
//..          return dst;
//..       }
//..
//..       case Iop_CmpNEZ32x4: {
//..          /* Sigh, we have to generate lousy code since this has to
//..             work on SSE1 hosts */
//..          /* basically, the idea is: for each lane:
//..                movl lane, %r ; negl %r   (now CF = lane==0 ? 0 : 1)
//..                sbbl %r, %r               (now %r = 1Sto32(CF))
//..                movl %r, lane
//..          */
//..          Int       i;
//..          X86AMode* am;
//..          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
//..          HReg      arg  = iselVecExpr(env, e->Iex.Unop.arg);
//..          HReg      dst  = newVRegV(env);
//..          HReg      r32  = newVRegI(env);
//..          sub_from_esp(env, 16);
//..          addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
//..          for (i = 0; i < 4; i++) {
//..             am = X86AMode_IR(i*4, hregX86_ESP());
//..             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
//..             addInstr(env, X86Instr_Unary32(Xun_NEG, X86RM_Reg(r32)));
//..             addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
//..             addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
//..          }
//..          addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
//..          add_to_esp(env, 16);
//..          return dst;
//..       }
//..
//..       case Iop_CmpNEZ8x16:
//..       case Iop_CmpNEZ16x8: {
//..          /* We can use SSE2 instructions for this. */
//..          HReg arg;
//..          HReg vec0 = newVRegV(env);
//..          HReg vec1 = newVRegV(env);
//..          HReg dst  = newVRegV(env);
//..          X86SseOp cmpOp
//..             = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
//..                                              : Xsse_CMPEQ8;
//..          REQUIRE_SSE2;
//..          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
//..          addInstr(env, mk_vMOVsd_RR(vec0, vec1));
//..          addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
//..          /* defer arg computation to here so as to give CMPEQF as long
//..             as possible to complete */
//..          arg = iselVecExpr(env, e->Iex.Unop.arg);
//..          /* vec0 is all 0s; vec1 is all 1s */
//..          addInstr(env, mk_vMOVsd_RR(arg, dst));
//..          /* 16x8 or 8x16 comparison == */
//..          addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
//..          /* invert result */
//..          addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
//..          return dst;
//..       }
//..
//..       case Iop_Recip32Fx4: op = Xsse_RCPF;   goto do_32Fx4_unary;
//..       case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
//..       case Iop_Sqrt32Fx4:  op = Xsse_SQRTF;  goto do_32Fx4_unary;
//..       do_32Fx4_unary:
//..       {
//..          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
//..          HReg dst = newVRegV(env);
//..          addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
//..          return dst;
//..       }
//..
//..       case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
//..       case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
//..       case Iop_Sqrt64Fx2:  op = Xsse_SQRTF;  goto do_64Fx2_unary;
//..       do_64Fx2_unary:
//..       {
//..          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
//..          HReg dst = newVRegV(env);
//..          REQUIRE_SSE2;
//..          addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
//..          return dst;
//..       }
//..
//..       case Iop_Recip32F0x4: op = Xsse_RCPF;   goto do_32F0x4_unary;
//..       case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
//..       case Iop_Sqrt32F0x4:  op = Xsse_SQRTF;  goto do_32F0x4_unary;
//..       do_32F0x4_unary:
//..       {
//..          /* A bit subtle.  We have to copy the arg to the result
//..             register first, because actually doing the SSE scalar insn
//..             leaves the upper 3/4 of the destination register
//..             unchanged.  Whereas the required semantics of these
//..             primops is that the upper 3/4 is simply copied in from the
//..             argument. */
//..          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
//..          HReg dst = newVRegV(env);
//..          addInstr(env, mk_vMOVsd_RR(arg, dst));
//..          addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
//..          return dst;
//..       }
//..
//..       case Iop_Recip64F0x2: op = Xsse_RCPF;   goto do_64F0x2_unary;
//..       case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
      case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
      do_64F0x2_unary:
      {
         /* A bit subtle.  We have to copy the arg to the result
            register first, because actually doing the SSE scalar insn
            leaves the upper half of the destination register
            unchanged.  Whereas the required semantics of these
            primops are that the upper half is simply copied in from
            the argument. */
         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(arg, dst));
         addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
         return dst;
      }

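      /* The next two cases zero-extend an integer into the low lanes
         of an xmm register, by parking the value in memory and doing
         a zeroing low-lane load (SseLdzLO).  The 32-bit case writes
         just below %rsp without adjusting it, presumably relying on
         the amd64 ABI's red zone; the 64-bit case does a real
         push/pop. */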
      case Iop_32UtoV128: {
         HReg        dst     = newVRegV(env);
         AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
         AMD64RI*    ri      = iselIntExpr_RI(env, e->Iex.Unop.arg);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
         addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
         return dst;
      }

      case Iop_64UtoV128: {
         HReg        dst  = newVRegV(env);
         AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
         AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
         addInstr(env, AMD64Instr_Push(rmi));
         addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
         add_to_rsp(env, 8);
         return dst;
      }

      default:
         break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

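      /* SetV128lo64/lo32: spill the vector to 16 bytes just below the
         stack pointer, overwrite the low 8 (resp. 4) bytes with the
         integer, and reload the whole vector. */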
      case Iop_SetV128lo64: {
         HReg dst  = newVRegV(env);
         HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
         AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
         return dst;
      }

      case Iop_SetV128lo32: {
         HReg dst  = newVRegV(env);
         HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
         AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
         addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
         return dst;
      }

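      /* 64HLtoV128: arg1 is pushed first and so ends up at the higher
         address; after the 16-byte reload, arg1 forms the upper 64
         bits of the result and arg2 the lower. */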
      case Iop_64HLtoV128: {
         AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
         HReg        dst = newVRegV(env);
         /* do this via the stack (easy, convenient, etc) */
         addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
         addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
         add_to_rsp(env, 16);
         return dst;
      }

//..       case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
//..       case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
//..       case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
//..       case Iop_Add32Fx4:   op = Xsse_ADDF;   goto do_32Fx4;
//..       case Iop_Div32Fx4:   op = Xsse_DIVF;   goto do_32Fx4;
//..       case Iop_Max32Fx4:   op = Xsse_MAXF;   goto do_32Fx4;
//..       case Iop_Min32Fx4:   op = Xsse_MINF;   goto do_32Fx4;
//..       case Iop_Mul32Fx4:   op = Xsse_MULF;   goto do_32Fx4;
//..       case Iop_Sub32Fx4:   op = Xsse_SUBF;   goto do_32Fx4;
//..       do_32Fx4:
//..       {
//..          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
//..          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
//..          HReg dst = newVRegV(env);
//..          addInstr(env, mk_vMOVsd_RR(argL, dst));
//..          addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
//..          return dst;
//..       }
//..
//..       case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
//..       case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
//..       case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
//..       case Iop_Add64Fx2:   op = Xsse_ADDF;   goto do_64Fx2;
//..       case Iop_Div64Fx2:   op = Xsse_DIVF;   goto do_64Fx2;
//..       case Iop_Max64Fx2:   op = Xsse_MAXF;   goto do_64Fx2;
//..       case Iop_Min64Fx2:   op = Xsse_MINF;   goto do_64Fx2;
//..       case Iop_Mul64Fx2:   op = Xsse_MULF;   goto do_64Fx2;
//..       case Iop_Sub64Fx2:   op = Xsse_SUBF;   goto do_64Fx2;
//..       do_64Fx2:
//..       {
//..          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
//..          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
//..          HReg dst = newVRegV(env);
//..          REQUIRE_SSE2;
//..          addInstr(env, mk_vMOVsd_RR(argL, dst));
//..          addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
//..          return dst;
//..       }

//..       case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
      case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
//..       case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
      case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
      case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
      case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
      case Iop_Min32F0x4:   op = Asse_MINF;   goto do_32F0x4;
      case Iop_Mul32F0x4:   op = Asse_MULF;   goto do_32F0x4;
      case Iop_Sub32F0x4:   op = Asse_SUBF;   goto do_32F0x4;
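      /* Lowest-lane-only SSE binaries: copy argL into dst first, then
         apply the scalar op with argR.  The op writes only the low
         lane, so the remaining lanes of argL pass through to dst. */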
      do_32F0x4: {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
         return dst;
      }

      case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
      case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
      case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
      case Iop_Add64F0x2:   op = Asse_ADDF;   goto do_64F0x2;
      case Iop_Div64F0x2:   op = Asse_DIVF;   goto do_64F0x2;
      case Iop_Max64F0x2:   op = Asse_MAXF;   goto do_64F0x2;
      case Iop_Min64F0x2:   op = Asse_MINF;   goto do_64F0x2;
      case Iop_Mul64F0x2:   op = Asse_MULF;   goto do_64F0x2;
      case Iop_Sub64F0x2:   op = Asse_SUBF;   goto do_64F0x2;
      do_64F0x2: {
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
         return dst;
      }

//..       case Iop_QNarrow32Sx4:
//..          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_QNarrow16Sx8:
//..          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_QNarrow16Ux8:
//..          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
//..
//..       case Iop_InterleaveHI8x16:
//..          op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_InterleaveHI16x8:
//..          op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_InterleaveHI32x4:
//..          op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_InterleaveHI64x2:
//..          op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
//..
//..       case Iop_InterleaveLO8x16:
//..          op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_InterleaveLO16x8:
//..          op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_InterleaveLO32x4:
//..          op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
//..       case Iop_InterleaveLO64x2:
//..          op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
//..
      case Iop_AndV128:    op = Asse_AND;      goto do_SseReRg;
      case Iop_OrV128:     op = Asse_OR;       goto do_SseReRg;
      case Iop_XorV128:    op = Asse_XOR;      goto do_SseReRg;
//..       case Iop_Add8x16:    op = Xsse_ADD8;     goto do_SseReRg;
//..       case Iop_Add16x8:    op = Xsse_ADD16;    goto do_SseReRg;
//..       case Iop_Add32x4:    op = Xsse_ADD32;    goto do_SseReRg;
//..       case Iop_Add64x2:    op = Xsse_ADD64;    goto do_SseReRg;
//..       case Iop_QAdd8Sx16:  op = Xsse_QADD8S;   goto do_SseReRg;
//..       case Iop_QAdd16Sx8:  op = Xsse_QADD16S;  goto do_SseReRg;
//..       case Iop_QAdd8Ux16:  op = Xsse_QADD8U;   goto do_SseReRg;
//..       case Iop_QAdd16Ux8:  op = Xsse_QADD16U;  goto do_SseReRg;
//..       case Iop_Avg8Ux16:   op = Xsse_AVG8U;    goto do_SseReRg;
//..       case Iop_Avg16Ux8:   op = Xsse_AVG16U;   goto do_SseReRg;
//..       case Iop_CmpEQ8x16:  op = Xsse_CMPEQ8;   goto do_SseReRg;
//..       case Iop_CmpEQ16x8:  op = Xsse_CMPEQ16;  goto do_SseReRg;
//..       case Iop_CmpEQ32x4:  op = Xsse_CMPEQ32;  goto do_SseReRg;
//..       case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S;  goto do_SseReRg;
//..       case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
//..       case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
//..       case Iop_Max16Sx8:   op = Xsse_MAX16S;   goto do_SseReRg;
//..       case Iop_Max8Ux16:   op = Xsse_MAX8U;    goto do_SseReRg;
//..       case Iop_Min16Sx8:   op = Xsse_MIN16S;   goto do_SseReRg;
//..       case Iop_Min8Ux16:   op = Xsse_MIN8U;    goto do_SseReRg;
//..       case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
//..       case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
//..       case Iop_Mul16x8:    op = Xsse_MUL16;    goto do_SseReRg;
//..       case Iop_Sub8x16:    op = Xsse_SUB8;     goto do_SseReRg;
//..       case Iop_Sub16x8:    op = Xsse_SUB16;    goto do_SseReRg;
//..       case Iop_Sub32x4:    op = Xsse_SUB32;    goto do_SseReRg;
//..       case Iop_Sub64x2:    op = Xsse_SUB64;    goto do_SseReRg;
//..       case Iop_QSub8Sx16:  op = Xsse_QSUB8S;   goto do_SseReRg;
//..       case Iop_QSub16Sx8:  op = Xsse_QSUB16S;  goto do_SseReRg;
//..       case Iop_QSub8Ux16:  op = Xsse_QSUB8U;   goto do_SseReRg;
//..       case Iop_QSub16Ux8:  op = Xsse_QSUB16U;  goto do_SseReRg;
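      /* Generic two-operand SSE op.  These instructions are
         destructive (dst op= src), so the appropriate argument is
         copied into a fresh vreg first.  arg1isEReg marks the
         non-commutative pack/unpack cases, where arg1 must supply the
         E (second) operand; that path is disabled pending a test
         case. */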
      do_SseReRg: {
         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegV(env);
         if (arg1isEReg) {
            goto vec_fail; /* awaiting test case */
            addInstr(env, mk_vMOVsd_RR(arg2, dst));
            addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
         } else {
            addInstr(env, mk_vMOVsd_RR(arg1, dst));
            addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
         }
         return dst;
      }

//..       case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
//..       case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
//..       case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
//..       case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
//..       case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
//..       case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
//..       case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
//..       case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
//..       do_SseShift: {
//..          HReg      greg = iselVecExpr(env, e->Iex.Binop.arg1);
//..          X86RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
//..          X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
//..          HReg      ereg = newVRegV(env);
//..          HReg      dst  = newVRegV(env);
//..          REQUIRE_SSE2;
//..          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
//..          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
//..          addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
//..          addInstr(env, X86Instr_Push(rmi));
//..          addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
//..          addInstr(env, mk_vMOVsd_RR(greg, dst));
//..          addInstr(env, X86Instr_SseReRg(op, ereg, dst));
//..          add_to_esp(env, 16);
//..          return dst;
//..       }

      default:
         break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

//..    if (e->tag == Iex_Mux0X) {
//..       HReg r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
//..       HReg rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
//..       HReg r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
//..       HReg dst = newVRegV(env);
//..       addInstr(env, mk_vMOVsd_RR(rX,dst));
//..       addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
//..       addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
//..       return dst;
//..    }
//..
   vec_fail:
   vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexSubArch(env->subarch));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_STle: {
      AMD64AMode* am;
      IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.STle.addr);
      IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.STle.data);
      vassert(tya == Ity_I64);
      am = iselIntExpr_AMode(env, stmt->Ist.STle.addr);
      if (tyd == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.STle.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.STle.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         HReg r = iselDblExpr(env, stmt->Ist.STle.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         HReg r = iselFltExpr(env, stmt->Ist.STle.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
//..       if (tyd == Ity_I64) {
//..          HReg vHi, vLo, rA;
//..          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.STle.data);
//..          rA = iselIntExpr_R(env, stmt->Ist.STle.addr);
//..          addInstr(env, X86Instr_Alu32M(
//..                           Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
//..          addInstr(env, X86Instr_Alu32M(
//..                           Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
//..          return;
//..       }
      if (tyd == Ity_V128) {
         HReg r = iselVecExpr(env, stmt->Ist.STle.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to the in-memory guest state, so
            compute the RHS into an AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_V128) {
         HReg        vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am  = AMD64AMode_IR(stmt->Ist.Put.offset,
                                         hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
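   /* PutI writes an element of a cyclic guest-state array (used for
      things like the x87 register stack).  genGuestArrayOffset folds
      descr/ix/bias into a %rbp-relative amode, including the
      wraparound arithmetic. */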
   case Ist_PutI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, stmt->Ist.PutI.descr,
              stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );

      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_Tmp: {
      IRTemp tmp = stmt->Ist.Tmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);
      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.Tmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Tmp.data);
         lookupIRTemp128( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.Tmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.Tmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.Tmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.Tmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRType   retty;
      IRDirty* d = stmt->Ist.Dirty.details;
      Bool     passBBP = False;

      if (d->nFxState == 0)
         vassert(!d->needsBBP);

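      /* If the helper touches guest state (nFxState > 0) and asks for
         it, pass the guest state ("baseblock") pointer as a hidden
         first argument. */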
      passBBP = toBool(d->nFxState > 0 && d->needsBBP);

      /* Marshal args, do the call, clear stack. */
      doHelperCall( env, passBBP, d->guard, d->cee, d->args );

      /* Now figure out what to do with the returned value, if any. */
      if (d->tmp == IRTemp_INVALID)
         /* No return value.  Nothing to do. */
         return;

      retty = typeOfIRTemp(env->type_env, d->tmp);
      if (retty == Ity_I64 || retty == Ity_I32
          || retty == Ity_I16 || retty == Ity_I8) {
         /* The returned value is in %rax.  Park it in the register
            associated with tmp. */
         HReg dst = lookupIRTemp(env, d->tmp);
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
         return;
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MFence:
      addInstr(env, AMD64Instr_MFence());
      return;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      AMD64RI*      dst;
      AMD64CondCode cc;
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
      dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
      cc  = iselCondCode(env,stmt->Ist.Exit.guard);
      addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
      return;
   }

   default: break;
   }
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   AMD64RI* ri;
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }
   ri = iselIntExpr_RI(env, next);
   addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS, ri));
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire BB to amd64 code. */

HInstrArray* iselBB_AMD64 ( IRBB* bb, VexSubArch subarch_host )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;

   /* sanity ... */
   vassert(subarch_host == VexSubArch_NONE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->subarch = subarch_host;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
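   /* Integer temps (64 bits and narrower) get one Int64 vreg.
      Ity_I128 temps get a pair: vregmap holds the low half and
      vregmapHI the high half.  F32, F64 and V128 temps all live in
      Vec128 vregs, since this backend keeps all FP values in SSE
      registers. */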
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt64, True); break;
         case Ity_I128: hreg   = mkHReg(j++, HRcInt64, True);
                        hregHI = mkHReg(j++, HRcInt64, True); break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env,bb->stmts[i]);

   iselNext(env,bb->next,bb->jumpkind);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                 host-amd64/isel.c   ---*/
/*---------------------------------------------------------------*/