sewardja3e98302005-02-01 15:55:05 +00001
2/*---------------------------------------------------------------*/
3/*--- ---*/
4/*--- This file (host-amd64/isel.c) is ---*/
5/*--- Copyright (c) 2005 OpenWorks LLP. All rights reserved. ---*/
6/*--- ---*/
7/*---------------------------------------------------------------*/
8
9/*
10 This file is part of LibVEX, a library for dynamic binary
11 instrumentation and translation.
12
13 Copyright (C) 2004-2005 OpenWorks, LLP.
14
15 This program is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; Version 2 dated June 1991 of the
18 license.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or liability
23 for damages. See the GNU General Public License for more details.
24
25 Neither the names of the U.S. Department of Energy nor the
26 University of California nor the names of its contributors may be
27 used to endorse or promote products derived from this software
28 without prior written permission.
29
30 You should have received a copy of the GNU General Public License
31 along with this program; if not, write to the Free Software
32 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
33 USA.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39
sewardj05b3b6a2005-02-04 01:44:33 +000040#include "ir/irmatch.h"
sewardjc33671d2005-02-01 20:30:00 +000041#include "main/vex_util.h"
42#include "main/vex_globals.h"
43#include "host-generic/h_generic_regs.h"
sewardja3e98302005-02-01 15:55:05 +000044//.. #include "host-generic/h_generic_simd64.h"
sewardjc33671d2005-02-01 20:30:00 +000045#include "host-amd64/hdefs.h"
sewardj1a01e652005-02-23 11:39:21 +000046
47
48/*---------------------------------------------------------*/
49/*--- x87/SSE control word stuff ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, precision = 53 bits. This
54 corresponds to a FPU control word value of 0x027F.
55
56 Similarly the SSE control word (%mxcsr) should be 0x1F80.
57
58 %fpucw and %mxcsr should have these values on entry to
 59 Vex-generated code, and those values should be
60 unchanged at exit.
61*/
62
63#define DEFAULT_FPUCW 0x027F
64
65#define DEFAULT_MXCSR 0x1F80
66
67/* debugging only, do not use */
68/* define DEFAULT_FPUCW 0x037F */
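
/* Purely illustrative sketches of how the two defaults above decode;
   the field layouts are the architectural x87/SSE ones.  These
   helpers are not used by the instruction selector. */
static UInt x87_rounding_field ( UInt fpucw )
{
   /* FPUCW: bits 0..5 are the exception masks (0x3F == all masked),
      bits 8..9 the precision control (2 == 53-bit mantissa), and
      bits 10..11 the rounding control (0 == round to nearest).
      Hence 0x027F == all-masked | PC=2 | RC=0. */
   return (fpucw >> 10) & 3;
}

static UInt sse_rounding_field ( UInt mxcsr )
{
   /* MXCSR: bits 7..12 are the exception masks (0x1F80 sets them
      all) and bits 13..14 the rounding control (0 == nearest). */
   return (mxcsr >> 13) & 3;
}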
sewardj05b3b6a2005-02-04 01:44:33 +000069
70
71/*---------------------------------------------------------*/
72/*--- misc helpers ---*/
73/*---------------------------------------------------------*/
74
75/* These are duplicated in guest-amd64/toIR.c */
76static IRExpr* unop ( IROp op, IRExpr* a )
77{
78 return IRExpr_Unop(op, a);
79}
80
81static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
82{
83 return IRExpr_Binop(op, a1, a2);
84}
85
sewardja3e98302005-02-01 15:55:05 +000086//.. static IRExpr* mkU64 ( ULong i )
87//.. {
88//.. return IRExpr_Const(IRConst_U64(i));
89//.. }
90//..
91//.. static IRExpr* mkU32 ( UInt i )
92//.. {
93//.. return IRExpr_Const(IRConst_U32(i));
94//.. }
sewardj05b3b6a2005-02-04 01:44:33 +000095
96static IRExpr* bind ( Int binder )
97{
98 return IRExpr_Binder(binder);
99}
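
/* Small usage sketch (mirroring real uses further down): the helpers
   above build IRExpr trees for the matcher in ir/irmatch.h.  bind(n)
   is a wildcard; after a successful matchIRExpr the subtree it
   matched is found in mi.bindee[n].  This particular helper is
   illustrative only and not called by the selector. */
static IRExpr* spot_16Uto64_arg ( IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_widen16);
   DEFINE_PATTERN(p_widen16,
                  unop(Iop_32Uto64, unop(Iop_16Uto32, bind(0))));
   if (matchIRExpr(&mi, p_widen16, e))
      return mi.bindee[0];   /* the 16-bit subexpression */
   return NULL;
}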
sewardjc33671d2005-02-01 20:30:00 +0000100
101
sewardjc33671d2005-02-01 20:30:00 +0000102/*---------------------------------------------------------*/
103/*--- ISelEnv ---*/
104/*---------------------------------------------------------*/
105
106/* This carries around:
107
108 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
109 might encounter. This is computed before insn selection starts,
110 and does not change.
111
112 - A mapping from IRTemp to HReg. This tells the insn selector
113 which virtual register is associated with each IRTemp
114 temporary. This is computed before insn selection starts, and
115 does not change. We expect this mapping to map precisely the
116 same set of IRTemps as the type mapping does.
117
sewardj9b967672005-02-08 11:13:09 +0000118 - vregmap holds the primary register for the IRTemp.
119 - vregmapHI is only used for 128-bit integer-typed
120 IRTemps. It holds the identity of a second
121 64-bit virtual HReg, which holds the high half
122 of the value.
123
sewardjc33671d2005-02-01 20:30:00 +0000124 - The code array, that is, the insns selected so far.
125
126 - A counter, for generating new virtual registers.
127
128 - The host subarchitecture we are selecting insns for.
129 This is set at the start and does not change.
130
131 Note, this is all host-independent. (JRS 20050201: well, kinda
132 ... not completely. Compare with ISelEnv for X86.)
133*/
134
135typedef
136 struct {
137 IRTypeEnv* type_env;
138
139 HReg* vregmap;
sewardj9b967672005-02-08 11:13:09 +0000140 HReg* vregmapHI;
sewardjc33671d2005-02-01 20:30:00 +0000141 Int n_vregmap;
142
143 HInstrArray* code;
144
145 Int vreg_ctr;
146
147 VexSubArch subarch;
148 }
149 ISelEnv;
150
151
152static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
153{
154 vassert(tmp >= 0);
155 vassert(tmp < env->n_vregmap);
156 return env->vregmap[tmp];
157}
158
sewardj9b967672005-02-08 11:13:09 +0000159static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
160 ISelEnv* env, IRTemp tmp )
161{
162 vassert(tmp >= 0);
163 vassert(tmp < env->n_vregmap);
164 vassert(env->vregmapHI[tmp] != INVALID_HREG);
165 *vrLO = env->vregmap[tmp];
166 *vrHI = env->vregmapHI[tmp];
167}
sewardj614b3fb2005-02-02 02:16:03 +0000168
169static void addInstr ( ISelEnv* env, AMD64Instr* instr )
170{
171 addHInstr(env->code, instr);
172 if (vex_traceflags & VEX_TRACE_VCODE) {
173 ppAMD64Instr(instr);
174 vex_printf("\n");
175 }
176}
177
sewardj8258a8c2005-02-02 03:11:24 +0000178static HReg newVRegI ( ISelEnv* env )
179{
180 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
181 env->vreg_ctr++;
182 return reg;
183}
184
sewardja3e98302005-02-01 15:55:05 +0000185//.. static HReg newVRegF ( ISelEnv* env )
186//.. {
187//.. HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
188//.. env->vreg_ctr++;
189//.. return reg;
190//.. }
sewardj0852a132005-02-21 08:28:46 +0000191
192static HReg newVRegV ( ISelEnv* env )
193{
194 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
195 env->vreg_ctr++;
196 return reg;
197}
sewardj614b3fb2005-02-02 02:16:03 +0000198
199
200/*---------------------------------------------------------*/
201/*--- ISEL: Forward declarations ---*/
202/*---------------------------------------------------------*/
203
204/* These are organised as iselXXX and iselXXX_wrk pairs. The
205 iselXXX_wrk do the real work, but are not to be called directly.
206 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
207 checks that all returned registers are virtual. You should not
208 call the _wrk version directly.
209*/
210static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
211static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
212
213static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
214static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
215
216static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
217static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
218
219static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
220static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
221
222static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
223static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
224
sewardj9b967672005-02-08 11:13:09 +0000225static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
226 ISelEnv* env, IRExpr* e );
227static void iselInt128Expr ( HReg* rHi, HReg* rLo,
228 ISelEnv* env, IRExpr* e );
229
sewardj614b3fb2005-02-02 02:16:03 +0000230static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
231static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
232
sewardj18303862005-02-21 12:36:54 +0000233static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
234static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
sewardj614b3fb2005-02-02 02:16:03 +0000235
sewardj8d965312005-02-25 02:48:47 +0000236static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
237static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
sewardj614b3fb2005-02-02 02:16:03 +0000238
sewardj0852a132005-02-21 08:28:46 +0000239static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
240static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
sewardj614b3fb2005-02-02 02:16:03 +0000241
242
243/*---------------------------------------------------------*/
244/*--- ISEL: Misc helpers ---*/
245/*---------------------------------------------------------*/
246
247static Bool sane_AMode ( AMD64AMode* am )
248{
249 switch (am->tag) {
250 case Aam_IR:
251 return hregClass(am->Aam.IR.reg) == HRcInt64
252 && (hregIsVirtual(am->Aam.IR.reg)
253 || am->Aam.IR.reg == hregAMD64_RBP());
254 case Aam_IRRS:
255 return hregClass(am->Aam.IRRS.base) == HRcInt64
256 && hregIsVirtual(am->Aam.IRRS.base)
257 && hregClass(am->Aam.IRRS.index) == HRcInt64
258 && hregIsVirtual(am->Aam.IRRS.index);
259 default:
260 vpanic("sane_AMode: unknown amd64 amode tag");
261 }
262}
263
264
265/* Can the lower 32 bits be signedly widened to produce the whole
266 64-bit value? In other words, are the top 33 bits either all 0 or
267 all 1 ? */
268static Bool fitsIn32Bits ( ULong x )
269{
270 Long y0 = (Long)x;
271 Long y1 = y0;
272 y1 <<= 32;
273 y1 >>=/*s*/ 32;
274 return toBool(x == y1);
275}
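
/* A few boundary cases for the predicate above (illustrative only,
   not called anywhere): values whose top 33 bits are all zero or all
   one survive the sign-extension round trip. */
static void example_fitsIn32Bits ( void )
{
   vassert( fitsIn32Bits(0x000000007FFFFFFFULL) );  /*  2^31-1 : yes */
   vassert( fitsIn32Bits(0xFFFFFFFF80000000ULL) );  /* -2^31   : yes */
   vassert( !fitsIn32Bits(0x0000000080000000ULL) ); /*  2^31   : no  */
   vassert( !fitsIn32Bits(0x0000000100000000ULL) ); /*  2^32   : no  */
}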
276
sewardja3e98302005-02-01 15:55:05 +0000277//.. /* Is this a 32-bit zero expression? */
278//..
279//.. static Bool isZero32 ( IRExpr* e )
280//.. {
281//.. return e->tag == Iex_Const
282//.. && e->Iex.Const.con->tag == Ico_U32
283//.. && e->Iex.Const.con->Ico.U32 == 0;
284//.. }
sewardj8258a8c2005-02-02 03:11:24 +0000285
 286/* Make an int reg-reg move. */
287
288static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
289{
290 vassert(hregClass(src) == HRcInt64);
291 vassert(hregClass(dst) == HRcInt64);
292 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
293}
294
sewardj0852a132005-02-21 08:28:46 +0000295/* Make a vector reg-reg move. */
sewardj8258a8c2005-02-02 03:11:24 +0000296
sewardj0852a132005-02-21 08:28:46 +0000297static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
298{
299 vassert(hregClass(src) == HRcVec128);
300 vassert(hregClass(dst) == HRcVec128);
301 return AMD64Instr_SseReRg(Asse_MOV, src, dst);
302}
303
304/* Advance/retreat %rsp by n. */
305
306static void add_to_rsp ( ISelEnv* env, Int n )
307{
308 vassert(n > 0 && n < 256 && (n%8) == 0);
309 addInstr(env,
310 AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
311 hregAMD64_RSP()));
312}
313
sewardj18303862005-02-21 12:36:54 +0000314static void sub_from_rsp ( ISelEnv* env, Int n )
315{
316 vassert(n > 0 && n < 256 && (n%8) == 0);
317 addInstr(env,
318 AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
319 hregAMD64_RSP()));
320}
321
322
sewardja3e98302005-02-01 15:55:05 +0000323//.. /* Given an amode, return one which references 4 bytes further
324//.. along. */
325//..
326//.. static X86AMode* advance4 ( X86AMode* am )
327//.. {
328//.. X86AMode* am4 = dopyX86AMode(am);
329//.. switch (am4->tag) {
330//.. case Xam_IRRS:
331//.. am4->Xam.IRRS.imm += 4; break;
332//.. case Xam_IR:
333//.. am4->Xam.IR.imm += 4; break;
334//.. default:
335//.. vpanic("advance4(x86,host)");
336//.. }
337//.. return am4;
338//.. }
339//..
340//..
341//.. /* Push an arg onto the host stack, in preparation for a call to a
342//.. helper function of some kind. Returns the number of 32-bit words
343//.. pushed. */
344//..
345//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
346//.. {
347//.. IRType arg_ty = typeOfIRExpr(env->type_env, arg);
348//.. if (arg_ty == Ity_I32) {
349//.. addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
350//.. return 1;
351//.. } else
352//.. if (arg_ty == Ity_I64) {
353//.. HReg rHi, rLo;
354//.. iselInt64Expr(&rHi, &rLo, env, arg);
355//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
356//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
357//.. return 2;
358//.. }
359//.. ppIRExpr(arg);
360//.. vpanic("pushArg(x86): can't handle arg of this type");
361//.. }
sewardj05b3b6a2005-02-04 01:44:33 +0000362
363
364/* Used only in doHelperCall. See big comment in doHelperCall re
365 handling of register-parameter args. This function figures out
366 whether evaluation of an expression might require use of a fixed
367 register. If in doubt return True (safe but suboptimal).
368*/
369static
370Bool mightRequireFixedRegs ( IRExpr* e )
371{
372 switch (e->tag) {
373 case Iex_Tmp: case Iex_Const: case Iex_Get:
374 return False;
375 default:
376 return True;
377 }
378}
379
380
381/* Do a complete function call. guard is a Ity_Bit expression
382 indicating whether or not the call happens. If guard==NULL, the
383 call is unconditional. */
384
385static
386void doHelperCall ( ISelEnv* env,
387 Bool passBBP,
388 IRExpr* guard, IRCallee* cee, IRExpr** args )
389{
390 AMD64CondCode cc;
391 HReg argregs[6];
392 HReg tmpregs[6];
393 Bool go_fast;
394 Int n_args, i, argreg;
395
396 /* Marshal args for a call and do the call.
397
398 If passBBP is True, %rbp (the baseblock pointer) is to be passed
399 as the first arg.
400
401 This function only deals with a tiny set of possibilities, which
402 cover all helpers in practice. The restrictions are that only
 403 arguments passed in registers are supported, hence at most six
 404 64-bit integer values can be passed. In fact the only supported arg
405 type is I64.
406
407 Generating code which is both efficient and correct when
408 parameters are to be passed in registers is difficult, for the
409 reasons elaborated in detail in comments attached to
410 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
411 of the method described in those comments.
412
413 The problem is split into two cases: the fast scheme and the
414 slow scheme. In the fast scheme, arguments are computed
415 directly into the target (real) registers. This is only safe
416 when we can be sure that computation of each argument will not
417 trash any real registers set by computation of any other
418 argument.
419
420 In the slow scheme, all args are first computed into vregs, and
421 once they are all done, they are moved to the relevant real
422 regs. This always gives correct code, but it also gives a bunch
423 of vreg-to-rreg moves which are usually redundant but are hard
424 for the register allocator to get rid of.
425
426 To decide which scheme to use, all argument expressions are
427 first examined. If they are all so simple that it is clear they
428 will be evaluated without use of any fixed registers, use the
429 fast scheme, else use the slow scheme. Note also that only
430 unconditional calls may use the fast scheme, since having to
431 compute a condition expression could itself trash real
432 registers.
433
434 Note this requires being able to examine an expression and
435 determine whether or not evaluation of it might use a fixed
436 register. That requires knowledge of how the rest of this insn
437 selector works. Currently just the following 3 are regarded as
438 safe -- hopefully they cover the majority of arguments in
439 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
440 */
441
442 /* Note that the cee->regparms field is meaningless on AMD64 host
443 (since there is only one calling convention) and so we always
444 ignore it. */
445
446 n_args = 0;
447 for (i = 0; args[i]; i++)
448 n_args++;
449
450 if (6 < n_args + (passBBP ? 1 : 0))
451 vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");
452
453 argregs[0] = hregAMD64_RDI();
454 argregs[1] = hregAMD64_RSI();
455 argregs[2] = hregAMD64_RDX();
456 argregs[3] = hregAMD64_RCX();
457 argregs[4] = hregAMD64_R8();
458 argregs[5] = hregAMD64_R9();
459
460 tmpregs[0] = tmpregs[1] = tmpregs[2] =
461 tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;
462
463 /* First decide which scheme (slow or fast) is to be used. First
464 assume the fast scheme, and select slow if any contraindications
465 (wow) appear. */
466
467 go_fast = True;
468
469 if (guard) {
470 if (guard->tag == Iex_Const
471 && guard->Iex.Const.con->tag == Ico_U1
472 && guard->Iex.Const.con->Ico.U1 == True) {
473 /* unconditional */
474 } else {
475 /* Not manifestly unconditional -- be conservative. */
476 go_fast = False;
477 }
478 }
479
480 if (go_fast) {
481 for (i = 0; i < n_args; i++) {
482 if (mightRequireFixedRegs(args[i])) {
483 go_fast = False;
484 break;
485 }
486 }
487 }
488
489 /* At this point the scheme to use has been established. Generate
490 code to get the arg values into the argument rregs. */
491
492 if (go_fast) {
493
494 /* FAST SCHEME */
495 argreg = 0;
496 if (passBBP) {
497 addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]));
498 argreg++;
499 }
500
501 for (i = 0; i < n_args; i++) {
502 vassert(argreg < 6);
503 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
504 addInstr(env, AMD64Instr_Alu64R(
505 Aalu_MOV,
506 iselIntExpr_RMI(env, args[i]),
507 argregs[argreg]
508 )
509 );
510 argreg++;
511 }
512
513 /* Fast scheme only applies for unconditional calls. Hence: */
514 cc = Acc_ALWAYS;
515
516 } else {
517
518 /* SLOW SCHEME; move via temporaries */
519 argreg = 0;
520
521 if (passBBP) {
522 /* This is pretty stupid; better to move directly to rdi
523 after the rest of the args are done. */
524 tmpregs[argreg] = newVRegI(env);
525 addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
526 argreg++;
527 }
528
529 for (i = 0; i < n_args; i++) {
530 vassert(argreg < 6);
531 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
532 tmpregs[argreg] = iselIntExpr_R(env, args[i]);
533 argreg++;
534 }
535
536 /* Now we can compute the condition. We can't do it earlier
537 because the argument computations could trash the condition
538 codes. Be a bit clever to handle the common case where the
539 guard is 1:Bit. */
540 cc = Acc_ALWAYS;
541 if (guard) {
542 if (guard->tag == Iex_Const
543 && guard->Iex.Const.con->tag == Ico_U1
544 && guard->Iex.Const.con->Ico.U1 == True) {
545 /* unconditional -- do nothing */
546 } else {
547 cc = iselCondCode( env, guard );
548 }
549 }
550
551 /* Move the args to their final destinations. */
552 for (i = 0; i < argreg; i++) {
553 /* None of these insns, including any spill code that might
554 be generated, may alter the condition codes. */
555 addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
556 }
557
558 }
559
560 /* Finally, the call itself. */
561 addInstr(env, AMD64Instr_Call(
562 cc,
sewardjf3992bd2005-02-07 00:20:43 +0000563 Ptr_to_ULong(cee->addr),
sewardj05b3b6a2005-02-04 01:44:33 +0000564 n_args + (passBBP ? 1 : 0)
565 )
566 );
567}
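
/* Worked example (illustrative; 'foo' and the argument values are
   made up): for a call foo(bbp, t5, 0x42:I64) with passBBP==True,
   both args are "simple" (an IRTemp and a constant), so the fast
   scheme applies and we emit roughly
      movq %rbp,  %rdi
      movq t5,    %rsi     -- t5 is a vreg; reg-alloc picks the rreg
      movq $0x42, %rdx
      call foo
   Had either arg been, say, a Div or a CCall, the slow scheme would
   first evaluate every arg into a fresh vreg, then evaluate the
   guard, and only then move the vregs into %rdi/%rsi/%rdx. */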
568
569
sewardj8d965312005-02-25 02:48:47 +0000570/* Given a guest-state array descriptor, an index expression and a
571 bias, generate an AMD64AMode holding the relevant guest state
572 offset. */
573
574static
575AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRArray* descr,
576 IRExpr* off, Int bias )
577{
578 HReg tmp, roff;
579 Int elemSz = sizeofIRType(descr->elemTy);
580 Int nElems = descr->nElems;
581
582 /* Throw out any cases not generated by an amd64 front end. In
583 theory there might be a day where we need to handle them -- if
584 we ever run non-amd64-guest on amd64 host. */
585
586 if (nElems != 8 || (elemSz != 1 && elemSz != 8))
587 vpanic("genGuestArrayOffset(amd64 host)");
588
589 /* Compute off into a reg, %off. Then return:
590
591 movq %off, %tmp
592 addq $bias, %tmp (if bias != 0)
 593 andq $7, %tmp
594 ... base(%rbp, %tmp, shift) ...
595 */
596 tmp = newVRegI(env);
597 roff = iselIntExpr_R(env, off);
598 addInstr(env, mk_iMOVsd_RR(roff, tmp));
599 if (bias != 0) {
600 /* Make sure the bias is sane, in the sense that there are
601 no significant bits above bit 30 in it. */
602 vassert(-10000 < bias && bias < 10000);
603 addInstr(env,
604 AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
605 }
606 addInstr(env,
607 AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
608 vassert(elemSz == 1 || elemSz == 8);
609 return
610 AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
611 elemSz==8 ? 3 : 0);
612}
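
/* Worked example (illustrative; the descriptor values are made up):
   for a descriptor with base == 64, elemTy == Ity_F64 (elemSz == 8)
   and nElems == 8, index expression 'ix' and bias 3, the code above
   computes tmp := (ix + 3) & 7 and returns the amode
      64(%rbp, tmp, 8)
   i.e. guest offset base + 8 * ((ix + bias) % nElems). */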
613
sewardj1a01e652005-02-23 11:39:21 +0000614
615/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
616static
617void set_SSE_rounding_default ( ISelEnv* env )
618{
619 /* pushq $DEFAULT_MXCSR
620 ldmxcsr 0(%rsp)
621 addq $8, %rsp
622 */
623 AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
624 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
625 addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
626 add_to_rsp(env, 8);
627}
628
sewardja3e98302005-02-01 15:55:05 +0000629//.. /* Mess with the FPU's rounding mode: set to the default rounding mode
630//.. (DEFAULT_FPUCW). */
631//.. static
632//.. void set_FPU_rounding_default ( ISelEnv* env )
633//.. {
634//.. /* pushl $DEFAULT_FPUCW
635//.. fldcw 0(%esp)
636//.. addl $4, %esp
637//.. */
638//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
639//.. addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
640//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
641//.. add_to_esp(env, 4);
642//.. }
sewardj1a01e652005-02-23 11:39:21 +0000643
644
645/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
646 expression denoting a value in the range 0 .. 3, indicating a round
647 mode encoded as per type IRRoundingMode. Set the SSE machinery to
648 have the same rounding.
649*/
650static
651void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
652{
653 /* Note: this sequence only makes sense because DEFAULT_MXCSR has
654 both rounding bits == 0. If that wasn't the case, we couldn't
655 create a new rounding field simply by ORing the new value into
656 place. */
657
658 /* movq $3, %reg
659 andq [[mode]], %reg -- shouldn't be needed; paranoia
660 shlq $13, %reg
661 orq $DEFAULT_MXCSR, %reg
662 pushq %reg
 663 ldmxcsr 0(%rsp)
664 addq $8, %rsp
665 */
666 HReg reg = newVRegI(env);
667 AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
668 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
669 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
670 iselIntExpr_RMI(env, mode), reg));
671 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, AMD64RM_Reg(reg)));
672 addInstr(env, AMD64Instr_Alu64R(
673 Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
674 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
675 addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
676 add_to_rsp(env, 8);
677}
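
/* Sketch of the value the sequence above leaves in %mxcsr, assuming
   'mode' evaluates to an IRRoundingMode in 0..3.  The IR encoding
   (0 = nearest, 1 = -inf, 2 = +inf, 3 = toward zero) happens to
   match the MXCSR RC field exactly, which is why a plain shift by 13
   suffices.  Not used for code generation. */
static UInt mxcsr_for_rounding_mode ( UInt irrm )
{
   /* e.g. irrm == 3 gives (3 << 13) | 0x1F80 == 0x7F80. */
   return ((irrm & 3) << 13) | DEFAULT_MXCSR;
}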
678
679
sewardja3e98302005-02-01 15:55:05 +0000680//.. /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
681//.. expression denoting a value in the range 0 .. 3, indicating a round
682//.. mode encoded as per type IRRoundingMode. Set the x87 FPU to have
683//.. the same rounding.
684//.. */
685//.. static
686//.. void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
687//.. {
688//.. HReg rrm = iselIntExpr_R(env, mode);
689//.. HReg rrm2 = newVRegI(env);
690//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
691//..
692//.. /* movl %rrm, %rrm2
693//.. andl $3, %rrm2 -- shouldn't be needed; paranoia
694//.. shll $10, %rrm2
695//.. orl $DEFAULT_FPUCW, %rrm2
696//.. pushl %rrm2
697//.. fldcw 0(%esp)
698//.. addl $4, %esp
699//.. */
700//.. addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
701//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
702//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, X86RM_Reg(rrm2)));
703//.. addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
704//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
705//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
706//.. add_to_esp(env, 4);
707//.. }
sewardj8d965312005-02-25 02:48:47 +0000708
709
710/* Generate !src into a new vector register, and be sure that the code
711 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
712 way to do this.
713*/
714static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
715{
716 HReg dst = newVRegV(env);
717 /* Set dst to zero. Not strictly necessary, but the idea of doing
718 a FP comparison on whatever junk happens to be floating around
719 in it is just too scary. */
720 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
721 /* And now make it all 1s ... */
722 addInstr(env, AMD64Instr_Sse32Fx4(Asse_CMPEQF, dst, dst));
723 /* Finally, xor 'src' into it. */
724 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
725 return dst;
726}
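
/* Scalar model of the three-instruction trick above (illustration
   only): comparing a zeroed register with itself for equality fills
   every lane with ones, and all-ones ^ src is ~src. */
static ULong model_sse_Not_lane ( ULong src )
{
   ULong ones = ~0ULL;   /* what CMPEQF leaves in each lane of dst */
   return ones ^ src;    /* what the final XOR leaves, i.e. ~src   */
}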
727
728
sewardja3e98302005-02-01 15:55:05 +0000729//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
730//.. after most non-simple FPU operations (simple = +, -, *, / and
731//.. sqrt).
732//..
733//.. This could be done a lot more efficiently if needed, by loading
734//.. zero and adding it to the value to be rounded (fldz ; faddp?).
735//.. */
736//.. static void roundToF64 ( ISelEnv* env, HReg reg )
737//.. {
738//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
739//.. sub_from_esp(env, 8);
740//.. addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
741//.. addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
742//.. add_to_esp(env, 8);
743//.. }
sewardj8258a8c2005-02-02 03:11:24 +0000744
745
746/*---------------------------------------------------------*/
747/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
748/*---------------------------------------------------------*/
749
750/* Select insns for an integer-typed expression, and add them to the
751 code list. Return a reg holding the result. This reg will be a
752 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
753 want to modify it, ask for a new vreg, copy it in there, and modify
754 the copy. The register allocator will do its best to map both
755 vregs to the same real register, so the copies will often disappear
756 later in the game.
757
758 This should handle expressions of 64, 32, 16 and 8-bit type. All
759 results are returned in a 64-bit register. For 32-, 16- and 8-bit
 760 expressions, the upper 32/48/56 bits are arbitrary, so you should
761 mask or sign extend partial values if necessary.
762*/
763
764static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
765{
766 HReg r = iselIntExpr_R_wrk(env, e);
767 /* sanity checks ... */
768# if 0
769 vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
770# endif
771 vassert(hregClass(r) == HRcInt64);
772 vassert(hregIsVirtual(r));
773 return r;
774}
775
776/* DO NOT CALL THIS DIRECTLY ! */
777static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
778{
sewardj05b3b6a2005-02-04 01:44:33 +0000779 MatchInfo mi;
sewardj7f039c42005-02-04 21:13:55 +0000780 DECLARE_PATTERN(p_8Uto64);
sewardj05b3b6a2005-02-04 01:44:33 +0000781 DECLARE_PATTERN(p_16Uto64);
782 DECLARE_PATTERN(p_1Uto8_32to1_64to32);
sewardja3e98302005-02-01 15:55:05 +0000783//.. DECLARE_PATTERN(p_32to1_then_1Uto8);
sewardj8258a8c2005-02-02 03:11:24 +0000784
785 IRType ty = typeOfIRExpr(env->type_env,e);
 786 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
787
788 switch (e->tag) {
789
790 /* --------- TEMP --------- */
791 case Iex_Tmp: {
792 return lookupIRTemp(env, e->Iex.Tmp.tmp);
793 }
794
795 /* --------- LOAD --------- */
796 case Iex_LDle: {
797 HReg dst = newVRegI(env);
798 AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.LDle.addr );
sewardjf67eadf2005-02-03 03:53:52 +0000799 if (ty == Ity_I64) {
800 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
801 AMD64RMI_Mem(amode), dst) );
802 return dst;
803 }
sewardj8258a8c2005-02-02 03:11:24 +0000804 if (ty == Ity_I32) {
805 addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
806 return dst;
807 }
sewardj05b3b6a2005-02-04 01:44:33 +0000808 if (ty == Ity_I16) {
809 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
810 return dst;
811 }
sewardj7f039c42005-02-04 21:13:55 +0000812 if (ty == Ity_I8) {
813 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
814 return dst;
815 }
sewardj8258a8c2005-02-02 03:11:24 +0000816 break;
817 }
818
819 /* --------- BINARY OP --------- */
820 case Iex_Binop: {
821 AMD64AluOp aluOp;
822 AMD64ShiftOp shOp;
sewardja3e98302005-02-01 15:55:05 +0000823//..
824//.. /* Pattern: Sub32(0,x) */
825//.. if (e->Iex.Binop.op == Iop_Sub32 && isZero32(e->Iex.Binop.arg1)) {
826//.. HReg dst = newVRegI(env);
827//.. HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
828//.. addInstr(env, mk_iMOVsd_RR(reg,dst));
829//.. addInstr(env, X86Instr_Unary32(Xun_NEG,X86RM_Reg(dst)));
830//.. return dst;
831//.. }
832//..
sewardj8258a8c2005-02-02 03:11:24 +0000833 /* Is it an addition or logical style op? */
834 switch (e->Iex.Binop.op) {
835 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
836 aluOp = Aalu_ADD; break;
sewardj05b3b6a2005-02-04 01:44:33 +0000837 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
838 aluOp = Aalu_SUB; break;
839 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
840 aluOp = Aalu_AND; break;
sewardje1698952005-02-08 15:02:39 +0000841 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
sewardj31191072005-02-05 18:24:47 +0000842 aluOp = Aalu_OR; break;
sewardje1698952005-02-08 15:02:39 +0000843 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
844 aluOp = Aalu_XOR; break;
sewardj85520e42005-02-19 15:22:38 +0000845 case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
sewardjd0a12df2005-02-10 02:07:43 +0000846 aluOp = Aalu_MUL; break;
sewardj8258a8c2005-02-02 03:11:24 +0000847 default:
848 aluOp = Aalu_INVALID; break;
849 }
850 /* For commutative ops we assume any literal
851 values are on the second operand. */
852 if (aluOp != Aalu_INVALID) {
853 HReg dst = newVRegI(env);
854 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
855 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
856 addInstr(env, mk_iMOVsd_RR(reg,dst));
857 addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
858 return dst;
859 }
860
861 /* Perhaps a shift op? */
862 switch (e->Iex.Binop.op) {
863 case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
864 shOp = Ash_SHL; break;
sewardj9b967672005-02-08 11:13:09 +0000865 case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
866 shOp = Ash_SHR; break;
sewardj05b3b6a2005-02-04 01:44:33 +0000867 case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
868 shOp = Ash_SAR; break;
sewardj8258a8c2005-02-02 03:11:24 +0000869 default:
870 shOp = Ash_INVALID; break;
871 }
872 if (shOp != Ash_INVALID) {
873 HReg dst = newVRegI(env);
874
875 /* regL = the value to be shifted */
876 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
877 addInstr(env, mk_iMOVsd_RR(regL,dst));
878
879 /* Do any necessary widening for 32/16/8 bit operands */
880 switch (e->Iex.Binop.op) {
sewardj05b3b6a2005-02-04 01:44:33 +0000881 case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
882 break;
sewardj85520e42005-02-19 15:22:38 +0000883 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
sewardjb095fba2005-02-13 14:13:04 +0000884 break;
sewardj85520e42005-02-19 15:22:38 +0000885 case Iop_Shr8:
886 addInstr(env, AMD64Instr_Alu64R(
887 Aalu_AND, AMD64RMI_Imm(0xFF), dst));
888 break;
889 case Iop_Shr16:
890 addInstr(env, AMD64Instr_Alu64R(
891 Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
892 break;
sewardjb095fba2005-02-13 14:13:04 +0000893 case Iop_Shr32:
sewardj909c06d2005-02-19 22:47:41 +0000894 addInstr(env, AMD64Instr_MovZLQ(dst,dst));
sewardjb095fba2005-02-13 14:13:04 +0000895 break;
sewardja3e98302005-02-01 15:55:05 +0000896//.. case Iop_Sar8:
897//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, X86RM_Reg(dst)));
898//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, X86RM_Reg(dst)));
899//.. break;
900//.. case Iop_Sar16:
901//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, X86RM_Reg(dst)));
902//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, X86RM_Reg(dst)));
903//.. break;
sewardj05b3b6a2005-02-04 01:44:33 +0000904 case Iop_Sar32:
905 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(dst)));
906 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 32, AMD64RM_Reg(dst)));
907 break;
908 default:
sewardj909c06d2005-02-19 22:47:41 +0000909 ppIROp(e->Iex.Binop.op);
sewardj05b3b6a2005-02-04 01:44:33 +0000910 vassert(0);
sewardj8258a8c2005-02-02 03:11:24 +0000911 }
912
913 /* Now consider the shift amount. If it's a literal, we
914 can do a much better job than the general case. */
915 if (e->Iex.Binop.arg2->tag == Iex_Const) {
916 /* assert that the IR is well-typed */
917 Int nshift;
918 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
919 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
920 vassert(nshift >= 0);
921 if (nshift > 0)
922 /* Can't allow nshift==0 since that means %cl */
923 addInstr(env, AMD64Instr_Sh64(
924 shOp,
925 nshift,
926 AMD64RM_Reg(dst)));
927 } else {
928 /* General case; we have to force the amount into %cl. */
929 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
930 addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
931 addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, AMD64RM_Reg(dst)));
932 }
933 return dst;
934 }
935
sewardj7de0d3c2005-02-13 02:26:41 +0000936 /* Handle misc other ops. */
937
938 if (e->Iex.Binop.op == Iop_DivModS64to32
939 || e->Iex.Binop.op == Iop_DivModU64to32) {
940 /* 64 x 32 -> (32(rem),32(div)) division */
941 /* Get the 64-bit operand into edx:eax, and the other into
942 any old R/M. */
943 HReg rax = hregAMD64_RAX();
944 HReg rdx = hregAMD64_RDX();
945 HReg dst = newVRegI(env);
946 Bool syned = e->Iex.Binop.op == Iop_DivModS64to32;
947 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
sewardj7de0d3c2005-02-13 02:26:41 +0000948 /* Compute the left operand into a reg, and then
949 put the top half in edx and the bottom in eax. */
950 HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
sewardj7de0d3c2005-02-13 02:26:41 +0000951 addInstr(env, mk_iMOVsd_RR(left64, rdx));
952 addInstr(env, mk_iMOVsd_RR(left64, rax));
sewardj909c06d2005-02-19 22:47:41 +0000953 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, AMD64RM_Reg(rdx)));
sewardj7de0d3c2005-02-13 02:26:41 +0000954 addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
sewardj909c06d2005-02-19 22:47:41 +0000955 addInstr(env, AMD64Instr_MovZLQ(rdx,rdx));
956 addInstr(env, AMD64Instr_MovZLQ(rax,rax));
sewardj7de0d3c2005-02-13 02:26:41 +0000957 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(rdx)));
958 addInstr(env, mk_iMOVsd_RR(rax, dst));
959 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
960 return dst;
961 }
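
      /* Worked example of the sequence above (illustrative values):
         for DivModU64to32(0x100000007:I64, 3:I32) we load
         %rdx = %rax = 0x100000007, shift %rdx right by 32 so that
         %edx:%eax = 1:0x00000007, and the 32-bit div leaves quotient
         0x55555557 in %eax and remainder 2 in %edx.  After
         zero-extending both and shifting %rdx back up,
         dst = 0x0000000255555557, i.e. (remainder << 32) | quotient,
         as the IROp requires. */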
962
963 if (e->Iex.Binop.op == Iop_32HLto64) {
964 HReg hi32 = newVRegI(env);
965 HReg lo32 = newVRegI(env);
966 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
967 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
968 addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
969 addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
970 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, AMD64RM_Reg(hi32)));
sewardj909c06d2005-02-19 22:47:41 +0000971 addInstr(env, AMD64Instr_MovZLQ(lo32,lo32));
sewardj7de0d3c2005-02-13 02:26:41 +0000972 addInstr(env, AMD64Instr_Alu64R(
973 Aalu_OR, AMD64RMI_Reg(lo32), hi32));
974 return hi32;
975 }
976
sewardj85520e42005-02-19 15:22:38 +0000977 if (e->Iex.Binop.op == Iop_16HLto32) {
978 HReg hi16 = newVRegI(env);
979 HReg lo16 = newVRegI(env);
980 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
981 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
982 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
983 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
984 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, AMD64RM_Reg(hi16)));
985 addInstr(env, AMD64Instr_Alu64R(
986 Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
987 addInstr(env, AMD64Instr_Alu64R(
988 Aalu_OR, AMD64RMI_Reg(lo16), hi16));
989 return hi16;
990 }
sewardj7de0d3c2005-02-13 02:26:41 +0000991
sewardja3e98302005-02-01 15:55:05 +0000992//.. if (e->Iex.Binop.op == Iop_8HLto16) {
993//.. HReg hi8 = newVRegI(env);
994//.. HReg lo8 = newVRegI(env);
995//.. HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
996//.. HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
997//.. addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
998//.. addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
999//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, X86RM_Reg(hi8)));
1000//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
1001//.. addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
1002//.. return hi8;
1003//.. }
1004//..
1005//.. if (e->Iex.Binop.op == Iop_16HLto32) {
1006//.. HReg hi16 = newVRegI(env);
1007//.. HReg lo16 = newVRegI(env);
1008//.. HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1009//.. HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1010//.. addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1011//.. addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1012//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, X86RM_Reg(hi16)));
1013//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
1014//.. addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
1015//.. return hi16;
1016//.. }
sewardj85520e42005-02-19 15:22:38 +00001017
1018 if (e->Iex.Binop.op == Iop_MullS32
1019 || e->Iex.Binop.op == Iop_MullS16
1020 || e->Iex.Binop.op == Iop_MullS8
1021 || e->Iex.Binop.op == Iop_MullU32
1022 || e->Iex.Binop.op == Iop_MullU16
1023 || e->Iex.Binop.op == Iop_MullU8) {
1024 HReg a32 = newVRegI(env);
1025 HReg b32 = newVRegI(env);
1026 HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1027 HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1028 Int shift = 0;
1029 AMD64ShiftOp shr_op = Ash_SHR;
1030 switch (e->Iex.Binop.op) {
1031 case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
1032 case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
1033 case Iop_MullS8: shr_op = Ash_SAR; shift = 56; break;
1034 case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
1035 case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
1036 case Iop_MullU8: shr_op = Ash_SHR; shift = 56; break;
1037 default: vassert(0);
1038 }
1039
1040 addInstr(env, mk_iMOVsd_RR(a32s, a32));
1041 addInstr(env, mk_iMOVsd_RR(b32s, b32));
1042 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, AMD64RM_Reg(a32)));
1043 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, AMD64RM_Reg(b32)));
1044 addInstr(env, AMD64Instr_Sh64(shr_op, shift, AMD64RM_Reg(a32)));
1045 addInstr(env, AMD64Instr_Sh64(shr_op, shift, AMD64RM_Reg(b32)));
1046 addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
1047 return b32;
1048 }
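
      /* The shift-up/shift-down pairs above simply sign- or
         zero-extend the narrow operands to 64 bits, after which a
         single 64-bit multiply delivers the full double-width
         product.  E.g. for MullS32(-2:I32, 3:I32) both operands are
         sign-extended and the multiply gives 0xFFFFFFFFFFFFFFFA,
         which is exactly the I64 result -6 that MullS32 requires. */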
1049
sewardj18303862005-02-21 12:36:54 +00001050 if (e->Iex.Binop.op == Iop_CmpF64) {
1051 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1052 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1053 HReg dst = newVRegI(env);
1054 addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
1055 /* Mask out irrelevant parts of the result so as to conform
1056 to the CmpF64 definition. */
1057 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
1058 return dst;
1059 }
1060
sewardj1a01e652005-02-23 11:39:21 +00001061 if (e->Iex.Binop.op == Iop_F64toI32) {
1062 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1063 HReg dst = newVRegI(env);
1064 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
1065 addInstr(env, AMD64Instr_SseSF2SI( 8, 4, rf, dst ));
1066 set_SSE_rounding_default(env);
1067 return dst;
1068 }
1069
sewardja3e98302005-02-01 15:55:05 +00001070//.. if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
1071//.. Int sz = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
1072//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1073//.. HReg dst = newVRegI(env);
1074//..
1075//.. /* Used several times ... */
1076//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1077//..
sewardj7de0d3c2005-02-13 02:26:41 +00001078//.. /* rf now holds the value to be converted, and rrm holds the
sewardja3e98302005-02-01 15:55:05 +00001079//.. rounding mode value, encoded as per the IRRoundingMode
1080//.. enum. The first thing to do is set the FPU's rounding
1081//.. mode accordingly. */
1082//..
1083//.. /* Create a space for the format conversion. */
1084//.. /* subl $4, %esp */
1085//.. sub_from_esp(env, 4);
1086//..
1087//.. /* Set host rounding mode */
1088//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1089//..
1090//.. /* gistw/l %rf, 0(%esp) */
1091//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
1092//..
1093//.. if (sz == 2) {
1094//.. /* movzwl 0(%esp), %dst */
1095//.. addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1096//.. } else {
1097//.. /* movl 0(%esp), %dst */
1098//.. vassert(sz == 4);
1099//.. addInstr(env, X86Instr_Alu32R(
1100//.. Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1101//.. }
1102//..
1103//.. /* Restore default FPU rounding. */
1104//.. set_FPU_rounding_default( env );
1105//..
1106//.. /* addl $4, %esp */
1107//.. add_to_esp(env, 4);
1108//.. return dst;
1109//.. }
1110//..
1111//.. /* C3210 flags following FPU partial remainder (fprem), both
1112//.. IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1113//.. if (e->Iex.Binop.op == Iop_PRemC3210F64
1114//.. || e->Iex.Binop.op == Iop_PRem1C3210F64) {
1115//.. HReg junk = newVRegF(env);
1116//.. HReg dst = newVRegI(env);
1117//.. HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
1118//.. HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
1119//.. addInstr(env, X86Instr_FpBinary(
1120//.. e->Iex.Binop.op==Iop_PRemC3210F64
1121//.. ? Xfp_PREM : Xfp_PREM1,
1122//.. srcL,srcR,junk
1123//.. ));
1124//.. /* The previous pseudo-insn will have left the FPU's C3210
1125//.. flags set correctly. So bag them. */
1126//.. addInstr(env, X86Instr_FpStSW_AX());
1127//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1128//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1129//.. return dst;
1130//.. }
sewardj8258a8c2005-02-02 03:11:24 +00001131
1132 break;
1133 }
1134
sewardjf67eadf2005-02-03 03:53:52 +00001135 /* --------- UNARY OP --------- */
1136 case Iex_Unop: {
sewardj05b3b6a2005-02-04 01:44:33 +00001137 /* 32Uto64(16Uto32(expr16)) */
1138 DEFINE_PATTERN(p_16Uto64,
1139 unop(Iop_32Uto64, unop(Iop_16Uto32, bind(0)) ) );
1140 if (matchIRExpr(&mi,p_16Uto64,e)) {
1141 IRExpr* expr16 = mi.bindee[0];
sewardj7f039c42005-02-04 21:13:55 +00001142 HReg dst = newVRegI(env);
1143 HReg src = iselIntExpr_R(env, expr16);
sewardj05b3b6a2005-02-04 01:44:33 +00001144 addInstr(env, mk_iMOVsd_RR(src,dst) );
1145 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, AMD64RM_Reg(dst)));
1146 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 48, AMD64RM_Reg(dst)));
1147 return dst;
1148 }
1149
sewardj7f039c42005-02-04 21:13:55 +00001150 /* 32Uto64(8Uto32(expr16)) */
1151 DEFINE_PATTERN(p_8Uto64,
1152 unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0)) ) );
1153 if (matchIRExpr(&mi,p_8Uto64,e)) {
1154 IRExpr* expr8 = mi.bindee[0];
1155 HReg dst = newVRegI(env);
1156 HReg src = iselIntExpr_R(env, expr8);
1157 addInstr(env, mk_iMOVsd_RR(src,dst) );
1158 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, AMD64RM_Reg(dst)));
1159 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 56, AMD64RM_Reg(dst)));
1160 return dst;
1161 }
1162
sewardj05b3b6a2005-02-04 01:44:33 +00001163 /* 1Uto8(32to1(64to32(expr64))) */
1164 DEFINE_PATTERN(p_1Uto8_32to1_64to32,
1165 unop(Iop_1Uto8,
1166 unop(Iop_32to1, unop(Iop_64to32, bind(0)))));
1167 if (matchIRExpr(&mi,p_1Uto8_32to1_64to32,e)) {
1168 IRExpr* expr64 = mi.bindee[0];
1169 HReg dst = newVRegI(env);
1170 HReg src = iselIntExpr_R(env, expr64);
1171 addInstr(env, mk_iMOVsd_RR(src,dst) );
1172 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1173 AMD64RMI_Imm(1), dst));
1174 return dst;
1175 }
1176
sewardja3e98302005-02-01 15:55:05 +00001177//.. /* 16Uto32(LDle(expr32)) */
1178//.. {
1179//.. DECLARE_PATTERN(p_LDle16_then_16Uto32);
1180//.. DEFINE_PATTERN(p_LDle16_then_16Uto32,
1181//.. unop(Iop_16Uto32,IRExpr_LDle(Ity_I16,bind(0))) );
1182//.. if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1183//.. HReg dst = newVRegI(env);
1184//.. X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1185//.. addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1186//.. return dst;
1187//.. }
1188//.. }
sewardjf67eadf2005-02-03 03:53:52 +00001189
1190 switch (e->Iex.Unop.op) {
1191 case Iop_32Uto64: {
1192 HReg dst = newVRegI(env);
1193 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1194 addInstr(env, AMD64Instr_MovZLQ(src,dst) );
1195 return dst;
1196 }
sewardj05b3b6a2005-02-04 01:44:33 +00001197 case Iop_32Sto64: {
1198 HReg dst = newVRegI(env);
1199 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1200 UInt amt = 32;
1201 addInstr(env, mk_iMOVsd_RR(src,dst) );
1202 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, AMD64RM_Reg(dst)));
1203 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, AMD64RM_Reg(dst)));
1204 return dst;
1205 }
sewardj9b967672005-02-08 11:13:09 +00001206 case Iop_128HIto64: {
1207 HReg rHi, rLo;
1208 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1209 return rHi; /* and abandon rLo */
1210 }
1211 case Iop_128to64: {
1212 HReg rHi, rLo;
1213 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1214 return rLo; /* and abandon rHi */
1215 }
sewardj85520e42005-02-19 15:22:38 +00001216 case Iop_8Uto16:
sewardja3e98302005-02-01 15:55:05 +00001217//.. case Iop_8Uto32:
sewardj85520e42005-02-19 15:22:38 +00001218 case Iop_16Uto32: {
sewardj7de0d3c2005-02-13 02:26:41 +00001219 HReg dst = newVRegI(env);
1220 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1221 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1222 addInstr(env, mk_iMOVsd_RR(src,dst) );
1223 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1224 AMD64RMI_Imm(mask), dst));
1225 return dst;
1226 }
sewardj85520e42005-02-19 15:22:38 +00001227 case Iop_8Sto16:
sewardj7de0d3c2005-02-13 02:26:41 +00001228 case Iop_8Sto32:
1229 case Iop_16Sto32: {
sewardj486074e2005-02-08 20:10:04 +00001230 HReg dst = newVRegI(env);
1231 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1232 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 48 : 56;
1233 addInstr(env, mk_iMOVsd_RR(src,dst) );
1234 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, AMD64RM_Reg(dst)));
1235 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, AMD64RM_Reg(dst)));
1236 return dst;
1237 }
sewardj85520e42005-02-19 15:22:38 +00001238 case Iop_Not8:
1239 case Iop_Not16:
sewardj7de0d3c2005-02-13 02:26:41 +00001240 case Iop_Not32:
sewardjd0a12df2005-02-10 02:07:43 +00001241 case Iop_Not64: {
1242 HReg dst = newVRegI(env);
1243 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1244 addInstr(env, mk_iMOVsd_RR(src,dst) );
1245 addInstr(env, AMD64Instr_Unary64(Aun_NOT,AMD64RM_Reg(dst)));
1246 return dst;
1247 }
sewardja3e98302005-02-01 15:55:05 +00001248//.. case Iop_64HIto32: {
1249//.. HReg rHi, rLo;
1250//.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1251//.. return rHi; /* and abandon rLo .. poor wee thing :-) */
1252//.. }
1253//.. case Iop_64to32: {
1254//.. HReg rHi, rLo;
1255//.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1256//.. return rLo; /* similar stupid comment to the above ... */
1257//.. }
1258//.. case Iop_16HIto8:
sewardj85520e42005-02-19 15:22:38 +00001259 case Iop_32HIto16:
sewardj7de0d3c2005-02-13 02:26:41 +00001260 case Iop_64HIto32: {
1261 HReg dst = newVRegI(env);
1262 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1263 Int shift = 0;
1264 switch (e->Iex.Unop.op) {
sewardj85520e42005-02-19 15:22:38 +00001265 case Iop_32HIto16: shift = 16; break;
sewardj7de0d3c2005-02-13 02:26:41 +00001266 case Iop_64HIto32: shift = 32; break;
1267 default: vassert(0);
1268 }
1269 addInstr(env, mk_iMOVsd_RR(src,dst) );
1270 addInstr(env, AMD64Instr_Sh64(
1271 Ash_SHR, shift, AMD64RM_Reg(dst)));
1272 return dst;
1273 }
sewardja3e98302005-02-01 15:55:05 +00001274//.. case Iop_1Uto32:
1275//.. case Iop_1Uto8: {
1276//.. HReg dst = newVRegI(env);
1277//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1278//.. addInstr(env, X86Instr_Set32(cond,dst));
1279//.. return dst;
1280//.. }
1281//.. case Iop_1Sto8:
1282//.. case Iop_1Sto16:
1283//.. case Iop_1Sto32: {
1284//.. /* could do better than this, but for now ... */
1285//.. HReg dst = newVRegI(env);
1286//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1287//.. addInstr(env, X86Instr_Set32(cond,dst));
1288//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(dst)));
1289//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(dst)));
1290//.. return dst;
1291//.. }
1292//.. case Iop_Ctz32: {
1293//.. /* Count trailing zeroes, implemented by x86 'bsfl' */
1294//.. HReg dst = newVRegI(env);
1295//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1296//.. addInstr(env, X86Instr_Bsfr32(True,src,dst));
1297//.. return dst;
1298//.. }
1299//.. case Iop_Clz32: {
1300//.. /* Count leading zeroes. Do 'bsrl' to establish the index
1301//.. of the highest set bit, and subtract that value from
1302//.. 31. */
1303//.. HReg tmp = newVRegI(env);
1304//.. HReg dst = newVRegI(env);
1305//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1306//.. addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1307//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1308//.. X86RMI_Imm(31), dst));
1309//.. addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1310//.. X86RMI_Reg(tmp), dst));
1311//.. return dst;
1312//.. }
1313//..
1314//.. case Iop_128to32: {
1315//.. HReg dst = newVRegI(env);
1316//.. HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1317//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1318//.. sub_from_esp(env, 16);
1319//.. addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1320//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1321//.. add_to_esp(env, 16);
1322//.. return dst;
1323//.. }
sewardj85520e42005-02-19 15:22:38 +00001324
sewardj1a01e652005-02-23 11:39:21 +00001325
1326 /* V128{HI}to64 */
1327 case Iop_V128HIto64:
1328 case Iop_V128to64: {
1329 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
1330 HReg dst = newVRegI(env);
1331 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1332 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
1333 AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
1334 sub_from_rsp(env, 16);
1335 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
1336 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1337 AMD64RMI_Mem(rspN), dst ));
1338 add_to_rsp(env, 16);
1339 return dst;
1340 }
1341
sewardj85520e42005-02-19 15:22:38 +00001342 case Iop_16to8:
sewardja6b93d12005-02-17 09:28:28 +00001343 case Iop_32to8:
sewardj7de0d3c2005-02-13 02:26:41 +00001344 case Iop_32to16:
sewardj486074e2005-02-08 20:10:04 +00001345 case Iop_64to32:
1346 /* These are no-ops. */
1347 return iselIntExpr_R(env, e->Iex.Unop.arg);
sewardjf67eadf2005-02-03 03:53:52 +00001348
1349 default:
1350 break;
1351 }
1352 break;
1353 }
sewardj8258a8c2005-02-02 03:11:24 +00001354
1355 /* --------- GET --------- */
1356 case Iex_Get: {
1357 if (ty == Ity_I64) {
1358 HReg dst = newVRegI(env);
1359 addInstr(env, AMD64Instr_Alu64R(
1360 Aalu_MOV,
1361 AMD64RMI_Mem(
1362 AMD64AMode_IR(e->Iex.Get.offset,
1363 hregAMD64_RBP())),
1364 dst));
1365 return dst;
1366 }
1367 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1368 HReg dst = newVRegI(env);
1369 addInstr(env, AMD64Instr_LoadEX(
1370 ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4),
1371 False,
1372 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1373 dst));
1374 return dst;
1375 }
1376 break;
1377 }
1378
sewardj8d965312005-02-25 02:48:47 +00001379 case Iex_GetI: {
1380 AMD64AMode* am
1381 = genGuestArrayOffset(
1382 env, e->Iex.GetI.descr,
1383 e->Iex.GetI.ix, e->Iex.GetI.bias );
1384 HReg dst = newVRegI(env);
1385 if (ty == Ity_I8) {
1386 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1387 return dst;
1388 }
1389 break;
1390 }
sewardj05b3b6a2005-02-04 01:44:33 +00001391
1392 /* --------- CCALL --------- */
1393 case Iex_CCall: {
1394 HReg dst = newVRegI(env);
sewardj7f039c42005-02-04 21:13:55 +00001395 vassert(ty == e->Iex.CCall.retty);
sewardj05b3b6a2005-02-04 01:44:33 +00001396
1397 /* be very restrictive for now. Only 64-bit ints allowed
1398 for args, and 64 bits for return type. */
1399 if (e->Iex.CCall.retty != Ity_I64)
1400 goto irreducible;
1401
sewardj7f039c42005-02-04 21:13:55 +00001402 /* Marshal args, do the call. */
sewardj05b3b6a2005-02-04 01:44:33 +00001403 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1404
1405 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1406 return dst;
1407 }
1408
sewardj7f039c42005-02-04 21:13:55 +00001409 /* --------- LITERAL --------- */
1410 /* 64/32/16/8-bit literals */
1411 case Iex_Const:
1412 if (ty == Ity_I64) {
1413 HReg r = newVRegI(env);
1414 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1415 return r;
1416 } else {
1417 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1418 HReg r = newVRegI(env);
1419 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1420 return r;
1421 }
sewardj05b3b6a2005-02-04 01:44:33 +00001422
1423 /* --------- MULTIPLEX --------- */
1424 case Iex_Mux0X: {
1425 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1426 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1427 HReg r8;
1428 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1429 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1430 HReg dst = newVRegI(env);
1431 addInstr(env, mk_iMOVsd_RR(rX,dst));
1432 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
1433 addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(0xFF), AMD64RM_Reg(r8)));
1434 addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
1435 return dst;
1436 }
1437 break;
1438 }
sewardj8258a8c2005-02-02 03:11:24 +00001439
1440 default:
1441 break;
1442 } /* switch (e->tag) */
1443
1444 /* We get here if no pattern matched. */
1445 irreducible:
1446 ppIRExpr(e);
1447 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1448}
sewardj614b3fb2005-02-02 02:16:03 +00001449
1450
1451/*---------------------------------------------------------*/
1452/*--- ISEL: Integer expression auxiliaries ---*/
1453/*---------------------------------------------------------*/
1454
1455/* --------------------- AMODEs --------------------- */
1456
1457/* Return an AMode which computes the value of the specified
1458 expression, possibly also adding insns to the code list as a
 1459 result. The expression may only be a 64-bit one.
1460*/
1461
sewardj8258a8c2005-02-02 03:11:24 +00001462static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1463{
1464 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1465 vassert(sane_AMode(am));
1466 return am;
1467}
1468
1469/* DO NOT CALL THIS DIRECTLY ! */
1470static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1471{
sewardj05b3b6a2005-02-04 01:44:33 +00001472 MatchInfo mi;
1473 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001474 IRType ty = typeOfIRExpr(env->type_env,e);
1475 vassert(ty == Ity_I64);
1476
sewardj05b3b6a2005-02-04 01:44:33 +00001477 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1478 /* bind0 bind1 bind2 bind3 */
1479 DEFINE_PATTERN(p_complex,
1480 binop( Iop_Add64,
1481 binop( Iop_Add64,
1482 bind(0),
1483 binop(Iop_Shl64, bind(1), bind(2))
1484 ),
1485 bind(3)
1486 )
1487 );
1488 if (matchIRExpr(&mi, p_complex, e)) {
1489 IRExpr* expr1 = mi.bindee[0];
1490 IRExpr* expr2 = mi.bindee[1];
1491 IRExpr* imm8 = mi.bindee[2];
1492 IRExpr* simm32 = mi.bindee[3];
1493 if (imm8->tag == Iex_Const
1494 && imm8->Iex.Const.con->tag == Ico_U8
1495 && imm8->Iex.Const.con->Ico.U8 < 4
1496 /* imm8 is OK, now check simm32 */
1497 && simm32->tag == Iex_Const
1498 && simm32->Iex.Const.con->tag == Ico_U64
1499 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1500 UInt shift = imm8->Iex.Const.con->Ico.U8;
1501 UInt offset = (UInt)(0xFFFFFFFF & simm32->Iex.Const.con->Ico.U64);
1502 HReg r1 = iselIntExpr_R(env, expr1);
1503 HReg r2 = iselIntExpr_R(env, expr2);
1504 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1505 return AMD64AMode_IRRS(offset, r1, r2, shift);
1506 }
1507 }
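
   /* Worked example of the pattern above (illustrative temporaries):
      for Add64(Add64(t1, Shl64(t2, 3:I8)), 72:I64) we get
      bind0 = t1, bind1 = t2, bind2 = 3, bind3 = 72; since 3 < 4 and
      72 fits in 32 bits, the whole tree folds into the single amode
      72(%t1,%t2,8). */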
1508
sewardj8258a8c2005-02-02 03:11:24 +00001509 /* Add64(expr1, Shl64(expr2, imm)) */
1510 if (e->tag == Iex_Binop
1511 && e->Iex.Binop.op == Iop_Add64
1512 && e->Iex.Binop.arg2->tag == Iex_Binop
1513 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1514 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1515 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1516 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1517 if (shift == 1 || shift == 2 || shift == 3) {
1518 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1519 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1520 return AMD64AMode_IRRS(0, r1, r2, shift);
1521 }
1522 }
1523
1524 /* Add64(expr,i) */
1525 if (e->tag == Iex_Binop
1526 && e->Iex.Binop.op == Iop_Add64
1527 && e->Iex.Binop.arg2->tag == Iex_Const
1528 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
1529 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
1530 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1531 return AMD64AMode_IR(
1532 (UInt)(0xFFFFFFFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
1533 r1
1534 );
1535 }
1536
1537 /* Doesn't match anything in particular. Generate it into
1538 a register and use that. */
1539 {
1540 HReg r1 = iselIntExpr_R(env, e);
1541 return AMD64AMode_IR(0, r1);
1542 }
1543}
sewardj614b3fb2005-02-02 02:16:03 +00001544
1545
1546/* --------------------- RMIs --------------------- */
1547
 1548/* Similarly, calculate an expression into an AMD64RMI operand.  As with
 1549   iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
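/* Note: an AMD64RMI is a three-way operand -- Armi_Imm (a 32-bit
   immediate, typically sign-extended to 64 bits by the instruction
   that uses it), Armi_Reg (a virtual integer register) or Armi_Mem
   (an amode).  So for, say, Add64(t3, 0x10:I64), the constant can
   feed the ALU instruction directly as an Armi_Imm rather than
   first being loaded into a register. */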
1550
1551static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1552{
1553 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1554 /* sanity checks ... */
1555 switch (rmi->tag) {
1556 case Armi_Imm:
1557 return rmi;
1558 case Armi_Reg:
1559 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
1560 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
1561 return rmi;
1562 case Armi_Mem:
1563 vassert(sane_AMode(rmi->Armi.Mem.am));
1564 return rmi;
1565 default:
1566 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
1567 }
1568}
1569
1570/* DO NOT CALL THIS DIRECTLY ! */
1571static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1572{
1573 IRType ty = typeOfIRExpr(env->type_env,e);
1574 vassert(ty == Ity_I64 || ty == Ity_I32
1575 || ty == Ity_I16 || ty == Ity_I8);
1576
1577 /* special case: immediate 64/32/16/8 */
1578 if (e->tag == Iex_Const) {
1579 switch (e->Iex.Const.con->tag) {
1580 case Ico_U64:
1581 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
1582 return AMD64RMI_Imm(0xFFFFFFFF & e->Iex.Const.con->Ico.U64);
1583 }
1584 break;
1585 case Ico_U32:
1586 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
1587 case Ico_U16:
1588 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
1589 case Ico_U8:
1590 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
1591 default:
1592 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
1593 }
1594 }
1595
1596 /* special case: 64-bit GET */
1597 if (e->tag == Iex_Get && ty == Ity_I64) {
1598 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1599 hregAMD64_RBP()));
1600 }
1601
sewardj0852a132005-02-21 08:28:46 +00001602 /* special case: 64-bit load from memory */
1603 if (e->tag == Iex_LDle && ty == Ity_I64) {
1604 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
1605 return AMD64RMI_Mem(am);
1606 }
sewardj614b3fb2005-02-02 02:16:03 +00001607
1608 /* default case: calculate into a register and return that */
sewardj8258a8c2005-02-02 03:11:24 +00001609 {
1610 HReg r = iselIntExpr_R ( env, e );
1611 return AMD64RMI_Reg(r);
1612 }
sewardj614b3fb2005-02-02 02:16:03 +00001613}
1614
1615
sewardjf67eadf2005-02-03 03:53:52 +00001616/* --------------------- RIs --------------------- */
1617
1618/* Calculate an expression into an AMD64RI operand. As with
1619 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1620 bits. */
1621
1622static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1623{
1624 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
1625 /* sanity checks ... */
1626 switch (ri->tag) {
1627 case Ari_Imm:
1628 return ri;
 1629      case Ari_Reg:
1630 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
1631 vassert(hregIsVirtual(ri->Ari.Reg.reg));
1632 return ri;
1633 default:
1634 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
1635 }
1636}
1637
1638/* DO NOT CALL THIS DIRECTLY ! */
1639static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1640{
1641 IRType ty = typeOfIRExpr(env->type_env,e);
1642 vassert(ty == Ity_I64 || ty == Ity_I32
1643 || ty == Ity_I16 || ty == Ity_I8);
1644
1645 /* special case: immediate */
1646 if (e->tag == Iex_Const) {
1647 switch (e->Iex.Const.con->tag) {
1648 case Ico_U64:
1649 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
1650 return AMD64RI_Imm(0xFFFFFFFF & e->Iex.Const.con->Ico.U64);
1651 }
1652 break;
1653 case Ico_U32:
1654 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
1655 case Ico_U16:
1656 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
1657 case Ico_U8:
1658 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
1659 default:
 1660         vpanic("iselIntExpr_RI.Iex_Const(amd64)");
1661 }
1662 }
1663
1664 /* default case: calculate into a register and return that */
1665 {
1666 HReg r = iselIntExpr_R ( env, e );
1667 return AMD64RI_Reg(r);
1668 }
1669}
1670
1671
sewardj05b3b6a2005-02-04 01:44:33 +00001672/* --------------------- RMs --------------------- */
1673
1674/* Similarly, calculate an expression into an AMD64RM operand. As
1675 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1676 bits. */
1677
1678static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1679{
1680 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
1681 /* sanity checks ... */
1682 switch (rm->tag) {
1683 case Arm_Reg:
1684 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
1685 vassert(hregIsVirtual(rm->Arm.Reg.reg));
1686 return rm;
1687 case Arm_Mem:
1688 vassert(sane_AMode(rm->Arm.Mem.am));
1689 return rm;
1690 default:
1691 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
1692 }
1693}
1694
1695/* DO NOT CALL THIS DIRECTLY ! */
1696static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1697{
1698 IRType ty = typeOfIRExpr(env->type_env,e);
1699 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1700
1701 /* special case: 64-bit GET */
1702 if (e->tag == Iex_Get && ty == Ity_I64) {
1703 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1704 hregAMD64_RBP()));
1705 }
1706
1707 /* special case: load from memory */
1708
1709 /* default case: calculate into a register and return that */
1710 {
1711 HReg r = iselIntExpr_R ( env, e );
1712 return AMD64RM_Reg(r);
1713 }
1714}
1715
1716
1717/* --------------------- CONDCODE --------------------- */
1718
 1719/* Generate code to evaluate a bit-typed expression, returning the
 1720   condition code which would correspond to the expression notionally
 1721   returning 1. */
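/* Note: the returned AMD64CondCode names the condition (Acc_Z,
   Acc_NZ, ...) which holds, after the flag-setting instructions
   emitted here, exactly when the expression is 1; the caller
   typically then uses it in a conditional jump, set or cmov.  For
   example, CmpEQ64(x,y) below emits a 64-bit compare and returns
   Acc_Z. */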
1722
1723static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1724{
1725 /* Uh, there's nothing we can sanity check here, unfortunately. */
1726 return iselCondCode_wrk(env,e);
1727}
1728
1729/* DO NOT CALL THIS DIRECTLY ! */
1730static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1731{
sewardjf8c37f72005-02-07 18:55:29 +00001732 MatchInfo mi;
1733 DECLARE_PATTERN(p_32to1_64to32);
sewardja3e98302005-02-01 15:55:05 +00001734//.. DECLARE_PATTERN(p_1Uto32_then_32to1);
1735//.. DECLARE_PATTERN(p_1Sto32_then_32to1);
sewardj05b3b6a2005-02-04 01:44:33 +00001736
1737 vassert(e);
1738 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1739
sewardja3e98302005-02-01 15:55:05 +00001740//.. /* Constant 1:Bit */
1741//.. if (e->tag == Iex_Const && e->Iex.Const.con->Ico.U1 == True) {
1742//.. HReg r;
1743//.. vassert(e->Iex.Const.con->tag == Ico_U1);
1744//.. r = newVRegI(env);
1745//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1746//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1747//.. return Xcc_Z;
1748//.. }
sewardj486074e2005-02-08 20:10:04 +00001749
1750 /* Not1(...) */
1751 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1752 /* Generate code for the arg, and negate the test condition */
1753 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1754 }
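   /* The XOR-with-1 above relies on the standard x86/amd64
      condition-code encoding, in which a condition and its negation
      differ only in the bottom bit (Z/NZ, L/NL, and so on), so
      flipping that bit inverts the test without emitting any code. */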
1755
sewardja3e98302005-02-01 15:55:05 +00001756//.. /* 32to1(1Uto32(expr1)) -- the casts are pointless, ignore them */
1757//.. DEFINE_PATTERN(p_1Uto32_then_32to1,
1758//.. unop(Iop_32to1,unop(Iop_1Uto32,bind(0))));
1759//.. if (matchIRExpr(&mi,p_1Uto32_then_32to1,e)) {
1760//.. IRExpr* expr1 = mi.bindee[0];
1761//.. return iselCondCode(env, expr1);
1762//.. }
1763//..
1764//.. /* 32to1(1Sto32(expr1)) -- the casts are pointless, ignore them */
1765//.. DEFINE_PATTERN(p_1Sto32_then_32to1,
1766//.. unop(Iop_32to1,unop(Iop_1Sto32,bind(0))));
1767//.. if (matchIRExpr(&mi,p_1Sto32_then_32to1,e)) {
1768//.. IRExpr* expr1 = mi.bindee[0];
1769//.. return iselCondCode(env, expr1);
1770//.. }
sewardjf8c37f72005-02-07 18:55:29 +00001771
1772 /* pattern: 32to1(64to32(expr64)) */
1773 DEFINE_PATTERN(p_32to1_64to32,
1774 unop(Iop_32to1,unop(Iop_64to32, bind(0)))
1775 );
1776 if (matchIRExpr(&mi,p_32to1_64to32,e)) {
1777 AMD64RM* rm = iselIntExpr_RM(env, mi.bindee[0]);
1778 addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(1),rm));
1779 return Acc_NZ;
1780 }
1781
sewardja3e98302005-02-01 15:55:05 +00001782//.. /* CmpEQ8 / CmpNE8 */
1783//.. if (e->tag == Iex_Binop
1784//.. && (e->Iex.Binop.op == Iop_CmpEQ8
1785//.. || e->Iex.Binop.op == Iop_CmpNE8)) {
1786//.. HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1787//.. X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1788//.. HReg r = newVRegI(env);
1789//.. addInstr(env, mk_iMOVsd_RR(r1,r));
1790//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1791//.. addInstr(env, X86Instr_Alu32R(Xalu_AND,X86RMI_Imm(0xFF),r));
1792//.. switch (e->Iex.Binop.op) {
1793//.. case Iop_CmpEQ8: return Xcc_Z;
1794//.. case Iop_CmpNE8: return Xcc_NZ;
1795//.. default: vpanic("iselCondCode(x86): CmpXX8");
1796//.. }
1797//.. }
1798//..
1799//.. /* CmpEQ16 / CmpNE16 */
1800//.. if (e->tag == Iex_Binop
1801//.. && (e->Iex.Binop.op == Iop_CmpEQ16
1802//.. || e->Iex.Binop.op == Iop_CmpNE16)) {
1803//.. HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1804//.. X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1805//.. HReg r = newVRegI(env);
1806//.. addInstr(env, mk_iMOVsd_RR(r1,r));
1807//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1808//.. addInstr(env, X86Instr_Alu32R(Xalu_AND,X86RMI_Imm(0xFFFF),r));
1809//.. switch (e->Iex.Binop.op) {
1810//.. case Iop_CmpEQ16: return Xcc_Z;
1811//.. case Iop_CmpNE16: return Xcc_NZ;
1812//.. default: vpanic("iselCondCode(x86): CmpXX16");
1813//.. }
1814//.. }
1815//..
1816//.. /* CmpNE32(1Sto32(b), 0) ==> b */
1817//.. {
1818//.. DECLARE_PATTERN(p_CmpNE32_1Sto32);
1819//.. DEFINE_PATTERN(
1820//.. p_CmpNE32_1Sto32,
1821//.. binop(Iop_CmpNE32, unop(Iop_1Sto32,bind(0)), mkU32(0)));
1822//.. if (matchIRExpr(&mi, p_CmpNE32_1Sto32, e)) {
1823//.. return iselCondCode(env, mi.bindee[0]);
1824//.. }
1825//.. }
sewardjd0a12df2005-02-10 02:07:43 +00001826
1827 /* Cmp*64*(x,y) */
1828 if (e->tag == Iex_Binop
1829 && (e->Iex.Binop.op == Iop_CmpEQ64
1830 || e->Iex.Binop.op == Iop_CmpNE64
1831 //|| e->Iex.Binop.op == Iop_CmpLT64S
1832 //|| e->Iex.Binop.op == Iop_CmpLT64U
1833 //|| e->Iex.Binop.op == Iop_CmpLE64S
1834 //|| e->Iex.Binop.op == Iop_CmpLE64U
1835 )) {
1836 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1837 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1838 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
1839 switch (e->Iex.Binop.op) {
1840 case Iop_CmpEQ64: return Acc_Z;
1841 case Iop_CmpNE64: return Acc_NZ;
1842 //case Iop_CmpLT64S: return Acc_L;
1843 //case Iop_CmpLT64U: return Acc_B;
1844 //case Iop_CmpLE64S: return Acc_LE;
1845 //case Iop_CmpLE64U: return Acc_BE;
1846 default: vpanic("iselCondCode(amd64): CmpXX64");
1847 }
1848 }
1849
sewardja3e98302005-02-01 15:55:05 +00001850//.. /* CmpNE64(1Sto64(b), 0) ==> b */
1851//.. {
1852//.. DECLARE_PATTERN(p_CmpNE64_1Sto64);
1853//.. DEFINE_PATTERN(
1854//.. p_CmpNE64_1Sto64,
1855//.. binop(Iop_CmpNE64, unop(Iop_1Sto64,bind(0)), mkU64(0)));
1856//.. if (matchIRExpr(&mi, p_CmpNE64_1Sto64, e)) {
1857//.. return iselCondCode(env, mi.bindee[0]);
1858//.. }
1859//.. }
1860//..
1861//.. /* CmpNE64(x, 0) */
1862//.. {
1863//.. DECLARE_PATTERN(p_CmpNE64_x_zero);
1864//.. DEFINE_PATTERN(
1865//.. p_CmpNE64_x_zero,
1866//.. binop(Iop_CmpNE64, bind(0), mkU64(0)) );
1867//.. if (matchIRExpr(&mi, p_CmpNE64_x_zero, e)) {
1868//.. HReg hi, lo;
1869//.. IRExpr* x = mi.bindee[0];
1870//.. HReg tmp = newVRegI(env);
1871//.. iselInt64Expr( &hi, &lo, env, x );
1872//.. addInstr(env, mk_iMOVsd_RR(hi, tmp));
1873//.. addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1874//.. return Xcc_NZ;
1875//.. }
1876//.. }
1877//..
1878//.. /* CmpNE64 */
1879//.. if (e->tag == Iex_Binop
1880//.. && e->Iex.Binop.op == Iop_CmpNE64) {
1881//.. HReg hi1, hi2, lo1, lo2;
1882//.. HReg tHi = newVRegI(env);
1883//.. HReg tLo = newVRegI(env);
1884//.. iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
1885//.. iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
1886//.. addInstr(env, mk_iMOVsd_RR(hi1, tHi));
1887//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
1888//.. addInstr(env, mk_iMOVsd_RR(lo1, tLo));
1889//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
1890//.. addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
1891//.. switch (e->Iex.Binop.op) {
1892//.. case Iop_CmpNE64: return Xcc_NZ;
1893//.. default: vpanic("iselCondCode(x86): CmpXX64");
1894//.. }
1895//.. }
1896//..
1897//.. /* var */
1898//.. if (e->tag == Iex_Tmp) {
1899//.. HReg r32 = lookupIRTemp(env, e->Iex.Tmp.tmp);
1900//.. HReg dst = newVRegI(env);
1901//.. addInstr(env, mk_iMOVsd_RR(r32,dst));
1902//.. addInstr(env, X86Instr_Alu32R(Xalu_AND,X86RMI_Imm(1),dst));
1903//.. return Xcc_NZ;
1904//.. }
sewardj05b3b6a2005-02-04 01:44:33 +00001905
1906 ppIRExpr(e);
1907 vpanic("iselCondCode(amd64)");
1908}
1909
1910
sewardj9b967672005-02-08 11:13:09 +00001911/*---------------------------------------------------------*/
1912/*--- ISEL: Integer expressions (128 bit) ---*/
1913/*---------------------------------------------------------*/
1914
1915/* Compute a 128-bit value into a register pair, which is returned as
 1916   the first two parameters.  As with iselIntExpr_R, the returned
 1917   regs will be virtual, and they must not be changed by subsequent
 1918   code emitted by the caller. */
1919
1920static void iselInt128Expr ( HReg* rHi, HReg* rLo,
1921 ISelEnv* env, IRExpr* e )
1922{
1923 iselInt128Expr_wrk(rHi, rLo, env, e);
1924# if 0
1925 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1926# endif
1927 vassert(hregClass(*rHi) == HRcInt64);
1928 vassert(hregIsVirtual(*rHi));
1929 vassert(hregClass(*rLo) == HRcInt64);
1930 vassert(hregIsVirtual(*rLo));
1931}
1932
1933/* DO NOT CALL THIS DIRECTLY ! */
1934static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
1935 ISelEnv* env, IRExpr* e )
1936{
sewardja3e98302005-02-01 15:55:05 +00001937//.. HWord fn = 0; /* helper fn for most SIMD64 stuff */
sewardj9b967672005-02-08 11:13:09 +00001938 vassert(e);
1939 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
1940
sewardja3e98302005-02-01 15:55:05 +00001941//.. /* 64-bit literal */
1942//.. if (e->tag == Iex_Const) {
1943//.. ULong w64 = e->Iex.Const.con->Ico.U64;
1944//.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
1945//.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
1946//.. HReg tLo = newVRegI(env);
1947//.. HReg tHi = newVRegI(env);
1948//.. vassert(e->Iex.Const.con->tag == Ico_U64);
1949//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
1950//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1951//.. *rHi = tHi;
1952//.. *rLo = tLo;
1953//.. return;
1954//.. }
sewardj9b967672005-02-08 11:13:09 +00001955
1956 /* read 128-bit IRTemp */
1957 if (e->tag == Iex_Tmp) {
1958 lookupIRTemp128( rHi, rLo, env, e->Iex.Tmp.tmp);
1959 return;
1960 }
1961
sewardja3e98302005-02-01 15:55:05 +00001962//.. /* 64-bit load */
1963//.. if (e->tag == Iex_LDle) {
1964//.. HReg tLo, tHi;
1965//.. X86AMode *am0, *am4;
1966//.. vassert(e->Iex.LDle.ty == Ity_I64);
1967//.. tLo = newVRegI(env);
1968//.. tHi = newVRegI(env);
1969//.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
1970//.. am4 = advance4(am0);
1971//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
1972//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1973//.. *rHi = tHi;
1974//.. *rLo = tLo;
1975//.. return;
1976//.. }
1977//..
1978//.. /* 64-bit GET */
1979//.. if (e->tag == Iex_Get) {
1980//.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
1981//.. X86AMode* am4 = advance4(am);
1982//.. HReg tLo = newVRegI(env);
1983//.. HReg tHi = newVRegI(env);
1984//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
1985//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1986//.. *rHi = tHi;
1987//.. *rLo = tLo;
1988//.. return;
1989//.. }
1990//..
1991//.. /* 64-bit GETI */
1992//.. if (e->tag == Iex_GetI) {
1993//.. X86AMode* am
1994//.. = genGuestArrayOffset( env, e->Iex.GetI.descr,
1995//.. e->Iex.GetI.ix, e->Iex.GetI.bias );
1996//.. X86AMode* am4 = advance4(am);
1997//.. HReg tLo = newVRegI(env);
1998//.. HReg tHi = newVRegI(env);
1999//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2000//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2001//.. *rHi = tHi;
2002//.. *rLo = tLo;
2003//.. return;
2004//.. }
2005//..
2006//.. /* 64-bit Mux0X */
2007//.. if (e->tag == Iex_Mux0X) {
2008//.. HReg e0Lo, e0Hi, eXLo, eXHi, r8;
2009//.. HReg tLo = newVRegI(env);
2010//.. HReg tHi = newVRegI(env);
2011//.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2012//.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2013//.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2014//.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2015//.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2016//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
2017//.. /* This assumes the first cmov32 doesn't trash the condition
2018//.. codes, so they are still available for the second cmov32 */
2019//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2020//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2021//.. *rHi = tHi;
2022//.. *rLo = tLo;
2023//.. return;
2024//.. }
sewardj9b967672005-02-08 11:13:09 +00002025
2026 /* --------- BINARY ops --------- */
2027 if (e->tag == Iex_Binop) {
2028 switch (e->Iex.Binop.op) {
sewardj7de0d3c2005-02-13 02:26:41 +00002029 /* 64 x 64 -> 128 multiply */
sewardj9b967672005-02-08 11:13:09 +00002030 case Iop_MullU64:
2031 case Iop_MullS64: {
2032 /* get one operand into %rax, and the other into a R/M.
 2033            Need to make an educated guess about which operand is
 2034            better placed where. */
2035 HReg tLo = newVRegI(env);
2036 HReg tHi = newVRegI(env);
2037 Bool syned = e->Iex.Binop.op == Iop_MullS64;
2038 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2039 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2040 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
2041 addInstr(env, AMD64Instr_MulL(syned, 8, rmLeft));
2042 /* Result is now in RDX:RAX. Tell the caller. */
2043 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2044 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2045 *rHi = tHi;
2046 *rLo = tLo;
2047 return;
2048 }
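         /* Roughly, the sequence intended here is (sketch):
               movq  <rRight>, %rax
               imulq <rmLeft>          (mulq for the unsigned case)
            after which the full 64x64->128 product sits in %rdx:%rax
            and is copied out into the two virtual result regs. */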
sewardj7de0d3c2005-02-13 02:26:41 +00002049
sewardja6b93d12005-02-17 09:28:28 +00002050 /* 128 x 64 -> (64(rem),64(div)) division */
2051 case Iop_DivModU128to64:
2052 case Iop_DivModS128to64: {
2053 /* Get the 128-bit operand into rdx:rax, and the other into
2054 any old R/M. */
2055 HReg sHi, sLo;
2056 HReg tLo = newVRegI(env);
2057 HReg tHi = newVRegI(env);
2058 Bool syned = e->Iex.Binop.op == Iop_DivModS128to64;
2059 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2060 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2061 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2062 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2063 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2064 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2065 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2066 *rHi = tHi;
2067 *rLo = tLo;
2068 return;
2069 }
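         /* Roughly (sketch):
               movq  <sHi>, %rdx
               movq  <sLo>, %rax
               idivq <rmRight>         (divq for the unsigned case)
            leaving the quotient in %rax, which becomes the low half
            of the IR result, and the remainder in %rdx, which
            becomes the high half. */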
2070
2071 /* 64HLto128(e1,e2) */
2072 case Iop_64HLto128:
2073 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2074 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2075 return;
2076
sewardja3e98302005-02-01 15:55:05 +00002077//.. /* Or64/And64/Xor64 */
2078//.. case Iop_Or64:
2079//.. case Iop_And64:
2080//.. case Iop_Xor64: {
2081//.. HReg xLo, xHi, yLo, yHi;
2082//.. HReg tLo = newVRegI(env);
2083//.. HReg tHi = newVRegI(env);
2084//.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2085//.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2086//.. : Xalu_XOR;
2087//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2088//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2089//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2090//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2091//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2092//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2093//.. *rHi = tHi;
2094//.. *rLo = tLo;
2095//.. return;
2096//.. }
2097//..
2098//.. /* Add64/Sub64 */
2099//.. case Iop_Add64:
2100//.. case Iop_Sub64: {
2101//.. HReg xLo, xHi, yLo, yHi;
2102//.. HReg tLo = newVRegI(env);
2103//.. HReg tHi = newVRegI(env);
2104//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2105//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2106//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2107//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2108//.. if (e->Iex.Binop.op==Iop_Add64) {
2109//.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2110//.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2111//.. } else {
2112//.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2113//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2114//.. }
2115//.. *rHi = tHi;
2116//.. *rLo = tLo;
2117//.. return;
2118//.. }
2119//..
2120//.. /* 32HLto64(e1,e2) */
2121//.. case Iop_32HLto64:
2122//.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2123//.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2124//.. return;
2125//..
2126//.. /* 64-bit shifts */
2127//.. case Iop_Shl64: {
2128//.. /* We use the same ingenious scheme as gcc. Put the value
2129//.. to be shifted into %hi:%lo, and the shift amount into
2130//.. %cl. Then (dsts on right, a la ATT syntax):
2131//..
2132//.. shldl %cl, %lo, %hi -- make %hi be right for the
2133//.. -- shift amt %cl % 32
2134//.. shll %cl, %lo -- make %lo be right for the
2135//.. -- shift amt %cl % 32
2136//..
2137//.. Now, if (shift amount % 64) is in the range 32 .. 63,
2138//.. we have to do a fixup, which puts the result low half
2139//.. into the result high half, and zeroes the low half:
2140//..
2141//.. testl $32, %ecx
2142//..
2143//.. cmovnz %lo, %hi
2144//.. movl $0, %tmp -- sigh; need yet another reg
2145//.. cmovnz %tmp, %lo
2146//.. */
2147//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2148//.. tLo = newVRegI(env);
2149//.. tHi = newVRegI(env);
2150//.. tTemp = newVRegI(env);
2151//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2152//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2153//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2154//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2155//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2156//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2157//.. and those regs are legitimately modifiable. */
2158//.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2159//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
2160//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2161//.. X86RM_Reg(hregX86_ECX())));
2162//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2163//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2164//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2165//.. *rHi = tHi;
2166//.. *rLo = tLo;
2167//.. return;
2168//.. }
2169//..
2170//.. case Iop_Shr64: {
2171//.. /* We use the same ingenious scheme as gcc. Put the value
2172//.. to be shifted into %hi:%lo, and the shift amount into
2173//.. %cl. Then:
2174//..
2175//.. shrdl %cl, %hi, %lo -- make %lo be right for the
2176//.. -- shift amt %cl % 32
2177//.. shrl %cl, %hi -- make %hi be right for the
2178//.. -- shift amt %cl % 32
2179//..
2180//.. Now, if (shift amount % 64) is in the range 32 .. 63,
2181//.. we have to do a fixup, which puts the result high half
2182//.. into the result low half, and zeroes the high half:
2183//..
2184//.. testl $32, %ecx
2185//..
2186//.. cmovnz %hi, %lo
2187//.. movl $0, %tmp -- sigh; need yet another reg
2188//.. cmovnz %tmp, %hi
2189//.. */
2190//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2191//.. tLo = newVRegI(env);
2192//.. tHi = newVRegI(env);
2193//.. tTemp = newVRegI(env);
2194//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2195//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2196//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2197//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2198//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2199//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2200//.. and those regs are legitimately modifiable. */
2201//.. addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2202//.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
2203//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2204//.. X86RM_Reg(hregX86_ECX())));
2205//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2206//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2207//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2208//.. *rHi = tHi;
2209//.. *rLo = tLo;
2210//.. return;
2211//.. }
2212//..
2213//.. /* F64 -> I64 */
2214//.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2215//.. case. Unfortunately I see no easy way to avoid the
2216//.. duplication. */
2217//.. case Iop_F64toI64: {
2218//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2219//.. HReg tLo = newVRegI(env);
2220//.. HReg tHi = newVRegI(env);
2221//..
2222//.. /* Used several times ... */
2223//.. /* Careful ... this sharing is only safe because
2224//.. zero_esp/four_esp do not hold any registers which the
2225//.. register allocator could attempt to swizzle later. */
2226//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2227//.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2228//..
2229//.. /* rf now holds the value to be converted, and rrm holds
2230//.. the rounding mode value, encoded as per the
2231//.. IRRoundingMode enum. The first thing to do is set the
2232//.. FPU's rounding mode accordingly. */
2233//..
2234//.. /* Create a space for the format conversion. */
2235//.. /* subl $8, %esp */
2236//.. sub_from_esp(env, 8);
2237//..
2238//.. /* Set host rounding mode */
2239//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2240//..
2241//.. /* gistll %rf, 0(%esp) */
2242//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2243//..
2244//.. /* movl 0(%esp), %dstLo */
2245//.. /* movl 4(%esp), %dstHi */
2246//.. addInstr(env, X86Instr_Alu32R(
2247//.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2248//.. addInstr(env, X86Instr_Alu32R(
2249//.. Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2250//..
2251//.. /* Restore default FPU rounding. */
2252//.. set_FPU_rounding_default( env );
2253//..
2254//.. /* addl $8, %esp */
2255//.. add_to_esp(env, 8);
2256//..
2257//.. *rHi = tHi;
2258//.. *rLo = tLo;
2259//.. return;
2260//.. }
2261//..
2262//.. case Iop_Add8x8:
2263//.. fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2264//.. case Iop_Add16x4:
2265//.. fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2266//.. case Iop_Add32x2:
2267//.. fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2268//..
2269//.. case Iop_Avg8Ux8:
2270//.. fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2271//.. case Iop_Avg16Ux4:
2272//.. fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2273//..
2274//.. case Iop_CmpEQ8x8:
2275//.. fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2276//.. case Iop_CmpEQ16x4:
2277//.. fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2278//.. case Iop_CmpEQ32x2:
2279//.. fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2280//..
2281//.. case Iop_CmpGT8Sx8:
2282//.. fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2283//.. case Iop_CmpGT16Sx4:
2284//.. fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2285//.. case Iop_CmpGT32Sx2:
2286//.. fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2287//..
2288//.. case Iop_InterleaveHI8x8:
2289//.. fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2290//.. case Iop_InterleaveLO8x8:
2291//.. fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2292//.. case Iop_InterleaveHI16x4:
2293//.. fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2294//.. case Iop_InterleaveLO16x4:
2295//.. fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2296//.. case Iop_InterleaveHI32x2:
2297//.. fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2298//.. case Iop_InterleaveLO32x2:
2299//.. fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2300//..
2301//.. case Iop_Max8Ux8:
2302//.. fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2303//.. case Iop_Max16Sx4:
2304//.. fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2305//.. case Iop_Min8Ux8:
2306//.. fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2307//.. case Iop_Min16Sx4:
2308//.. fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2309//..
2310//.. case Iop_Mul16x4:
2311//.. fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2312//.. case Iop_MulHi16Sx4:
2313//.. fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2314//.. case Iop_MulHi16Ux4:
2315//.. fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2316//..
2317//.. case Iop_QAdd8Sx8:
2318//.. fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2319//.. case Iop_QAdd16Sx4:
2320//.. fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2321//.. case Iop_QAdd8Ux8:
2322//.. fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2323//.. case Iop_QAdd16Ux4:
2324//.. fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2325//..
2326//.. case Iop_QNarrow32Sx2:
2327//.. fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
2328//.. case Iop_QNarrow16Sx4:
2329//.. fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
2330//.. case Iop_QNarrow16Ux4:
2331//.. fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;
2332//..
2333//.. case Iop_QSub8Sx8:
2334//.. fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2335//.. case Iop_QSub16Sx4:
2336//.. fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2337//.. case Iop_QSub8Ux8:
2338//.. fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2339//.. case Iop_QSub16Ux4:
2340//.. fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2341//..
2342//.. case Iop_Sub8x8:
2343//.. fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2344//.. case Iop_Sub16x4:
2345//.. fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2346//.. case Iop_Sub32x2:
2347//.. fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2348//..
2349//.. binnish: {
2350//.. /* Note: the following assumes all helpers are of
2351//.. signature
2352//.. ULong fn ( ULong, ULong ), and they are
2353//.. not marked as regparm functions.
2354//.. */
2355//.. HReg xLo, xHi, yLo, yHi;
2356//.. HReg tLo = newVRegI(env);
2357//.. HReg tHi = newVRegI(env);
2358//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2359//.. addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2360//.. addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2361//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2362//.. addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2363//.. addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2364//.. addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2365//.. add_to_esp(env, 4*4);
2366//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2367//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2368//.. *rHi = tHi;
2369//.. *rLo = tLo;
2370//.. return;
2371//.. }
2372//..
2373//.. case Iop_ShlN32x2:
2374//.. fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2375//.. case Iop_ShlN16x4:
2376//.. fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2377//.. case Iop_ShrN32x2:
2378//.. fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2379//.. case Iop_ShrN16x4:
2380//.. fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2381//.. case Iop_SarN32x2:
2382//.. fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2383//.. case Iop_SarN16x4:
2384//.. fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2385//.. shifty: {
2386//.. /* Note: the following assumes all helpers are of
2387//.. signature
2388//.. ULong fn ( ULong, UInt ), and they are
2389//.. not marked as regparm functions.
2390//.. */
2391//.. HReg xLo, xHi;
2392//.. HReg tLo = newVRegI(env);
2393//.. HReg tHi = newVRegI(env);
2394//.. X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2395//.. addInstr(env, X86Instr_Push(y));
2396//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2397//.. addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2398//.. addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2399//.. addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2400//.. add_to_esp(env, 3*4);
2401//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2402//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2403//.. *rHi = tHi;
2404//.. *rLo = tLo;
2405//.. return;
2406//.. }
sewardj9b967672005-02-08 11:13:09 +00002407
2408 default:
2409 break;
2410 }
2411 } /* if (e->tag == Iex_Binop) */
2412
2413
sewardja3e98302005-02-01 15:55:05 +00002414//.. /* --------- UNARY ops --------- */
2415//.. if (e->tag == Iex_Unop) {
2416//.. switch (e->Iex.Unop.op) {
2417//..
2418//.. /* 32Sto64(e) */
2419//.. case Iop_32Sto64: {
2420//.. HReg tLo = newVRegI(env);
2421//.. HReg tHi = newVRegI(env);
2422//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2423//.. addInstr(env, mk_iMOVsd_RR(src,tHi));
2424//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2425//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
2426//.. *rHi = tHi;
2427//.. *rLo = tLo;
2428//.. return;
2429//.. }
2430//..
2431//.. /* 32Uto64(e) */
2432//.. case Iop_32Uto64: {
2433//.. HReg tLo = newVRegI(env);
2434//.. HReg tHi = newVRegI(env);
2435//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2436//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2437//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2438//.. *rHi = tHi;
2439//.. *rLo = tLo;
2440//.. return;
2441//.. }
sewardj1a01e652005-02-23 11:39:21 +00002442
sewardja3e98302005-02-01 15:55:05 +00002443//.. /* could do better than this, but for now ... */
2444//.. case Iop_1Sto64: {
2445//.. HReg tLo = newVRegI(env);
2446//.. HReg tHi = newVRegI(env);
2447//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2448//.. addInstr(env, X86Instr_Set32(cond,tLo));
2449//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
2450//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
2451//.. addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2452//.. *rHi = tHi;
2453//.. *rLo = tLo;
2454//.. return;
2455//.. }
2456//..
2457//.. /* Not64(e) */
2458//.. case Iop_Not64: {
2459//.. HReg tLo = newVRegI(env);
2460//.. HReg tHi = newVRegI(env);
2461//.. HReg sHi, sLo;
2462//.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2463//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2464//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2465//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
2466//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
2467//.. *rHi = tHi;
2468//.. *rLo = tLo;
2469//.. return;
2470//.. }
2471//..
2472//.. /* ReinterpF64asI64(e) */
2473//.. /* Given an IEEE754 double, produce an I64 with the same bit
2474//.. pattern. */
2475//.. case Iop_ReinterpF64asI64: {
2476//.. HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2477//.. HReg tLo = newVRegI(env);
2478//.. HReg tHi = newVRegI(env);
2479//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2480//.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2481//.. /* paranoia */
2482//.. set_FPU_rounding_default(env);
2483//.. /* subl $8, %esp */
2484//.. sub_from_esp(env, 8);
2485//.. /* gstD %rf, 0(%esp) */
2486//.. addInstr(env,
2487//.. X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2488//.. /* movl 0(%esp), %tLo */
2489//.. addInstr(env,
2490//.. X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2491//.. /* movl 4(%esp), %tHi */
2492//.. addInstr(env,
2493//.. X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2494//.. /* addl $8, %esp */
2495//.. add_to_esp(env, 8);
2496//.. *rHi = tHi;
2497//.. *rLo = tLo;
2498//.. return;
2499//.. }
2500//..
2501//.. case Iop_CmpNEZ32x2:
2502//.. fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2503//.. case Iop_CmpNEZ16x4:
2504//.. fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2505//.. case Iop_CmpNEZ8x8:
2506//.. fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2507//.. unish: {
2508//.. /* Note: the following assumes all helpers are of
2509//.. signature
2510//.. ULong fn ( ULong ), and they are
2511//.. not marked as regparm functions.
2512//.. */
2513//.. HReg xLo, xHi;
2514//.. HReg tLo = newVRegI(env);
2515//.. HReg tHi = newVRegI(env);
2516//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2517//.. addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2518//.. addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2519//.. addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2520//.. add_to_esp(env, 2*4);
2521//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2522//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2523//.. *rHi = tHi;
2524//.. *rLo = tLo;
2525//.. return;
2526//.. }
2527//..
2528//.. default:
2529//.. break;
2530//.. }
2531//.. } /* if (e->tag == Iex_Unop) */
2532//..
2533//..
2534//.. /* --------- CCALL --------- */
2535//.. if (e->tag == Iex_CCall) {
2536//.. HReg tLo = newVRegI(env);
2537//.. HReg tHi = newVRegI(env);
2538//..
2539//.. /* Marshal args, do the call, clear stack. */
2540//.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2541//..
2542//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2543//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2544//.. *rHi = tHi;
2545//.. *rLo = tLo;
2546//.. return;
2547//.. }
sewardj9b967672005-02-08 11:13:09 +00002548
2549 ppIRExpr(e);
2550 vpanic("iselInt128Expr");
2551}
2552
2553
sewardj8d965312005-02-25 02:48:47 +00002554/*---------------------------------------------------------*/
2555/*--- ISEL: Floating point expressions (32 bit) ---*/
2556/*---------------------------------------------------------*/
2557
2558/* Nothing interesting here; really just wrappers for
2559 64-bit stuff. */
2560
2561static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2562{
2563 HReg r = iselFltExpr_wrk( env, e );
2564# if 0
2565 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2566# endif
2567 vassert(hregClass(r) == HRcVec128);
2568 vassert(hregIsVirtual(r));
2569 return r;
2570}
2571
2572/* DO NOT CALL THIS DIRECTLY */
2573static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2574{
2575 IRType ty = typeOfIRExpr(env->type_env,e);
2576 vassert(ty == Ity_F32);
2577
sewardja3e98302005-02-01 15:55:05 +00002578//.. if (e->tag == Iex_Tmp) {
2579//.. return lookupIRTemp(env, e->Iex.Tmp.tmp);
2580//.. }
2581//..
2582//.. if (e->tag == Iex_LDle) {
2583//.. X86AMode* am;
2584//.. HReg res = newVRegF(env);
2585//.. vassert(e->Iex.LDle.ty == Ity_F32);
2586//.. am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2587//.. addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2588//.. return res;
2589//.. }
sewardj8d965312005-02-25 02:48:47 +00002590
2591 if (e->tag == Iex_Binop
2592 && e->Iex.Binop.op == Iop_F64toF32) {
2593 /* Although the result is still held in a standard SSE register,
2594 we need to round it to reflect the loss of accuracy/range
2595 entailed in casting it to a 32-bit float. */
2596 HReg dst = newVRegV(env);
2597 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2598 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2599 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2600 set_SSE_rounding_default( env );
2601 return dst;
2602 }
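   /* The D->S conversion above is presumably a cvtsd2ss; the
      bracketing set_SSE_rounding_mode/_default calls make it round
      according to the IR rounding-mode argument and then put %mxcsr
      back to its default value. */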
2603
sewardja3e98302005-02-01 15:55:05 +00002604//.. if (e->tag == Iex_Get) {
2605//.. X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2606//.. hregX86_EBP() );
2607//.. HReg res = newVRegF(env);
2608//.. addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2609//.. return res;
2610//.. }
2611//..
2612//.. if (e->tag == Iex_Unop
2613//.. && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2614//.. /* Given an I32, produce an IEEE754 float with the same bit
2615//.. pattern. */
2616//.. HReg dst = newVRegF(env);
2617//.. X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2618//.. /* paranoia */
2619//.. addInstr(env, X86Instr_Push(rmi));
2620//.. addInstr(env, X86Instr_FpLdSt(
2621//.. True/*load*/, 4, dst,
2622//.. X86AMode_IR(0, hregX86_ESP())));
2623//.. add_to_esp(env, 4);
2624//.. return dst;
2625//.. }
sewardj8d965312005-02-25 02:48:47 +00002626
2627 ppIRExpr(e);
2628 vpanic("iselFltExpr_wrk");
2629}
sewardj18303862005-02-21 12:36:54 +00002630
2631
2632/*---------------------------------------------------------*/
2633/*--- ISEL: Floating point expressions (64 bit) ---*/
2634/*---------------------------------------------------------*/
2635
2636/* Compute a 64-bit floating point value into the lower half of an xmm
2637 register, the identity of which is returned. As with
2638 iselIntExpr_R, the returned reg will be virtual, and it must not be
2639 changed by subsequent code emitted by the caller.
2640*/
2641
2642/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2643
2644 Type S (1 bit) E (11 bits) F (52 bits)
2645 ---- --------- ----------- -----------
2646 signalling NaN u 2047 (max) .0uuuuu---u
2647 (with at least
2648 one 1 bit)
2649 quiet NaN u 2047 (max) .1uuuuu---u
2650
2651 negative infinity 1 2047 (max) .000000---0
2652
2653 positive infinity 0 2047 (max) .000000---0
2654
2655 negative zero 1 0 .000000---0
2656
2657 positive zero 0 0 .000000---0
2658*/
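/* One bit pattern worth keeping in mind for the code below: the
   sign-bit mask 1ULL<<63 == 0x8000000000000000 is exactly the
   encoding of -0.0, and XORing it into a double flips the sign
   while leaving the exponent and fraction untouched (see the
   NegF64 case). */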
2659
2660static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2661{
2662 HReg r = iselDblExpr_wrk( env, e );
2663# if 0
2664 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2665# endif
2666 vassert(hregClass(r) == HRcVec128);
2667 vassert(hregIsVirtual(r));
2668 return r;
2669}
2670
2671/* DO NOT CALL THIS DIRECTLY */
2672static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2673{
2674 IRType ty = typeOfIRExpr(env->type_env,e);
2675 vassert(e);
2676 vassert(ty == Ity_F64);
2677
2678 if (e->tag == Iex_Tmp) {
2679 return lookupIRTemp(env, e->Iex.Tmp.tmp);
2680 }
2681
sewardj8d965312005-02-25 02:48:47 +00002682 if (e->tag == Iex_Const) {
2683 union { ULong u64; Double f64; } u;
2684 HReg res = newVRegV(env);
2685 HReg tmp = newVRegI(env);
2686 vassert(sizeof(u) == 8);
2687 vassert(sizeof(u.u64) == 8);
2688 vassert(sizeof(u.f64) == 8);
2689
2690 if (e->Iex.Const.con->tag == Ico_F64) {
2691 u.f64 = e->Iex.Const.con->Ico.F64;
2692 }
2693 else if (e->Iex.Const.con->tag == Ico_F64i) {
2694 u.u64 = e->Iex.Const.con->Ico.F64i;
2695 }
2696 else
2697 vpanic("iselDblExpr(amd64): const");
2698
2699 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2700 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2701 addInstr(env, AMD64Instr_SseLdSt(
2702 True/*load*/, 8, res,
2703 AMD64AMode_IR(0, hregAMD64_RSP())
2704 ));
2705 add_to_rsp(env, 8);
2706 return res;
2707 }
sewardj9da16972005-02-21 13:58:26 +00002708
2709 if (e->tag == Iex_LDle) {
2710 AMD64AMode* am;
2711 HReg res = newVRegV(env);
2712 vassert(e->Iex.LDle.ty == Ity_F64);
2713 am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2714 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2715 return res;
2716 }
sewardj18303862005-02-21 12:36:54 +00002717
2718 if (e->tag == Iex_Get) {
2719 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2720 hregAMD64_RBP() );
2721 HReg res = newVRegV(env);
2722 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2723 return res;
2724 }
2725
sewardj8d965312005-02-25 02:48:47 +00002726 if (e->tag == Iex_GetI) {
2727 AMD64AMode* am
2728 = genGuestArrayOffset(
2729 env, e->Iex.GetI.descr,
2730 e->Iex.GetI.ix, e->Iex.GetI.bias );
2731 HReg res = newVRegV(env);
2732 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2733 return res;
2734 }
2735
sewardja3e98302005-02-01 15:55:05 +00002736//.. if (e->tag == Iex_Binop) {
2737//.. X86FpOp fpop = Xfp_INVALID;
2738//.. switch (e->Iex.Binop.op) {
2739//.. case Iop_AddF64: fpop = Xfp_ADD; break;
2740//.. case Iop_SubF64: fpop = Xfp_SUB; break;
2741//.. case Iop_MulF64: fpop = Xfp_MUL; break;
2742//.. case Iop_DivF64: fpop = Xfp_DIV; break;
2743//.. case Iop_ScaleF64: fpop = Xfp_SCALE; break;
2744//.. case Iop_AtanF64: fpop = Xfp_ATAN; break;
2745//.. case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
2746//.. case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
2747//.. case Iop_PRemF64: fpop = Xfp_PREM; break;
2748//.. case Iop_PRem1F64: fpop = Xfp_PREM1; break;
2749//.. default: break;
2750//.. }
2751//.. if (fpop != Xfp_INVALID) {
2752//.. HReg res = newVRegF(env);
2753//.. HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
2754//.. HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
2755//.. addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
2756//.. if (fpop != Xfp_ADD && fpop != Xfp_SUB
2757//.. && fpop != Xfp_MUL && fpop != Xfp_DIV)
2758//.. roundToF64(env, res);
2759//.. return res;
2760//.. }
2761//.. }
2762//..
2763//.. if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
2764//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2765//.. HReg dst = newVRegF(env);
2766//..
2767//.. /* rf now holds the value to be rounded. The first thing to do
2768//.. is set the FPU's rounding mode accordingly. */
2769//..
2770//.. /* Set host rounding mode */
2771//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2772//..
2773//.. /* grndint %rf, %dst */
2774//.. addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2775//..
2776//.. /* Restore default FPU rounding. */
2777//.. set_FPU_rounding_default( env );
2778//..
2779//.. return dst;
2780//.. }
sewardj1a01e652005-02-23 11:39:21 +00002781
2782 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
2783 HReg dst = newVRegV(env);
2784 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
2785 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2786 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
2787 set_SSE_rounding_default( env );
2788 return dst;
2789 }
2790
2791 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32toF64) {
2792 HReg dst = newVRegV(env);
2793 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2794 set_SSE_rounding_default( env );
2795 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
2796 return dst;
2797 }
2798
sewardj8d965312005-02-25 02:48:47 +00002799 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF64) {
2800 /* Sigh ... very rough code. Could do much better. */
2801 HReg r1 = newVRegI(env);
2802 HReg dst = newVRegV(env);
2803 HReg tv = newVRegV(env);
2804 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2805 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
2806 addInstr(env, mk_vMOVsd_RR(src,dst));
2807 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
2808 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
2809 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
2810 addInstr(env, AMD64Instr_SseLdSt(True, 16, tv, rsp0));
2811 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tv, dst));
2812 add_to_rsp(env, 16);
2813 return dst;
2814 }
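   /* In the sequence above, the two pushes build the 16-byte
      constant { 0x8000000000000000, 0 } at 0(%rsp) -- the sign-bit
      mask ends up in the low lane because the stack grows downwards
      -- so after the 16-byte load, the XOR flips only the sign bit
      of the double in the low lane of dst and leaves the upper lane
      unchanged. */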
2815
sewardja3e98302005-02-01 15:55:05 +00002816//.. if (e->tag == Iex_Unop) {
2817//.. X86FpOp fpop = Xfp_INVALID;
2818//.. switch (e->Iex.Unop.op) {
2819//.. case Iop_NegF64: fpop = Xfp_NEG; break;
2820//.. case Iop_AbsF64: fpop = Xfp_ABS; break;
2821//.. case Iop_SqrtF64: fpop = Xfp_SQRT; break;
2822//.. case Iop_SinF64: fpop = Xfp_SIN; break;
2823//.. case Iop_CosF64: fpop = Xfp_COS; break;
2824//.. case Iop_TanF64: fpop = Xfp_TAN; break;
2825//.. case Iop_2xm1F64: fpop = Xfp_2XM1; break;
2826//.. default: break;
2827//.. }
2828//.. if (fpop != Xfp_INVALID) {
2829//.. HReg res = newVRegF(env);
2830//.. HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2831//.. addInstr(env, X86Instr_FpUnary(fpop,src,res));
2832//.. if (fpop != Xfp_SQRT
2833//.. && fpop != Xfp_NEG && fpop != Xfp_ABS)
2834//.. roundToF64(env, res);
2835//.. return res;
2836//.. }
2837//.. }
2838//..
2839//.. if (e->tag == Iex_Unop) {
2840//.. switch (e->Iex.Unop.op) {
2841//.. case Iop_I32toF64: {
2842//.. HReg dst = newVRegF(env);
2843//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
2844//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
2845//.. set_FPU_rounding_default(env);
2846//.. addInstr(env, X86Instr_FpLdStI(
2847//.. True/*load*/, 4, dst,
2848//.. X86AMode_IR(0, hregX86_ESP())));
2849//.. add_to_esp(env, 4);
2850//.. return dst;
2851//.. }
2852//.. case Iop_ReinterpI64asF64: {
2853//.. /* Given an I64, produce an IEEE754 double with the same
2854//.. bit pattern. */
2855//.. HReg dst = newVRegF(env);
2856//.. HReg rHi, rLo;
2857//.. iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
2858//.. /* paranoia */
2859//.. set_FPU_rounding_default(env);
2860//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
2861//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
2862//.. addInstr(env, X86Instr_FpLdSt(
2863//.. True/*load*/, 8, dst,
2864//.. X86AMode_IR(0, hregX86_ESP())));
2865//.. add_to_esp(env, 8);
2866//.. return dst;
2867//.. }
2868//.. case Iop_F32toF64: {
2869//.. /* this is a no-op */
2870//.. HReg res = iselFltExpr(env, e->Iex.Unop.arg);
2871//.. return res;
2872//.. }
2873//.. default:
2874//.. break;
2875//.. }
2876//.. }
sewardj8d965312005-02-25 02:48:47 +00002877
2878 /* --------- MULTIPLEX --------- */
2879 if (e->tag == Iex_Mux0X) {
2880 HReg r8, rX, r0, dst;
2881 vassert(ty == Ity_F64);
2882 vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
2883 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2884 rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
2885 r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
2886 dst = newVRegV(env);
2887 addInstr(env, mk_vMOVsd_RR(rX,dst));
2888 addInstr(env, AMD64Instr_Test64(AMD64RI_Imm(0xFF), AMD64RM_Reg(r8)));
2889 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
2890 return dst;
2891 }
sewardj18303862005-02-21 12:36:54 +00002892
2893 ppIRExpr(e);
2894 vpanic("iselDblExpr_wrk");
2895}
sewardjc2bcb6f2005-02-07 00:17:12 +00002896
sewardj0852a132005-02-21 08:28:46 +00002897
2898/*---------------------------------------------------------*/
2899/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
2900/*---------------------------------------------------------*/
2901
2902static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
2903{
2904 HReg r = iselVecExpr_wrk( env, e );
2905# if 0
2906 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2907# endif
2908 vassert(hregClass(r) == HRcVec128);
2909 vassert(hregIsVirtual(r));
2910 return r;
2911}
2912
2913
2914/* DO NOT CALL THIS DIRECTLY */
2915static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
2916{
sewardj9da16972005-02-21 13:58:26 +00002917 Bool arg1isEReg = False;
sewardj0852a132005-02-21 08:28:46 +00002918 AMD64SseOp op = Asse_INVALID;
2919 IRType ty = typeOfIRExpr(env->type_env,e);
2920 vassert(e);
2921 vassert(ty == Ity_V128);
2922
2923 if (e->tag == Iex_Tmp) {
2924 return lookupIRTemp(env, e->Iex.Tmp.tmp);
2925 }
2926
2927 if (e->tag == Iex_Get) {
2928 HReg dst = newVRegV(env);
2929 addInstr(env, AMD64Instr_SseLdSt(
2930 True/*load*/,
sewardj18303862005-02-21 12:36:54 +00002931 16,
sewardj0852a132005-02-21 08:28:46 +00002932 dst,
2933 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
2934 )
2935 );
2936 return dst;
2937 }
2938
sewardj1a01e652005-02-23 11:39:21 +00002939 if (e->tag == Iex_LDle) {
2940 HReg dst = newVRegV(env);
2941 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2942 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
2943 return dst;
2944 }
2945
2946 if (e->tag == Iex_Const) {
2947 HReg dst = newVRegV(env);
2948 vassert(e->Iex.Const.con->tag == Ico_V128);
sewardj8d965312005-02-25 02:48:47 +00002949 if (e->Iex.Const.con->Ico.V128 == 0x0000) {
sewardj1a01e652005-02-23 11:39:21 +00002950 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
2951 return dst;
sewardj8d965312005-02-25 02:48:47 +00002952 } else
2953 if (e->Iex.Const.con->Ico.V128 == 0x00FF) {
2954 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
2955 /* Both of these literals are sign-extended to 64 bits. */
2956 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
2957 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0xFFFFFFFF)));
2958 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
2959 add_to_rsp(env, 16);
2960 return dst;
sewardj1a01e652005-02-23 11:39:21 +00002961 } else {
2962 goto vec_fail;
sewardj8d965312005-02-25 02:48:47 +00002963# if 0
2964 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
2965 return dst;
2966# endif
sewardj1a01e652005-02-23 11:39:21 +00002967 }
2968 }
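   /* An Ico_V128 constant is a 16-bit mask, one bit per byte of the
      vector (bit i set means byte i is 0xFF).  Hence the two cases
      handled above: 0x0000 is the all-zeroes vector, made with a
      self-XOR, and 0x00FF is a vector whose low 8 bytes are all
      ones, built by pushing 0 and the sign-extended 0xFFFFFFFF and
      loading the 16 bytes back. */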
sewardj0852a132005-02-21 08:28:46 +00002969
2970 if (e->tag == Iex_Unop) {
2971 switch (e->Iex.Unop.op) {
2972
sewardj8d965312005-02-25 02:48:47 +00002973 case Iop_NotV128: {
2974 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
2975 return do_sse_NotV128(env, arg);
2976 }
2977
sewardja3e98302005-02-01 15:55:05 +00002978//.. case Iop_CmpNEZ64x2: {
2979//.. /* We can use SSE2 instructions for this. */
2980//.. /* Ideally, we want to do a 64Ix2 comparison against zero of
2981//.. the operand. Problem is no such insn exists. Solution
2982//.. therefore is to do a 32Ix4 comparison instead, and bitwise-
2983//.. negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
2984//.. let the not'd result of this initial comparison be a:b:c:d.
2985//.. What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
2986//.. pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
2987//.. giving the required result.
2988//..
2989//.. The required selection sequence is 2,3,0,1, which
2990//.. according to Intel's documentation means the pshufd
2991//.. literal value is 0xB1, that is,
2992//.. (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
2993//.. */
2994//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
2995//.. HReg tmp = newVRegV(env);
2996//.. HReg dst = newVRegV(env);
2997//.. REQUIRE_SSE2;
2998//.. addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
2999//.. addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3000//.. tmp = do_sse_Not128(env, tmp);
3001//.. addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3002//.. addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3003//.. return dst;
3004//.. }
3005//..
3006//.. case Iop_CmpNEZ32x4: {
3007//.. /* Sigh, we have to generate lousy code since this has to
3008//.. work on SSE1 hosts */
3009//.. /* basically, the idea is: for each lane:
3010//.. movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3011//.. sbbl %r, %r (now %r = 1Sto32(CF))
3012//.. movl %r, lane
3013//.. */
3014//.. Int i;
3015//.. X86AMode* am;
3016//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3017//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3018//.. HReg dst = newVRegV(env);
3019//.. HReg r32 = newVRegI(env);
3020//.. sub_from_esp(env, 16);
3021//.. addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3022//.. for (i = 0; i < 4; i++) {
3023//.. am = X86AMode_IR(i*4, hregX86_ESP());
3024//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3025//.. addInstr(env, X86Instr_Unary32(Xun_NEG, X86RM_Reg(r32)));
3026//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3027//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3028//.. }
3029//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3030//.. add_to_esp(env, 16);
3031//.. return dst;
3032//.. }
3033//..
3034//.. case Iop_CmpNEZ8x16:
3035//.. case Iop_CmpNEZ16x8: {
3036//.. /* We can use SSE2 instructions for this. */
3037//.. HReg arg;
3038//.. HReg vec0 = newVRegV(env);
3039//.. HReg vec1 = newVRegV(env);
3040//.. HReg dst = newVRegV(env);
3041//.. X86SseOp cmpOp
3042//.. = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3043//.. : Xsse_CMPEQ8;
3044//.. REQUIRE_SSE2;
3045//.. addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3046//.. addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3047//.. addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3048//.. /* defer arg computation to here so as to give CMPEQF as long
3049//.. as possible to complete */
3050//.. arg = iselVecExpr(env, e->Iex.Unop.arg);
3051//.. /* vec0 is all 0s; vec1 is all 1s */
3052//.. addInstr(env, mk_vMOVsd_RR(arg, dst));
3053//.. /* 16x8 or 8x16 comparison == */
3054//.. addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3055//.. /* invert result */
3056//.. addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3057//.. return dst;
3058//.. }
3059//..
3060//.. case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3061//.. case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3062//.. case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
3063//.. do_32Fx4_unary:
3064//.. {
3065//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3066//.. HReg dst = newVRegV(env);
3067//.. addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3068//.. return dst;
3069//.. }
3070//..
3071//.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
3072//.. case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
3073//.. case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
3074//.. do_64Fx2_unary:
3075//.. {
3076//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3077//.. HReg dst = newVRegV(env);
3078//.. REQUIRE_SSE2;
3079//.. addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
3080//.. return dst;
3081//.. }
3082//..
3083//.. case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3084//.. case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3085//.. case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3086//.. do_32F0x4_unary:
3087//.. {
3088//.. /* A bit subtle. We have to copy the arg to the result
3089//.. register first, because actually doing the SSE scalar insn
3090//.. leaves the upper 3/4 of the destination register
3091//.. unchanged. Whereas the required semantics of these
3092//.. primops is that the upper 3/4 is simply copied in from the
3093//.. argument. */
3094//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3095//.. HReg dst = newVRegV(env);
3096//.. addInstr(env, mk_vMOVsd_RR(arg, dst));
3097//.. addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3098//.. return dst;
3099//.. }
3100//..
3101//.. case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3102//.. case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
sewardj0852a132005-02-21 08:28:46 +00003103 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
3104 do_64F0x2_unary:
3105 {
3106 /* A bit subtle. We have to copy the arg to the result
3107 register first, because actually doing the SSE scalar insn
3108 leaves the upper half of the destination register
3109 unchanged, whereas the required semantics of these
3110 primops is that the upper half is simply copied in from the
3111 argument. */
3112 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3113 HReg dst = newVRegV(env);
3114 addInstr(env, mk_vMOVsd_RR(arg, dst));
3115 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3116 return dst;
3117 }
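   /* Sketch of the intended result for Iop_Sqrt64F0x2 (mnemonics are
      illustrative, not the literal emitter output):
           (whole-register copy)  arg -> dst
           sqrtsd %arg, %dst      -- rewrites only bits 0..63 of dst
      so dst ends up with sqrt(arg[63:0]) in the low half and a copy of
      arg[127:64] in the high half, as the primop requires. */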
3118
sewardj8d965312005-02-25 02:48:47 +00003119 case Iop_32UtoV128: {
3120 HReg dst = newVRegV(env);
3121 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3122 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3123 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3124 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3125 return dst;
3126 }
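   /* Sketch of what the two instructions above amount to, assuming
      SseLdzLO(4,..) is emitted as a movss-style load (which zeroes
      bits 32..127 of the destination):
           movq  <arg>, -32(%rsp)
           movss -32(%rsp), %xmm_dst
      The scratch slot at -32(%rsp) lies within the amd64 SysV red
      zone, which is presumably why no explicit %rsp adjustment is
      made here. */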
sewardj0852a132005-02-21 08:28:46 +00003127
3128 case Iop_64UtoV128: {
3129 HReg dst = newVRegV(env);
3130 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3131 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3132 addInstr(env, AMD64Instr_Push(rmi));
3133 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3134 add_to_rsp(env, 8);
3135 return dst;
3136 }
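   /* Roughly:  pushq <arg> ; movsd (%rsp), %xmm_dst ; addq $8, %rsp
      -- assuming SseLdzLO(8,..) turns into a movsd-style load, which
      zeroes the upper 64 bits of the destination register. */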
3137
3138 default:
3139 break;
3140 } /* switch (e->Iex.Unop.op) */
3141 } /* if (e->tag == Iex_Unop) */
3142
3143 if (e->tag == Iex_Binop) {
3144 switch (e->Iex.Binop.op) {
3145
sewardja3e98302005-02-01 15:55:05 +00003146//.. case Iop_Set128lo32: {
3147//.. HReg dst = newVRegV(env);
3148//.. HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3149//.. HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3150//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3151//.. sub_from_esp(env, 16);
3152//.. addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3153//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3154//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3155//.. add_to_esp(env, 16);
3156//.. return dst;
3157//.. }
sewardj18303862005-02-21 12:36:54 +00003158
3159 case Iop_SetV128lo64: {
3160 HReg dst = newVRegV(env);
3161 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3162 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3163 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3164 sub_from_rsp(env, 16);
3165 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp0));
3166 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp0));
3167 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp0));
3168 add_to_rsp(env, 16);
3169 return dst;
3170 }
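   /* The round trip above: spill srcV to a 16-byte stack slot,
      overwrite the low 8 bytes of that slot with srcI, and reload the
      whole 16 bytes into dst.  Simple and correct, if not the fastest
      possible sequence. */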
3171
sewardj1a01e652005-02-23 11:39:21 +00003172 case Iop_64HLtoV128: {
3173 AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
3174 HReg dst = newVRegV(env);
3175 /* do this via the stack (easy, convenient, etc) */
3176 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
3177 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
3178 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
3179 add_to_rsp(env, 16);
3180 return dst;
3181 }
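   /* Note on operand order: arg1 (the high 64 bits) is pushed first
      and so ends up at 8(%rsp); arg2 (the low 64 bits) is pushed
      second and sits at 0(%rsp).  The 16-byte load then places arg2
      in bits 0..63 and arg1 in bits 64..127 of dst, which is exactly
      what Iop_64HLtoV128 requires.  add_to_rsp(env, 16) pops both
      pushes. */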
3182
sewardja3e98302005-02-01 15:55:05 +00003183//.. case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3184//.. case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3185//.. case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3186//.. case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
3187//.. case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
3188//.. case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3189//.. case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3190//.. case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
3191//.. case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
3192//.. do_32Fx4:
3193//.. {
3194//.. HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3195//.. HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3196//.. HReg dst = newVRegV(env);
3197//.. addInstr(env, mk_vMOVsd_RR(argL, dst));
3198//.. addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3199//.. return dst;
3200//.. }
3201//..
3202//.. case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3203//.. case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3204//.. case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3205//.. case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2;
3206//.. case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
3207//.. case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3208//.. case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3209//.. case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2;
3210//.. case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2;
3211//.. do_64Fx2:
3212//.. {
3213//.. HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3214//.. HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3215//.. HReg dst = newVRegV(env);
3216//.. REQUIRE_SSE2;
3217//.. addInstr(env, mk_vMOVsd_RR(argL, dst));
3218//.. addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3219//.. return dst;
3220//.. }
sewardj8d965312005-02-25 02:48:47 +00003221
sewardja3e98302005-02-01 15:55:05 +00003222//.. case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3223//.. case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3224//.. case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003225 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
sewardja3e98302005-02-01 15:55:05 +00003226//.. case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3227//.. case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3228//.. case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
sewardj8d965312005-02-25 02:48:47 +00003229 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3230 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
3231 do_32F0x4: {
3232 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3233 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3234 HReg dst = newVRegV(env);
3235 addInstr(env, mk_vMOVsd_RR(argL, dst));
3236 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3237 return dst;
3238 }
3239
sewardja3e98302005-02-01 15:55:05 +00003240//.. case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
sewardj8d965312005-02-25 02:48:47 +00003241 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
sewardja3e98302005-02-01 15:55:05 +00003242//.. case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003243 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3244 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
sewardj1a01e652005-02-23 11:39:21 +00003245 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
sewardja3e98302005-02-01 15:55:05 +00003246//.. case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
sewardj0852a132005-02-21 08:28:46 +00003247 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3248 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
3249 do_64F0x2: {
3250 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3251 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3252 HReg dst = newVRegV(env);
3253 addInstr(env, mk_vMOVsd_RR(argL, dst));
3254 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3255 return dst;
3256 }
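   /* In both do_32F0x4 and do_64F0x2 above, argL is copied into dst
      before the arithmetic because SSE scalar ops are destructive
      two-operand instructions: dst := dst `op` argR.  Copying argL
      first therefore yields argL `op` argR, which matters for the
      non-commutative cases (SUB, DIV, CMPLT). */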
3257
sewardja3e98302005-02-01 15:55:05 +00003258//.. case Iop_QNarrow32Sx4:
3259//.. op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3260//.. case Iop_QNarrow16Sx8:
3261//.. op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3262//.. case Iop_QNarrow16Ux8:
3263//.. op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3264//..
3265//.. case Iop_InterleaveHI8x16:
3266//.. op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3267//.. case Iop_InterleaveHI16x8:
3268//.. op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3269//.. case Iop_InterleaveHI32x4:
3270//.. op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3271//.. case Iop_InterleaveHI64x2:
3272//.. op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3273//..
3274//.. case Iop_InterleaveLO8x16:
3275//.. op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3276//.. case Iop_InterleaveLO16x8:
3277//.. op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3278//.. case Iop_InterleaveLO32x4:
3279//.. op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3280//.. case Iop_InterleaveLO64x2:
3281//.. op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3282//..
sewardj1a01e652005-02-23 11:39:21 +00003283 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
sewardj8d965312005-02-25 02:48:47 +00003284 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003285 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
sewardja3e98302005-02-01 15:55:05 +00003286//.. case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3287//.. case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3288//.. case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3289//.. case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3290//.. case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3291//.. case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3292//.. case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3293//.. case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3294//.. case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3295//.. case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3296//.. case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3297//.. case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3298//.. case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3299//.. case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3300//.. case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3301//.. case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3302//.. case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3303//.. case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3304//.. case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3305//.. case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3306//.. case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3307//.. case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3308//.. case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3309//.. case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3310//.. case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3311//.. case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3312//.. case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3313//.. case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3314//.. case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3315//.. case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3316//.. case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
sewardj9da16972005-02-21 13:58:26 +00003317 do_SseReRg: {
3318 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3319 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3320 HReg dst = newVRegV(env);
3321 if (arg1isEReg) {
3322 goto vec_fail; /* awaiting test case */
3323 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3324 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3325 } else {
3326 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3327 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3328 }
3329 return dst;
3330 }
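   /* arg1isEReg is only set True by the (currently disabled) pack and
      interleave cases, where the operand order has to be the other way
      round; for the ops that reach here at present (AND/OR/XOR) it
      stays False -- assuming it is initialised to False earlier in
      this function, as in the x86 version -- so the goto vec_fail
      above is just a guard until a test case exercises that path. */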
3331
sewardja3e98302005-02-01 15:55:05 +00003332//.. case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3333//.. case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3334//.. case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3335//.. case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3336//.. case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3337//.. case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3338//.. case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3339//.. case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3340//.. do_SseShift: {
3341//.. HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3342//.. X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3343//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3344//.. HReg ereg = newVRegV(env);
3345//.. HReg dst = newVRegV(env);
3346//.. REQUIRE_SSE2;
3347//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3348//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3349//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3350//.. addInstr(env, X86Instr_Push(rmi));
3351//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
sewardj0852a132005-02-21 08:28:46 +00003352//.. addInstr(env, mk_vMOVsd_RR(greg, dst));
sewardja3e98302005-02-01 15:55:05 +00003353//.. addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3354//.. add_to_esp(env, 16);
3355//.. return dst;
3356//.. }
sewardj0852a132005-02-21 08:28:46 +00003357
3358 default:
3359 break;
3360 } /* switch (e->Iex.Binop.op) */
3361 } /* if (e->tag == Iex_Binop) */
3362
sewardja3e98302005-02-01 15:55:05 +00003363//.. if (e->tag == Iex_Mux0X) {
3364//.. HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3365//.. HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3366//.. HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3367//.. HReg dst = newVRegV(env);
3368//.. addInstr(env, mk_vMOVsd_RR(rX,dst));
3369//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
3370//.. addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
3371//.. return dst;
3372//.. }
3373//..
sewardj9da16972005-02-21 13:58:26 +00003374 vec_fail:
sewardj0852a132005-02-21 08:28:46 +00003375 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
3376 LibVEX_ppVexSubArch(env->subarch));
3377 ppIRExpr(e);
3378 vpanic("iselVecExpr_wrk");
3379}
sewardjc33671d2005-02-01 20:30:00 +00003380
3381
3382/*---------------------------------------------------------*/
3383/*--- ISEL: Statements ---*/
3384/*---------------------------------------------------------*/
3385
3386static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3387{
3388 if (vex_traceflags & VEX_TRACE_VCODE) {
3389 vex_printf("\n-- ");
3390 ppIRStmt(stmt);
3391 vex_printf("\n");
3392 }
3393
3394 switch (stmt->tag) {
3395
sewardj05b3b6a2005-02-04 01:44:33 +00003396 /* --------- STORE --------- */
3397 case Ist_STle: {
3398 AMD64AMode* am;
3399 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.STle.addr);
3400 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.STle.data);
3401 vassert(tya == Ity_I64);
3402 am = iselIntExpr_AMode(env, stmt->Ist.STle.addr);
sewardj31191072005-02-05 18:24:47 +00003403 if (tyd == Ity_I64) {
3404 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.STle.data);
3405 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
3406 return;
3407 }
sewardj05b3b6a2005-02-04 01:44:33 +00003408 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
3409 HReg r = iselIntExpr_R(env, stmt->Ist.STle.data);
3410 addInstr(env, AMD64Instr_Store(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4),
3411 r,am));
3412 return;
3413 }
sewardj8d965312005-02-25 02:48:47 +00003414 if (tyd == Ity_F64) {
3415 HReg r = iselDblExpr(env, stmt->Ist.STle.data);
3416 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
3417 return;
3418 }
sewardja3e98302005-02-01 15:55:05 +00003419//.. if (tyd == Ity_F32) {
3420//.. HReg r = iselFltExpr(env, stmt->Ist.STle.data);
3421//.. addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3422//.. return;
3423//.. }
3424//.. if (tyd == Ity_I64) {
3425//.. HReg vHi, vLo, rA;
3426//.. iselInt64Expr(&vHi, &vLo, env, stmt->Ist.STle.data);
3427//.. rA = iselIntExpr_R(env, stmt->Ist.STle.addr);
3428//.. addInstr(env, X86Instr_Alu32M(
3429//.. Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3430//.. addInstr(env, X86Instr_Alu32M(
3431//.. Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3432//.. return;
3433//.. }
sewardj0852a132005-02-21 08:28:46 +00003434 if (tyd == Ity_V128) {
3435 HReg r = iselVecExpr(env, stmt->Ist.STle.data);
sewardj18303862005-02-21 12:36:54 +00003436 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
sewardj0852a132005-02-21 08:28:46 +00003437 return;
3438 }
3439 break;
sewardj05b3b6a2005-02-04 01:44:33 +00003440 }
sewardjf67eadf2005-02-03 03:53:52 +00003441
3442 /* --------- PUT --------- */
3443 case Ist_Put: {
3444 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3445 if (ty == Ity_I64) {
3446 /* We're going to write to the guest state in memory (addressed
3447 off %rbp), so compute the RHS into an AMD64RI. */
3448 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3449 addInstr(env,
3450 AMD64Instr_Alu64M(
3451 Aalu_MOV,
3452 ri,
3453 AMD64AMode_IR(stmt->Ist.Put.offset,
3454 hregAMD64_RBP())
3455 ));
3456 return;
3457 }
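      /* For instance (illustrative only): PUT(24) = t5, with t5 held
         in some vreg %v5, should come out as  movq %v5, 24(%rbp),
         since %rbp holds the guest state pointer in Vex-generated
         amd64 code. */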
sewardjf67eadf2005-02-03 03:53:52 +00003458 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
3459 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3460 addInstr(env, AMD64Instr_Store(
3461 ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4),
3462 r,
3463 AMD64AMode_IR(stmt->Ist.Put.offset,
3464 hregAMD64_RBP())));
3465 return;
3466 }
sewardj0852a132005-02-21 08:28:46 +00003467 if (ty == Ity_V128) {
3468 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3469 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
3470 hregAMD64_RBP());
sewardj18303862005-02-21 12:36:54 +00003471 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
sewardj0852a132005-02-21 08:28:46 +00003472 return;
3473 }
sewardj8d965312005-02-25 02:48:47 +00003474 if (ty == Ity_F32) {
3475 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3476 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
3477 set_SSE_rounding_default(env); /* paranoia */
3478 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
3479 return;
3480 }
sewardj1a01e652005-02-23 11:39:21 +00003481 if (ty == Ity_F64) {
3482 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3483 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
3484 hregAMD64_RBP() );
3485 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
3486 return;
3487 }
sewardjf67eadf2005-02-03 03:53:52 +00003488 break;
3489 }
3490
sewardj8d965312005-02-25 02:48:47 +00003491 /* --------- Indexed PUT --------- */
3492 case Ist_PutI: {
3493 AMD64AMode* am
3494 = genGuestArrayOffset(
3495 env, stmt->Ist.PutI.descr,
3496 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
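      /* 'am' now addresses element ((ix + bias) % nElems) of the
         circular guest-state array described by stmt->Ist.PutI.descr,
         relative to the guest state pointer in %rbp -- that is the
         defined meaning of PutI; the masking and scaling details are
         handled by genGuestArrayOffset. */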
3497
3498 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3499 if (ty == Ity_F64) {
3500 HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3501 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
3502 return;
3503 }
3504 if (ty == Ity_I8) {
3505 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3506 addInstr(env, AMD64Instr_Store( 1, r, am ));
3507 return;
3508 }
sewardja3e98302005-02-01 15:55:05 +00003509//.. if (ty == Ity_I64) {
3510//.. HReg rHi, rLo;
3511//.. X86AMode* am4 = advance4(am);
3512//.. iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
3513//.. addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
3514//.. addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
3515//.. return;
3516//.. }
sewardj8d965312005-02-25 02:48:47 +00003517 break;
3518 }
sewardj614b3fb2005-02-02 02:16:03 +00003519
3520 /* --------- TMP --------- */
3521 case Ist_Tmp: {
3522 IRTemp tmp = stmt->Ist.Tmp.tmp;
3523 IRType ty = typeOfIRTemp(env->type_env, tmp);
sewardj9b967672005-02-08 11:13:09 +00003524 if (ty == Ity_I64 || ty == Ity_I32
3525 || ty == Ity_I16 || ty == Ity_I8) {
sewardj614b3fb2005-02-02 02:16:03 +00003526 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.Tmp.data);
3527 HReg dst = lookupIRTemp(env, tmp);
3528 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
3529 return;
3530 }
sewardj9b967672005-02-08 11:13:09 +00003531 if (ty == Ity_I128) {
3532 HReg rHi, rLo, dstHi, dstLo;
3533 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.Tmp.data);
3534 lookupIRTemp128( &dstHi, &dstLo, env, tmp);
3535 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3536 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3537 return;
3538 }
sewardja3e98302005-02-01 15:55:05 +00003539//.. if (ty == Ity_I1) {
3540//.. X86CondCode cond = iselCondCode(env, stmt->Ist.Tmp.data);
3541//.. HReg dst = lookupIRTemp(env, tmp);
3542//.. addInstr(env, X86Instr_Set32(cond, dst));
3543//.. return;
3544//.. }
sewardj18303862005-02-21 12:36:54 +00003545 if (ty == Ity_F64) {
3546 HReg dst = lookupIRTemp(env, tmp);
3547 HReg src = iselDblExpr(env, stmt->Ist.Tmp.data);
3548 addInstr(env, mk_vMOVsd_RR(src, dst));
3549 return;
3550 }
sewardja3e98302005-02-01 15:55:05 +00003551//.. if (ty == Ity_F32) {
3552//.. HReg dst = lookupIRTemp(env, tmp);
3553//.. HReg src = iselFltExpr(env, stmt->Ist.Tmp.data);
3554//.. addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3555//.. return;
3556//.. }
sewardj0852a132005-02-21 08:28:46 +00003557 if (ty == Ity_V128) {
3558 HReg dst = lookupIRTemp(env, tmp);
3559 HReg src = iselVecExpr(env, stmt->Ist.Tmp.data);
sewardj18303862005-02-21 12:36:54 +00003560 addInstr(env, mk_vMOVsd_RR(src, dst));
sewardj0852a132005-02-21 08:28:46 +00003561 return;
3562 }
sewardj614b3fb2005-02-02 02:16:03 +00003563 break;
3564 }
3565
sewardjd0a12df2005-02-10 02:07:43 +00003566 /* --------- Call to DIRTY helper --------- */
3567 case Ist_Dirty: {
3568 IRType retty;
3569 IRDirty* d = stmt->Ist.Dirty.details;
3570 Bool passBBP = False;
3571
3572 if (d->nFxState == 0)
3573 vassert(!d->needsBBP);
3574 passBBP = d->nFxState > 0 && d->needsBBP;
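      /* passBBP == True means the helper wants the baseblock (guest
         state) pointer passed as a hidden first argument, which
         doHelperCall is presumably expected to arrange; that only
         makes sense if the helper actually touches guest state,
         hence the nFxState check above. */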
3575
3576 /* Marshal args, do the call, clear stack. */
3577 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3578
3579 /* Now figure out what to do with the returned value, if any. */
3580 if (d->tmp == IRTemp_INVALID)
3581 /* No return value. Nothing to do. */
3582 return;
3583
3584 retty = typeOfIRTemp(env->type_env, d->tmp);
3585 if (retty == Ity_I64) {
3586 /* The returned value is in %rax. Park it in the register
3587 associated with tmp. */
3588 HReg dst = lookupIRTemp(env, d->tmp);
3589 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
3590 return;
3591 }
3592 break;
3593 }
3594
3595 /* --------- MEM FENCE --------- */
3596 case Ist_MFence:
3597 addInstr(env, AMD64Instr_MFence());
3598 return;
sewardjf8c37f72005-02-07 18:55:29 +00003599
3600 /* --------- EXIT --------- */
3601 case Ist_Exit: {
3602 AMD64RI* dst;
3603 AMD64CondCode cc;
3604 if (stmt->Ist.Exit.dst->tag != Ico_U64)
3605 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
3606 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
3607 cc = iselCondCode(env,stmt->Ist.Exit.guard);
3608 addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
3609 return;
3610 }
sewardjc33671d2005-02-01 20:30:00 +00003611
3612 default: break;
3613 }
3614 ppIRStmt(stmt);
3615 vpanic("iselStmt(amd64)");
3616}
3617
3618
3619/*---------------------------------------------------------*/
3620/*--- ISEL: Basic block terminators (Nexts) ---*/
3621/*---------------------------------------------------------*/
3622
3623static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
sewardjf67eadf2005-02-03 03:53:52 +00003624{
3625 AMD64RI* ri;
3626 if (vex_traceflags & VEX_TRACE_VCODE) {
3627 vex_printf("\n-- goto {");
3628 ppIRJumpKind(jk);
3629 vex_printf("} ");
3630 ppIRExpr(next);
3631 vex_printf("\n");
3632 }
3633 ri = iselIntExpr_RI(env, next);
3634 addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
sewardjc33671d2005-02-01 20:30:00 +00003635}
3636
3637
3638/*---------------------------------------------------------*/
3639/*--- Insn selector top-level ---*/
3640/*---------------------------------------------------------*/
3641
3642/* Translate an entire BB to amd64 code. */
3643
3644HInstrArray* iselBB_AMD64 ( IRBB* bb, VexSubArch subarch_host )
3645{
3646 Int i, j;
sewardj9b967672005-02-08 11:13:09 +00003647 HReg hreg, hregHI;
sewardjc33671d2005-02-01 20:30:00 +00003648
3649 /* sanity ... */
3650 vassert(subarch_host == VexSubArch_NONE);
3651
3652 /* Make up an initial environment to use. */
3653 ISelEnv* env = LibVEX_Alloc(sizeof(ISelEnv));
3654 env->vreg_ctr = 0;
3655
3656 /* Set up output code array. */
3657 env->code = newHInstrArray();
3658
3659 /* Copy BB's type env. */
3660 env->type_env = bb->tyenv;
3661
3662 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
3663 change as we go along. */
3664 env->n_vregmap = bb->tyenv->types_used;
3665 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
sewardj9b967672005-02-08 11:13:09 +00003666 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
sewardjc33671d2005-02-01 20:30:00 +00003667
3668 /* and finally ... */
3669 env->subarch = subarch_host;
3670
3671 /* For each IR temporary, allocate a suitably-kinded virtual
3672 register. */
3673 j = 0;
3674 for (i = 0; i < env->n_vregmap; i++) {
sewardj9b967672005-02-08 11:13:09 +00003675 hregHI = hreg = INVALID_HREG;
sewardjc33671d2005-02-01 20:30:00 +00003676 switch (bb->tyenv->types[i]) {
3677 case Ity_I1:
3678 case Ity_I8:
3679 case Ity_I16:
3680 case Ity_I32:
sewardj9b967672005-02-08 11:13:09 +00003681 case Ity_I64: hreg = mkHReg(j++, HRcInt64, True); break;
3682 case Ity_I128: hreg = mkHReg(j++, HRcInt64, True);
3683 hregHI = mkHReg(j++, HRcInt64, True); break;
sewardjc33671d2005-02-01 20:30:00 +00003684 case Ity_F32:
sewardj18303862005-02-21 12:36:54 +00003685 case Ity_F64:
sewardj9b967672005-02-08 11:13:09 +00003686 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardjc33671d2005-02-01 20:30:00 +00003687 default: ppIRType(bb->tyenv->types[i]);
3688 vpanic("iselBB(amd64): IRTemp type");
3689 }
3690 env->vregmap[i] = hreg;
sewardj9b967672005-02-08 11:13:09 +00003691 env->vregmapHI[i] = hregHI;
sewardjc33671d2005-02-01 20:30:00 +00003692 }
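   /* Note: an Ity_I128 temp consumes two 64-bit vregs -- the low half
      goes in vregmap[] and the high half in vregmapHI[] -- while every
      other type gets a single vreg (integer types in an HRcInt64 vreg,
      F32/F64/V128 in an HRcVec128 vreg). */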
3693 env->vreg_ctr = j;
3694
3695 /* Ok, finally we can iterate over the statements. */
3696 for (i = 0; i < bb->stmts_used; i++)
3697 if (bb->stmts[i])
3698 iselStmt(env,bb->stmts[i]);
3699
3700 iselNext(env,bb->next,bb->jumpkind);
3701
3702 /* record the number of vregs we used. */
3703 env->code->n_vregs = env->vreg_ctr;
3704 return env->code;
3705}
sewardja3e98302005-02-01 15:55:05 +00003706
3707
3708/*---------------------------------------------------------------*/
3709/*--- end host-amd64/isel.c ---*/
3710/*---------------------------------------------------------------*/