
/*---------------------------------------------------------------*/
/*---                                                          ---*/
/*--- This file (host-amd64/isel.c) is                        ---*/
/*--- Copyright (C) OpenWorks LLP.  All rights reserved.      ---*/
/*---                                                          ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of LibVEX, a library for dynamic binary
   instrumentation and translation.

   Copyright (C) 2004-2006 OpenWorks LLP.  All rights reserved.

   This library is made available under a dual licensing scheme.

   If you link LibVEX against other code all of which is itself
   licensed under the GNU General Public License, version 2 dated June
   1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL
   v2, as appearing in the file LICENSE.GPL.  If the file LICENSE.GPL
   is missing, you can obtain a copy of the GPL v2 from the Free
   Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA
   02110-1301, USA.

   For any other uses of LibVEX, you must first obtain a commercial
   license from OpenWorks LLP.  Please contact info@open-works.co.uk
   for information about commercial licensing.

   This software is provided by OpenWorks LLP "as is" and any express
   or implied warranties, including, but not limited to, the implied
   warranties of merchantability and fitness for a particular purpose
   are disclaimed.  In no event shall OpenWorks LLP be liable for any
   direct, indirect, incidental, special, exemplary, or consequential
   damages (including, but not limited to, procurement of substitute
   goods or services; loss of use, data, or profits; or business
   interruption) however caused and on any theory of liability,
   whether in contract, strict liability, or tort (including
   negligence or otherwise) arising in any way out of the use of this
   software, even if advised of the possibility of such damage.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "ir/irmatch.h"
#include "main/vex_util.h"
#include "main/vex_globals.h"
#include "host-generic/h_generic_regs.h"
#include "host-generic/h_generic_simd64.h"
#include "host-amd64/hdefs.h"


/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                        ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to a FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
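
/* For reference, an illustrative decoding of the two default values
   (not part of the original code; bit positions as per the Intel SDM):

      FPUCW 0x027F = 0000 0010 0111 1111
         bits 5..0   = all 1  -- all six x87 exception types masked
         bits 9..8   = 10b    -- precision control: 53-bit mantissa
         bits 11..10 = 00b    -- rounding control: round-to-nearest
         (bit 6 is a reserved bit that conventionally reads as 1)

      MXCSR 0x1F80 = 0001 1111 1000 0000
         bits 12..7  = all 1  -- all six SSE exception types masked
         bits 14..13 = 00b    -- rounding control: round-to-nearest
         (the sticky exception flags in bits 5..0 are all clear)

   So both units are round-to-nearest with all exceptions masked, as
   the comment above requires. */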


/*---------------------------------------------------------*/
/*--- misc helpers                                      ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}


/*---------------------------------------------------------*/
/*--- ISelEnv                                           ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp
     temporary.  This is computed before insn selection starts, and
     does not change.  We expect this mapping to map precisely the
     same set of IRTemps as the type mapping does.

        - vregmap   holds the primary register for the IRTemp.
        - vregmapHI is only used for 128-bit integer-typed
             IRTemps.  It holds the identity of a second
             64-bit virtual HReg, which holds the high half
             of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host subarchitecture we are selecting insns for.
     This is set at the start and does not change.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      HInstrArray* code;

      Int          vreg_ctr;

      /* Currently (27 Jan 06) unused */
      UInt         hwcaps;
   }
   ISelEnv;


static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
                              ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(env->vregmapHI[tmp] != INVALID_HREG);
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

//.. static HReg newVRegF ( ISelEnv* env )
//.. {
//..    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
//..    env->vreg_ctr++;
//..    return reg;
//.. }

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                        ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode     ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || am->Aam.IR.reg == hregAMD64_RBP()) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}


/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y0 = (Long)x;
   Long y1 = y0;
   y1 <<= 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
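
/* A couple of worked examples for fitsIn32Bits, for clarity (these
   are illustrative notes, not part of the original code):

      x = 0x0000_0000_7FFF_FFFF: top 33 bits all 0 -> fits; the value
         is reproduced by sign-extending its low 32 bits.
      x = 0xFFFF_FFFF_8000_0000: top 33 bits all 1 -> fits.
      x = 0x0000_0000_8000_0000: bit 31 is 1 but bits 63..32 are 0, so
         sign-extending the low half gives 0xFFFF_FFFF_8000_0000 != x
         -> does not fit.

   This matters because many amd64 instructions accept only 32-bit
   immediates, which the hardware sign-extends to 64 bits. */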

//.. /* Is this a 32-bit zero expression? */
//.. 
//.. static Bool isZero32 ( IRExpr* e )
//.. {
//..    return e->tag == Iex_Const
//..           && e->Iex.Const.con->tag == Ico_U32
//..           && e->Iex.Const.con->Ico.U32 == 0;
//.. }

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}


//.. /* Given an amode, return one which references 4 bytes further
//..    along. */
//.. 
//.. static X86AMode* advance4 ( X86AMode* am )
//.. {
//..    X86AMode* am4 = dopyX86AMode(am);
//..    switch (am4->tag) {
//..       case Xam_IRRS:
//..          am4->Xam.IRRS.imm += 4; break;
//..       case Xam_IR:
//..          am4->Xam.IR.imm += 4; break;
//..       default:
//..          vpanic("advance4(x86,host)");
//..    }
//..    return am4;
//.. }
//.. 
//.. 
//.. /* Push an arg onto the host stack, in preparation for a call to a
//..    helper function of some kind.  Returns the number of 32-bit words
//..    pushed. */
//.. 
//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
//.. {
//..    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
//..    if (arg_ty == Ity_I32) {
//..       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
//..       return 1;
//..    } else
//..    if (arg_ty == Ity_I64) {
//..       HReg rHi, rLo;
//..       iselInt64Expr(&rHi, &rLo, env, arg);
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
//..       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
//..       return 2;
//..    }
//..    ppIRExpr(arg);
//..    vpanic("pushArg(x86): can't handle arg of this type");
//.. }


/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  guard is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional. */

static
void doHelperCall ( ISelEnv* env,
                    Bool passBBP,
                    IRExpr* guard, IRCallee* cee, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   Bool          go_fast;
   Int           n_args, i, argreg;

   /* Marshal args for a call and do the call.

      If passBBP is True, %rbp (the baseblock pointer) is to be passed
      as the first arg.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_RdTmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (6 < n_args + (passBBP ? 1 : 0))
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs. */

   if (go_fast) {

      /* FAST SCHEME */
      argreg = 0;
      if (passBBP) {
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]));
         argreg++;
      }

      for (i = 0; i < n_args; i++) {
         vassert(argreg < 6);
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV,
                          iselIntExpr_RMI(env, args[i]),
                          argregs[argreg]
                       )
                 );
         argreg++;
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = Acc_ALWAYS;

   } else {

      /* SLOW SCHEME; move via temporaries */
      argreg = 0;

      if (passBBP) {
         /* This is pretty stupid; better to move directly to rdi
            after the rest of the args are done. */
         tmpregs[argreg] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
         argreg++;
      }

      for (i = 0; i < n_args; i++) {
         vassert(argreg < 6);
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[argreg] = iselIntExpr_R(env, args[i]);
         argreg++;
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = Acc_ALWAYS;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < argreg; i++) {
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
      }

   }

   /* Finally, the call itself. */
   addInstr(env, AMD64Instr_Call(
                    cc,
                    Ptr_to_ULong(cee->addr),
                    n_args + (passBBP ? 1 : 0)
                 )
           );
}
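
/* To make the fast/slow distinction concrete, here is a sketch
   (illustrative only, with made-up temp names) of what the two
   schemes emit for a two-argument call helper(t1, t2):

      FAST (both args are simple -- temps, consts or Gets):
         movq <t1>, %rdi
         movq <t2>, %rsi
         call helper

      SLOW (some arg is complex and might clobber fixed registers
      while being computed, or the call is conditional):
         <compute arg1 into vreg vA>
         <compute arg2 into vreg vB>
         movq vA, %rdi
         movq vB, %rsi
         call helper

   In the slow case the trailing vreg-to-rreg moves are frequently
   redundant after register allocation, but, as noted above, the
   allocator cannot always eliminate them. */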


/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

         movq %off, %tmp
         addq $bias, %tmp         (if bias != 0)
         andq $7, %tmp
         ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
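
/* Worked example (illustrative, with made-up numbers): for a
   hypothetical descriptor { base = 0x1C0, elemTy = Ity_I64,
   nElems = 8 }, index expression 'off' and bias 1, the code above
   yields

      movq %off, %tmp
      addq $1, %tmp
      andq $7, %tmp                 -- wrap the index into 0..7
      ... 0x1C0(%rbp, %tmp, 8) ...

   i.e. guest-state address = baseblock + 0x1C0 + 8 * ((off+1) & 7).
   The AND keeps the access inside the array no matter what index
   the guest computes. */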


/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}


/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
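
/* Illustrative note: MXCSR keeps its rounding-control field in bits
   14:13, which is why the mode value is shifted left by 13 above.
   IRRoundingMode happens to use the same 2-bit encoding as the
   hardware:

      0 (Irrm_NEAREST) -> RC = 00  round to nearest
      1 (Irrm_NegINF)  -> RC = 01  round towards -infinity
      2 (Irrm_PosINF)  -> RC = 10  round towards +infinity
      3 (Irrm_ZERO)    -> RC = 11  round towards zero

   so no translation table is needed, just the shift. */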


/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq %rrm, %rrm2
      andq $3, %rrm2   -- shouldn't be needed; paranoia
      shlq $10, %rrm2
      orq $DEFAULT_FPUCW, %rrm2
      movq %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
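
/* Analogous illustrative note for the x87 case: the FPU control word
   keeps its rounding-control field in bits 11:10 (hence the shift by
   10), with the same 2-bit encoding as MXCSR.  As before this only
   works because DEFAULT_FPUCW has RC == 00, so the new field can
   simply be ORed into place. */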


/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}


/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}
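
/* Why these work (illustrative note): SSE has no vector NOT, and no
   way to load a 128-bit immediate directly.  But

      pxor    %xmm_d, %xmm_d   -- x ^ x == 0, gives all-zeroes
      pcmpeqd %xmm_d, %xmm_d   -- x == x everywhere, gives all-ones

   regardless of the register's previous contents.  NOT then falls
   out as src XOR all-ones, i.e. one pcmpeqd plus one pxor. */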


//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
//..    after most non-simple FPU operations (simple = +, -, *, / and
//..    sqrt).
//.. 
//..    This could be done a lot more efficiently if needed, by loading
//..    zero and adding it to the value to be rounded (fldz ; faddp?).
//.. */
//.. static void roundToF64 ( ISelEnv* env, HReg reg )
//.. {
//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//..    sub_from_esp(env, 8);
//..    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
//..    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
//..    add_to_esp(env, 8);
//.. }


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_8Uto64);
   DECLARE_PATTERN(p_1Uto8_64to1);

   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);
         AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I64) {
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Mem(amode), dst) );
            return dst;
         }
         if (ty == Ity_I32) {
            addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
            return dst;
         }
         if (ty == Ity_I16) {
            addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
         if (ty == Ity_I8) {
            addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
         break;
      }

      /* --------- BINARY OP --------- */
      case Iex_Binop: {
         AMD64AluOp   aluOp;
         AMD64ShiftOp shOp;

//.. 
//..          /* Pattern: Sub32(0,x) */
//..          if (e->Iex.Binop.op == Iop_Sub32 && isZero32(e->Iex.Binop.arg1)) {
//..             HReg dst = newVRegI(env);
//..             HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
//..             addInstr(env, mk_iMOVsd_RR(reg,dst));
//..             addInstr(env, X86Instr_Unary32(Xun_NEG,X86RM_Reg(dst)));
//..             return dst;
//..          }
//.. 
         /* Is it an addition or logical style op? */
         switch (e->Iex.Binop.op) {
            case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
               aluOp = Aalu_ADD; break;
            case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
               aluOp = Aalu_SUB; break;
            case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
               aluOp = Aalu_AND; break;
            case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
               aluOp = Aalu_OR; break;
            case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
               aluOp = Aalu_XOR; break;
            case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
               aluOp = Aalu_MUL; break;
            default:
               aluOp = Aalu_INVALID; break;
         }
         /* For commutative ops we assume any literal
            values are on the second operand. */
         if (aluOp != Aalu_INVALID) {
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
            AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
            return dst;
         }

         /* Perhaps a shift op? */
         switch (e->Iex.Binop.op) {
            case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               shOp = Ash_SHL; break;
            case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
               shOp = Ash_SHR; break;
            case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
               shOp = Ash_SAR; break;
            default:
               shOp = Ash_INVALID; break;
         }
         if (shOp != Ash_INVALID) {
            HReg dst = newVRegI(env);

            /* regL = the value to be shifted */
            HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(regL,dst));

            /* Do any necessary widening for 32/16/8 bit operands */
            switch (e->Iex.Binop.op) {
               case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
                  break;
               case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
                  break;
               case Iop_Shr8:
                  addInstr(env, AMD64Instr_Alu64R(
                                   Aalu_AND, AMD64RMI_Imm(0xFF), dst));
                  break;
               case Iop_Shr16:
                  addInstr(env, AMD64Instr_Alu64R(
                                   Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
                  break;
               case Iop_Shr32:
                  addInstr(env, AMD64Instr_MovZLQ(dst,dst));
                  break;
               case Iop_Sar8:
                  addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
                  addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
                  break;
               case Iop_Sar16:
                  addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
                  addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
                  break;
               case Iop_Sar32:
                  addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, dst));
                  addInstr(env, AMD64Instr_Sh64(Ash_SAR, 32, dst));
                  break;
               default:
                  ppIROp(e->Iex.Binop.op);
                  vassert(0);
            }

            /* Now consider the shift amount.  If it's a literal, we
               can do a much better job than the general case. */
            if (e->Iex.Binop.arg2->tag == Iex_Const) {
               /* assert that the IR is well-typed */
               Int nshift;
               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
               nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
               vassert(nshift >= 0);
               if (nshift > 0)
                  /* Can't allow nshift==0 since that means %cl */
                  addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
            } else {
               /* General case; we have to force the amount into %cl. */
               HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
               addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
               addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
            }
            return dst;
         }
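
         /* Illustrative note on the widening above (made-up vreg
            names): all values live in 64-bit registers, so a narrow
            shift must first normalise the junk in the unused upper
            bits.  E.g. for Shr32(x, 3) the sequence is

               movl %vx, %vd      -- MovZLQ: zero bits 63..32
               shrq $3, %vd       -- now a correct 32-bit result

            whereas for Sar16(x, 3) the value is first parked in the
            top of the register so the sign bit lands in bit 63:

               shlq $48, %vd
               sarq $48, %vd      -- sign-extend 16 -> 64 bits
               sarq $3, %vd

            Left shifts need no widening, since the upper bits of the
            result are documented above as arbitrary anyway. */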

         /* Deal with 64-bit SIMD binary ops */
         second_is_UInt = False;
         switch (e->Iex.Binop.op) {
            case Iop_Add8x8:
               fn = (HWord)h_generic_calc_Add8x8; break;
            case Iop_Add16x4:
               fn = (HWord)h_generic_calc_Add16x4; break;
            case Iop_Add32x2:
               fn = (HWord)h_generic_calc_Add32x2; break;

            case Iop_Avg8Ux8:
               fn = (HWord)h_generic_calc_Avg8Ux8; break;
            case Iop_Avg16Ux4:
               fn = (HWord)h_generic_calc_Avg16Ux4; break;

            case Iop_CmpEQ8x8:
               fn = (HWord)h_generic_calc_CmpEQ8x8; break;
            case Iop_CmpEQ16x4:
               fn = (HWord)h_generic_calc_CmpEQ16x4; break;
            case Iop_CmpEQ32x2:
               fn = (HWord)h_generic_calc_CmpEQ32x2; break;

            case Iop_CmpGT8Sx8:
               fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
            case Iop_CmpGT16Sx4:
               fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
            case Iop_CmpGT32Sx2:
               fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

            case Iop_InterleaveHI8x8:
               fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
            case Iop_InterleaveLO8x8:
               fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
            case Iop_InterleaveHI16x4:
               fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
            case Iop_InterleaveLO16x4:
               fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
            case Iop_InterleaveHI32x2:
               fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
            case Iop_InterleaveLO32x2:
               fn = (HWord)h_generic_calc_InterleaveLO32x2; break;

            case Iop_Max8Ux8:
               fn = (HWord)h_generic_calc_Max8Ux8; break;
            case Iop_Max16Sx4:
               fn = (HWord)h_generic_calc_Max16Sx4; break;
            case Iop_Min8Ux8:
               fn = (HWord)h_generic_calc_Min8Ux8; break;
            case Iop_Min16Sx4:
               fn = (HWord)h_generic_calc_Min16Sx4; break;

            case Iop_Mul16x4:
               fn = (HWord)h_generic_calc_Mul16x4; break;
            case Iop_MulHi16Sx4:
               fn = (HWord)h_generic_calc_MulHi16Sx4; break;
            case Iop_MulHi16Ux4:
               fn = (HWord)h_generic_calc_MulHi16Ux4; break;

            case Iop_QAdd8Sx8:
               fn = (HWord)h_generic_calc_QAdd8Sx8; break;
            case Iop_QAdd16Sx4:
               fn = (HWord)h_generic_calc_QAdd16Sx4; break;
            case Iop_QAdd8Ux8:
               fn = (HWord)h_generic_calc_QAdd8Ux8; break;
            case Iop_QAdd16Ux4:
               fn = (HWord)h_generic_calc_QAdd16Ux4; break;

            case Iop_QNarrow32Sx2:
               fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
            case Iop_QNarrow16Sx4:
               fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
            case Iop_QNarrow16Ux4:
               fn = (HWord)h_generic_calc_QNarrow16Ux4; break;

            case Iop_QSub8Sx8:
               fn = (HWord)h_generic_calc_QSub8Sx8; break;
            case Iop_QSub16Sx4:
               fn = (HWord)h_generic_calc_QSub16Sx4; break;
            case Iop_QSub8Ux8:
               fn = (HWord)h_generic_calc_QSub8Ux8; break;
            case Iop_QSub16Ux4:
               fn = (HWord)h_generic_calc_QSub16Ux4; break;

            case Iop_Sub8x8:
               fn = (HWord)h_generic_calc_Sub8x8; break;
            case Iop_Sub16x4:
               fn = (HWord)h_generic_calc_Sub16x4; break;
            case Iop_Sub32x2:
               fn = (HWord)h_generic_calc_Sub32x2; break;

            case Iop_ShlN32x2:
               fn = (HWord)h_generic_calc_ShlN32x2;
               second_is_UInt = True;
               break;
            case Iop_ShlN16x4:
               fn = (HWord)h_generic_calc_ShlN16x4;
               second_is_UInt = True;
               break;
            case Iop_ShrN32x2:
               fn = (HWord)h_generic_calc_ShrN32x2;
               second_is_UInt = True;
               break;
            case Iop_ShrN16x4:
               fn = (HWord)h_generic_calc_ShrN16x4;
               second_is_UInt = True;
               break;
            case Iop_SarN32x2:
               fn = (HWord)h_generic_calc_SarN32x2;
               second_is_UInt = True;
               break;
            case Iop_SarN16x4:
               fn = (HWord)h_generic_calc_SarN16x4;
               second_is_UInt = True;
               break;

            default:
               fn = (HWord)0; break;
         }
         if (fn != (HWord)0) {
            /* Note: the following assumes all helpers are of signature
                  ULong fn ( ULong, ULong ), and they are
               not marked as regparm functions.
            */
            HReg dst  = newVRegI(env);
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            if (second_is_UInt)
               addInstr(env, AMD64Instr_MovZLQ(argR, argR));
            addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
            addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
            return dst;
         }
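
         /* Illustrative note: no MMX-style vector instructions are
            used here; each 64-bit SIMD op is simply lowered as a call
            to a C helper, using the standard AMD64 calling convention
            (args in %rdi/%rsi, result in %rax).  So e.g.
            Add16x4(t1,t2) becomes roughly

               movq <t1>, %rdi
               movq <t2>, %rsi
               call h_generic_calc_Add16x4
               movq %rax, <dst>
         */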

         /* Handle misc other ops. */

         if (e->Iex.Binop.op == Iop_DivModS64to32
             || e->Iex.Binop.op == Iop_DivModU64to32) {
            /* 64 x 32 -> (32(rem),32(div)) division */
            /* Get the 64-bit operand into edx:eax, and the other into
               any old R/M. */
            HReg rax = hregAMD64_RAX();
            HReg rdx = hregAMD64_RDX();
            HReg dst = newVRegI(env);
            Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
            AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            /* Compute the left operand into a reg, and then
               put the top half in edx and the bottom in eax. */
            HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(left64, rdx));
            addInstr(env, mk_iMOVsd_RR(left64, rax));
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
            addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
            addInstr(env, AMD64Instr_MovZLQ(rdx,rdx));
            addInstr(env, AMD64Instr_MovZLQ(rax,rax));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
            addInstr(env, mk_iMOVsd_RR(rax, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
            return dst;
         }
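
         /* Illustrative note: the IR op returns a single 64-bit value
            laid out as rem:div.  After the 32-bit divide leaves the
            quotient in %eax and the remainder in %edx, the sequence
            above zero-extends both, shifts the remainder up by 32 and
            ORs them together:

               dst = (rem << 32) | zero-extend-32(div)
         */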

         if (e->Iex.Binop.op == Iop_32HLto64) {
            HReg hi32  = newVRegI(env);
            HReg lo32  = newVRegI(env);
            HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
            addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
            addInstr(env, AMD64Instr_MovZLQ(lo32,lo32));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_OR, AMD64RMI_Reg(lo32), hi32));
            return hi32;
         }

         if (e->Iex.Binop.op == Iop_16HLto32) {
            HReg hi16  = newVRegI(env);
            HReg lo16  = newVRegI(env);
            HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
            addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_OR, AMD64RMI_Reg(lo16), hi16));
            return hi16;
         }

         if (e->Iex.Binop.op == Iop_8HLto16) {
            HReg hi8  = newVRegI(env);
            HReg lo8  = newVRegI(env);
            HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
            addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_OR, AMD64RMI_Reg(lo8), hi8));
            return hi8;
         }

         if (e->Iex.Binop.op == Iop_MullS32
             || e->Iex.Binop.op == Iop_MullS16
             || e->Iex.Binop.op == Iop_MullS8
             || e->Iex.Binop.op == Iop_MullU32
             || e->Iex.Binop.op == Iop_MullU16
             || e->Iex.Binop.op == Iop_MullU8) {
            HReg a32   = newVRegI(env);
            HReg b32   = newVRegI(env);
            HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            Int          shift  = 0;
            AMD64ShiftOp shr_op = Ash_SHR;
            switch (e->Iex.Binop.op) {
               case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
               case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
               case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
               case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
               case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
               case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
               default: vassert(0);
            }

            addInstr(env, mk_iMOVsd_RR(a32s, a32));
            addInstr(env, mk_iMOVsd_RR(b32s, b32));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
            addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
            addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
            return b32;
         }
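
         /* Illustrative note: MullS/MullU produce a result twice the
            width of their args, but since every value here lives in a
            64-bit register, a double-width narrow multiply is just a
            64-bit multiply on suitably extended operands.  E.g. for
            MullS16 each operand is sign-extended from 16 to 64 bits
            (shlq $48 ; sarq $48), and the low 32 bits of the 64-bit
            product are then exactly the 16x16->32 signed result. */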

         if (e->Iex.Binop.op == Iop_CmpF64) {
            HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
            HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
            /* Mask out irrelevant parts of the result so as to conform
               to the CmpF64 definition. */
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
            return dst;
         }
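
         /* Illustrative note on the 0x45 mask: ucomisd reports its
            result in ZF/PF/CF, which sit at bits 6/2/0 of rflags, and
            0x45 == 0x40|0x04|0x01 keeps exactly those three bits.
            Going by the IRCmpF64Result encoding in libvex_ir.h, the
            surviving patterns line up directly:

               unordered -> ZF,PF,CF all set -> 0x45 (Ircr_UN)
               less-than -> CF only          -> 0x01 (Ircr_LT)
               greater   -> none set         -> 0x00 (Ircr_GT)
               equal     -> ZF only          -> 0x40 (Ircr_EQ)
         */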

         if (e->Iex.Binop.op == Iop_F64toI32
             || e->Iex.Binop.op == Iop_F64toI64) {
            Int  szD = e->Iex.Binop.op==Iop_F64toI32 ? 4 : 8;
            HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
            addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
            set_SSE_rounding_default(env);
            return dst;
         }

//..          if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
//..             Int  sz  = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
//..             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
//..             HReg dst = newVRegI(env);
//.. 
//..             /* Used several times ... */
//..             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
//.. 
//..             /* rf now holds the value to be converted, and rrm holds the
//..                rounding mode value, encoded as per the IRRoundingMode
//..                enum.  The first thing to do is set the FPU's rounding
//..                mode accordingly. */
//.. 
//..             /* Create a space for the format conversion. */
//..             /* subl $4, %esp */
//..             sub_from_esp(env, 4);
//.. 
//..             /* Set host rounding mode */
//..             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
//.. 
//..             /* gistw/l %rf, 0(%esp) */
//..             addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
//.. 
//..             if (sz == 2) {
//..                /* movzwl 0(%esp), %dst */
//..                addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
//..             } else {
//..                /* movl 0(%esp), %dst */
//..                vassert(sz == 4);
//..                addInstr(env, X86Instr_Alu32R(
//..                                 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
//..             }
//.. 
//..             /* Restore default FPU rounding. */
//..             set_FPU_rounding_default( env );
//.. 
//..             /* addl $4, %esp */
//..             add_to_esp(env, 4);
//..             return dst;
//..          }
//.. 
//..          /* C3210 flags following FPU partial remainder (fprem), both
//..             IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
//..          if (e->Iex.Binop.op == Iop_PRemC3210F64
//..             || e->Iex.Binop.op == Iop_PRem1C3210F64) {
//..             HReg junk = newVRegF(env);
//..             HReg dst  = newVRegI(env);
//..             HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
//..             HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
//..             addInstr(env, X86Instr_FpBinary(
//..                            e->Iex.Binop.op==Iop_PRemC3210F64
//..                               ? Xfp_PREM : Xfp_PREM1,
//..                            srcL,srcR,junk
//..                   ));
//..             /* The previous pseudo-insn will have left the FPU's C3210
//..                flags set correctly.  So bag them. */
//..             addInstr(env, X86Instr_FpStSW_AX());
//..             addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
//..             addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
//..             return dst;
//..          }

         break;
      }

      /* --------- UNARY OP --------- */
      case Iex_Unop: {
         /* 32Uto64(8Uto32(expr8)) */
         DEFINE_PATTERN(p_8Uto64,
                        unop(Iop_32Uto64, unop(Iop_8Uto32, bind(0)) ) );
         if (matchIRExpr(&mi,p_8Uto64,e)) {
            IRExpr* expr8 = mi.bindee[0];
            HReg    dst   = newVRegI(env);
            HReg    src   = iselIntExpr_R(env, expr8);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, 56, dst));
            return dst;
         }

         /* 1Uto8(64to1(expr64)) */
         DEFINE_PATTERN( p_1Uto8_64to1,
                         unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
         if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
            IRExpr* expr64 = mi.bindee[0];
            HReg    dst    = newVRegI(env);
            HReg    src    = iselIntExpr_R(env, expr64);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(1), dst));
            return dst;
         }

//..          /* 16Uto32(LDle(expr32)) */
//..          {
//..             DECLARE_PATTERN(p_LDle16_then_16Uto32);
//..             DEFINE_PATTERN(p_LDle16_then_16Uto32,
//..                unop(Iop_16Uto32,IRExpr_LDle(Ity_I16,bind(0))) );
//..             if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
//..                HReg dst = newVRegI(env);
//..                X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
//..                addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
//..                return dst;
//..             }
//..          }

         switch (e->Iex.Unop.op) {
            case Iop_32Uto64: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, AMD64Instr_MovZLQ(src,dst) );
               return dst;
            }
            case Iop_32Sto64: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               UInt amt = 32;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
               return dst;
            }
            case Iop_128HIto64: {
               HReg rHi, rLo;
               iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rHi; /* and abandon rLo */
            }
            case Iop_128to64: {
               HReg rHi, rLo;
               iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
               return rLo; /* and abandon rHi */
            }
            case Iop_8Uto16:
            case Iop_8Uto32:
            case Iop_8Uto64:
            case Iop_16Uto64:
            case Iop_16Uto32: {
               HReg dst     = newVRegI(env);
               HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
               Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                      || e->Iex.Unop.op==Iop_16Uto64 );
               UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                               AMD64RMI_Imm(mask), dst));
               return dst;
            }
            case Iop_8Sto16:
            case Iop_8Sto64:
            case Iop_8Sto32:
            case Iop_16Sto32:
            case Iop_16Sto64: {
               HReg dst     = newVRegI(env);
               HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
               Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                      || e->Iex.Unop.op==Iop_16Sto64 );
               UInt amt     = srcIs16 ? 48 : 56;
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
               return dst;
            }
            case Iop_Not8:
            case Iop_Not16:
            case Iop_Not32:
            case Iop_Not64: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
               return dst;
            }
//..             case Iop_64HIto32: {
//..                HReg rHi, rLo;
//..                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..                return rHi; /* and abandon rLo .. poor wee thing :-) */
//..             }
//..             case Iop_64to32: {
//..                HReg rHi, rLo;
//..                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
//..                return rLo; /* similar stupid comment to the above ... */
//..             }
//..             case Iop_16HIto8:
            case Iop_32HIto16:
            case Iop_64HIto32: {
               HReg dst  = newVRegI(env);
               HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
               Int shift = 0;
               switch (e->Iex.Unop.op) {
                  case Iop_32HIto16: shift = 16; break;
                  case Iop_64HIto32: shift = 32; break;
                  default: vassert(0);
               }
               addInstr(env, mk_iMOVsd_RR(src,dst) );
               addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
               return dst;
            }
            case Iop_1Uto64:
            case Iop_1Uto32:
            case Iop_1Uto8: {
               HReg dst           = newVRegI(env);
               AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, AMD64Instr_Set64(cond,dst));
               return dst;
            }
            case Iop_1Sto8:
            case Iop_1Sto16:
            case Iop_1Sto32:
            case Iop_1Sto64: {
               /* could do better than this, but for now ... */
               HReg dst           = newVRegI(env);
               AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, AMD64Instr_Set64(cond,dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
               return dst;
            }
            case Iop_Ctz64: {
               /* Count trailing zeroes, implemented by amd64 'bsfq' */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
               return dst;
            }
            case Iop_Clz64: {
               /* Count leading zeroes.  Do 'bsrq' to establish the index
                  of the highest set bit, and subtract that value from
                  63. */
               HReg tmp = newVRegI(env);
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
               addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                               AMD64RMI_Imm(63), dst));
               addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                               AMD64RMI_Reg(tmp), dst));
               return dst;
            }
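
            /* Illustrative note: bsrq returns the bit index of the
               highest set bit, so e.g. for src = 1 it gives 0 and the
               subtraction yields clz = 63 - 0 = 63; for src with bit
               63 set it gives 63, so clz = 0.  bsf/bsr leave their
               destination undefined for a zero source, so a zero
               input has no defined result here. */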
sewardj176ad2f2005-04-27 11:55:08 +00001452 case Iop_Neg8:
1453 case Iop_Neg16:
1454 case Iop_Neg32:
1455 case Iop_Neg64: {
1456 HReg dst = newVRegI(env);
1457 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
1458 addInstr(env, mk_iMOVsd_RR(reg,dst));
sewardj501a3392005-05-11 15:37:50 +00001459 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
sewardj176ad2f2005-04-27 11:55:08 +00001460 return dst;
1461 }
sewardj537cab02005-04-07 02:03:52 +00001462
sewardj478fe702005-04-23 01:15:47 +00001463 case Iop_V128to32: {
1464 HReg dst = newVRegI(env);
1465 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1466 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1467 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1468 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1469 return dst;
1470 }
sewardj1a01e652005-02-23 11:39:21 +00001471
1472 /* V128{HI}to64 */
1473 case Iop_V128HIto64:
1474 case Iop_V128to64: {
1475 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
1476 HReg dst = newVRegI(env);
1477 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1478 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
1479 AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
1480 sub_from_rsp(env, 16);
1481 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
1482 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
sewardj25a85812005-05-08 23:03:48 +00001483 AMD64RMI_Mem(rspN), dst ));
sewardj1a01e652005-02-23 11:39:21 +00001484 add_to_rsp(env, 16);
1485 return dst;
1486 }
1487
sewardj924215b2005-03-26 21:50:31 +00001488 /* ReinterpF64asI64(e) */
1489 /* Given an IEEE754 double, produce an I64 with the same bit
1490 pattern. */
1491 case Iop_ReinterpF64asI64: {
1492 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1493 HReg dst = newVRegI(env);
1494 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1495 /* paranoia */
1496 set_SSE_rounding_default(env);
1497 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1498 addInstr(env, AMD64Instr_Alu64R(
1499 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1500 return dst;
1501 }
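         /* Round-trip sketch: ReinterpF64asI64(-0.0) comes out as
            0x8000000000000000 -- just the IEEE754 sign bit -- since
            the store/reload above moves the bits unchanged. */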
1502
sewardj85520e42005-02-19 15:22:38 +00001503 case Iop_16to8:
sewardja6b93d12005-02-17 09:28:28 +00001504 case Iop_32to8:
sewardj176ad2f2005-04-27 11:55:08 +00001505 case Iop_64to8:
sewardj7de0d3c2005-02-13 02:26:41 +00001506 case Iop_32to16:
sewardj176ad2f2005-04-27 11:55:08 +00001507 case Iop_64to16:
sewardj486074e2005-02-08 20:10:04 +00001508 case Iop_64to32:
1509 /* These are no-ops. */
1510 return iselIntExpr_R(env, e->Iex.Unop.arg);
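         /* (These can be no-ops because of the convention, assumed
            here, that a narrow value lives in the low bits of its
            64-bit register and the upper bits are arbitrary; callers
            needing defined upper bits must mask or sign-extend.) */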
sewardjf67eadf2005-02-03 03:53:52 +00001511
1512 default:
1513 break;
1514 }
sewardje7905662005-05-09 18:15:21 +00001515
1516 /* Deal with unary 64-bit SIMD ops. */
1517 switch (e->Iex.Unop.op) {
1518 case Iop_CmpNEZ32x2:
1519 fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1520 case Iop_CmpNEZ16x4:
1521 fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1522 case Iop_CmpNEZ8x8:
1523 fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1524 default:
1525 fn = (HWord)0; break;
1526 }
1527 if (fn != (HWord)0) {
1528 /* Note: the following assumes all helpers are of
1529 signature
1530 ULong fn ( ULong ), and they are
1531 not marked as regparm functions.
1532 */
1533 HReg dst = newVRegI(env);
1534 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1535 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
1536 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
1537 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1538 return dst;
1539 }
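   /* Call sketch for the SIMD64 helpers above, e.g. for
      Iop_CmpNEZ32x2(t) (standard SysV convention, one integer arg;
      names illustrative):
         movq %t, %rdi
         call h_generic_calc_CmpNEZ32x2
         movq %rax, %dst                                           */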
1540
sewardjf67eadf2005-02-03 03:53:52 +00001541 break;
1542 }
sewardj8258a8c2005-02-02 03:11:24 +00001543
1544 /* --------- GET --------- */
1545 case Iex_Get: {
1546 if (ty == Ity_I64) {
1547 HReg dst = newVRegI(env);
1548 addInstr(env, AMD64Instr_Alu64R(
1549 Aalu_MOV,
1550 AMD64RMI_Mem(
1551 AMD64AMode_IR(e->Iex.Get.offset,
1552 hregAMD64_RBP())),
1553 dst));
1554 return dst;
1555 }
1556 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1557 HReg dst = newVRegI(env);
1558 addInstr(env, AMD64Instr_LoadEX(
sewardj1e499352005-03-23 03:02:50 +00001559 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
sewardj8258a8c2005-02-02 03:11:24 +00001560 False,
1561 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1562 dst));
1563 return dst;
1564 }
1565 break;
1566 }
1567
sewardj8d965312005-02-25 02:48:47 +00001568 case Iex_GetI: {
1569 AMD64AMode* am
1570 = genGuestArrayOffset(
1571 env, e->Iex.GetI.descr,
1572 e->Iex.GetI.ix, e->Iex.GetI.bias );
1573 HReg dst = newVRegI(env);
1574 if (ty == Ity_I8) {
1575 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1576 return dst;
1577 }
sewardj1e015d82005-04-23 23:41:46 +00001578 if (ty == Ity_I64) {
1579 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1580 return dst;
1581 }
sewardj8d965312005-02-25 02:48:47 +00001582 break;
1583 }
sewardj05b3b6a2005-02-04 01:44:33 +00001584
1585 /* --------- CCALL --------- */
1586 case Iex_CCall: {
1587 HReg dst = newVRegI(env);
sewardj7f039c42005-02-04 21:13:55 +00001588 vassert(ty == e->Iex.CCall.retty);
sewardj05b3b6a2005-02-04 01:44:33 +00001589
1590 /* be very restrictive for now. Only 64-bit ints allowed
sewardje8aaa872005-07-07 13:12:04 +00001591 for args, and 64 or 32 bits for return type. */
1592 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
sewardj05b3b6a2005-02-04 01:44:33 +00001593 goto irreducible;
1594
sewardj7f039c42005-02-04 21:13:55 +00001595 /* Marshal args, do the call. */
sewardj05b3b6a2005-02-04 01:44:33 +00001596 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1597
sewardje8aaa872005-07-07 13:12:04 +00001598 /* Move to dst, and zero out the top 32 bits if the result type is
1599 Ity_I32. Probably overkill, but still .. */
1600 if (e->Iex.CCall.retty == Ity_I64)
1601 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1602 else
1603 addInstr(env, AMD64Instr_MovZLQ(hregAMD64_RAX(), dst));
1604
sewardj05b3b6a2005-02-04 01:44:33 +00001605 return dst;
1606 }
1607
sewardj7f039c42005-02-04 21:13:55 +00001608 /* --------- LITERAL --------- */
1609 /* 64/32/16/8-bit literals */
1610 case Iex_Const:
1611 if (ty == Ity_I64) {
1612 HReg r = newVRegI(env);
1613 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1614 return r;
1615 } else {
1616 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1617 HReg r = newVRegI(env);
1618 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1619 return r;
1620 }
sewardj05b3b6a2005-02-04 01:44:33 +00001621
1622 /* --------- MULTIPLEX --------- */
1623 case Iex_Mux0X: {
1624 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1625 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1626 HReg r8;
1627 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1628 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1629 HReg dst = newVRegI(env);
1630 addInstr(env, mk_iMOVsd_RR(rX,dst));
1631 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
sewardj501a3392005-05-11 15:37:50 +00001632 addInstr(env, AMD64Instr_Test64(0xFF, r8));
sewardj05b3b6a2005-02-04 01:44:33 +00001633 addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
1634 return dst;
1635 }
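         /* I.e. a branch-free select (sketch): dst := exprX, then
               testq $0xFF, %r8
               cmovz r0, %dst
            so dst reverts to expr0 exactly when the condition byte
            is zero. */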
1636 break;
1637 }
sewardj8258a8c2005-02-02 03:11:24 +00001638
sewardjf4c803b2006-09-11 11:07:34 +00001639 /* --------- TERNARY OP --------- */
1640 case Iex_Triop: {
1641 /* C3210 flags following FPU partial remainder (fprem), both
1642 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1643 if (e->Iex.Triop.op == Iop_PRemC3210F64) {
1644 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1645 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
1646 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
1647 HReg dst = newVRegI(env);
1648 addInstr(env, AMD64Instr_A87Free(2));
1649
1650 /* one arg -> top of x87 stack */
1651 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
1652 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
1653
1654 /* other arg -> top of x87 stack */
1655 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
1656 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
1657
1658 switch (e->Iex.Triop.op) {
1659 case Iop_PRemC3210F64:
1660 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1661 break;
1662 default:
1663 vassert(0);
1664 }
1665 /* Ignore the result, and instead make off with the FPU's
1666 C3210 flags (in the status word). */
1667 addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1668 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1669 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1670 return dst;
1671 }
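         /* (The 0x4700 mask keeps just bits 14, 10, 9 and 8 of the
            x87 status word -- C3, C2, C1, C0 -- which is where fprem
            leaves its condition/partial-remainder flags.) */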
1672 break;
1673 }
1674
sewardj8258a8c2005-02-02 03:11:24 +00001675 default:
1676 break;
1677 } /* switch (e->tag) */
1678
1679 /* We get here if no pattern matched. */
1680 irreducible:
1681 ppIRExpr(e);
1682 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1683}
sewardj614b3fb2005-02-02 02:16:03 +00001684
1685
1686/*---------------------------------------------------------*/
1687/*--- ISEL: Integer expression auxiliaries ---*/
1688/*---------------------------------------------------------*/
1689
1690/* --------------------- AMODEs --------------------- */
1691
1692/* Return an AMode which computes the value of the specified
1693 expression, possibly also adding insns to the code list as a
1694 result. The expression may only be a 64-bit one.
1695*/
1696
sewardj8258a8c2005-02-02 03:11:24 +00001697static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1698{
1699 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1700 vassert(sane_AMode(am));
1701 return am;
1702}
1703
1704/* DO NOT CALL THIS DIRECTLY ! */
1705static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1706{
sewardj05b3b6a2005-02-04 01:44:33 +00001707 MatchInfo mi;
1708 DECLARE_PATTERN(p_complex);
sewardj8258a8c2005-02-02 03:11:24 +00001709 IRType ty = typeOfIRExpr(env->type_env,e);
1710 vassert(ty == Ity_I64);
1711
sewardj05b3b6a2005-02-04 01:44:33 +00001712 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1713 /* bind0 bind1 bind2 bind3 */
1714 DEFINE_PATTERN(p_complex,
1715 binop( Iop_Add64,
1716 binop( Iop_Add64,
1717 bind(0),
1718 binop(Iop_Shl64, bind(1), bind(2))
1719 ),
1720 bind(3)
1721 )
1722 );
1723 if (matchIRExpr(&mi, p_complex, e)) {
1724 IRExpr* expr1 = mi.bindee[0];
1725 IRExpr* expr2 = mi.bindee[1];
1726 IRExpr* imm8 = mi.bindee[2];
1727 IRExpr* simm32 = mi.bindee[3];
1728 if (imm8->tag == Iex_Const
1729 && imm8->Iex.Const.con->tag == Ico_U8
1730 && imm8->Iex.Const.con->Ico.U8 < 4
1731 /* imm8 is OK, now check simm32 */
1732 && simm32->tag == Iex_Const
1733 && simm32->Iex.Const.con->tag == Ico_U64
1734 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1735 UInt shift = imm8->Iex.Const.con->Ico.U8;
sewardj428fabd2005-03-21 03:11:17 +00001736 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
sewardj05b3b6a2005-02-04 01:44:33 +00001737 HReg r1 = iselIntExpr_R(env, expr1);
1738 HReg r2 = iselIntExpr_R(env, expr2);
1739 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1740 return AMD64AMode_IRRS(offset, r1, r2, shift);
1741 }
1742 }
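   /* For instance (illustrative tree): the guest address
         Add64( Add64( t1, Shl64(t2, 3) ), 0x20 )
      matches p_complex and collapses to the single amode
         0x20(%t1,%t2,8)
      instead of a chain of shifts and adds. */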
1743
sewardj8258a8c2005-02-02 03:11:24 +00001744 /* Add64(expr1, Shl64(expr2, imm)) */
1745 if (e->tag == Iex_Binop
1746 && e->Iex.Binop.op == Iop_Add64
1747 && e->Iex.Binop.arg2->tag == Iex_Binop
1748 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1749 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1750 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1751 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1752 if (shift == 1 || shift == 2 || shift == 3) {
1753 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1754 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1755 return AMD64AMode_IRRS(0, r1, r2, shift);
1756 }
1757 }
1758
1759 /* Add64(expr,i) */
1760 if (e->tag == Iex_Binop
1761 && e->Iex.Binop.op == Iop_Add64
1762 && e->Iex.Binop.arg2->tag == Iex_Const
1763 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
1764 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
1765 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1766 return AMD64AMode_IR(
sewardj428fabd2005-03-21 03:11:17 +00001767 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
sewardj8258a8c2005-02-02 03:11:24 +00001768 r1
1769 );
1770 }
1771
1772 /* Doesn't match anything in particular. Generate it into
1773 a register and use that. */
1774 {
1775 HReg r1 = iselIntExpr_R(env, e);
1776 return AMD64AMode_IR(0, r1);
1777 }
1778}
sewardj614b3fb2005-02-02 02:16:03 +00001779
1780
1781/* --------------------- RMIs --------------------- */
1782
1783/* Similarly, calculate an expression into an AMD64RMI operand. As with
1784 iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
1785
1786static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1787{
1788 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1789 /* sanity checks ... */
1790 switch (rmi->tag) {
1791 case Armi_Imm:
1792 return rmi;
1793 case Armi_Reg:
1794 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
1795 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
1796 return rmi;
1797 case Armi_Mem:
1798 vassert(sane_AMode(rmi->Armi.Mem.am));
1799 return rmi;
1800 default:
1801 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
1802 }
1803}
1804
1805/* DO NOT CALL THIS DIRECTLY ! */
1806static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1807{
1808 IRType ty = typeOfIRExpr(env->type_env,e);
1809 vassert(ty == Ity_I64 || ty == Ity_I32
1810 || ty == Ity_I16 || ty == Ity_I8);
1811
1812 /* special case: immediate 64/32/16/8 */
1813 if (e->tag == Iex_Const) {
1814 switch (e->Iex.Const.con->tag) {
1815 case Ico_U64:
1816 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00001817 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardj614b3fb2005-02-02 02:16:03 +00001818 }
1819 break;
1820 case Ico_U32:
1821 return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32);
1822 case Ico_U16:
1823 return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
1824 case Ico_U8:
1825 return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
1826 default:
1827 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
1828 }
1829 }
1830
1831 /* special case: 64-bit GET */
1832 if (e->tag == Iex_Get && ty == Ity_I64) {
1833 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1834 hregAMD64_RBP()));
1835 }
1836
sewardj0852a132005-02-21 08:28:46 +00001837 /* special case: 64-bit load from memory */
sewardjaf1ceca2005-06-30 23:31:27 +00001838 if (e->tag == Iex_Load && ty == Ity_I64 && e->Iex.Load.end == Iend_LE) {
1839 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj0852a132005-02-21 08:28:46 +00001840 return AMD64RMI_Mem(am);
1841 }
sewardj614b3fb2005-02-02 02:16:03 +00001842
1843 /* default case: calculate into a register and return that */
sewardj8258a8c2005-02-02 03:11:24 +00001844 {
1845 HReg r = iselIntExpr_R ( env, e );
1846 return AMD64RMI_Reg(r);
1847 }
sewardj614b3fb2005-02-02 02:16:03 +00001848}
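/* Illustrative outcomes of the above (assuming the obvious IR):
      64-bit constant that fits in 32 bits   --> Armi_Imm
      64-bit Get / little-endian 64-bit load --> Armi_Mem
      anything else                          --> Armi_Reg, via a
                                                 computed register  */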
1849
1850
sewardjf67eadf2005-02-03 03:53:52 +00001851/* --------------------- RIs --------------------- */
1852
1853/* Calculate an expression into an AMD64RI operand. As with
1854 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1855 bits. */
1856
1857static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1858{
1859 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
1860 /* sanity checks ... */
1861 switch (ri->tag) {
1862 case Ari_Imm:
1863 return ri;
1864 case Ari_Reg:
1865 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
1866 vassert(hregIsVirtual(ri->Ari.Reg.reg));
1867 return ri;
1868 default:
1869 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
1870 }
1871}
1872
1873/* DO NOT CALL THIS DIRECTLY ! */
1874static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1875{
1876 IRType ty = typeOfIRExpr(env->type_env,e);
1877 vassert(ty == Ity_I64 || ty == Ity_I32
1878 || ty == Ity_I16 || ty == Ity_I8);
1879
1880 /* special case: immediate */
1881 if (e->tag == Iex_Const) {
1882 switch (e->Iex.Const.con->tag) {
1883 case Ico_U64:
1884 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
sewardj428fabd2005-03-21 03:11:17 +00001885 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
sewardjf67eadf2005-02-03 03:53:52 +00001886 }
1887 break;
1888 case Ico_U32:
1889 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
1890 case Ico_U16:
1891 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
1892 case Ico_U8:
1893 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
1894 default:
1895 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
1896 }
1897 }
1898
1899 /* default case: calculate into a register and return that */
1900 {
1901 HReg r = iselIntExpr_R ( env, e );
1902 return AMD64RI_Reg(r);
1903 }
1904}
1905
1906
sewardj05b3b6a2005-02-04 01:44:33 +00001907/* --------------------- RMs --------------------- */
1908
1909/* Similarly, calculate an expression into an AMD64RM operand. As
1910 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1911 bits. */
1912
1913static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1914{
1915 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
1916 /* sanity checks ... */
1917 switch (rm->tag) {
1918 case Arm_Reg:
1919 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
1920 vassert(hregIsVirtual(rm->Arm.Reg.reg));
1921 return rm;
1922 case Arm_Mem:
1923 vassert(sane_AMode(rm->Arm.Mem.am));
1924 return rm;
1925 default:
1926 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
1927 }
1928}
1929
1930/* DO NOT CALL THIS DIRECTLY ! */
1931static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1932{
1933 IRType ty = typeOfIRExpr(env->type_env,e);
1934 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1935
1936 /* special case: 64-bit GET */
1937 if (e->tag == Iex_Get && ty == Ity_I64) {
1938 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1939 hregAMD64_RBP()));
1940 }
1941
1942 /* special case: load from memory */
1943
1944 /* default case: calculate into a register and return that */
1945 {
1946 HReg r = iselIntExpr_R ( env, e );
1947 return AMD64RM_Reg(r);
1948 }
1949}
1950
1951
1952/* --------------------- CONDCODE --------------------- */
1953
1954/* Generate code to evaluate a bit-typed expression, returning the
1955 condition code which would be set when the expression notionally
1956 returns 1. */
1957
1958static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1959{
1960 /* Uh, there's nothing we can sanity check here, unfortunately. */
1961 return iselCondCode_wrk(env,e);
1962}
1963
1964/* DO NOT CALL THIS DIRECTLY ! */
1965static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1966{
sewardjf8c37f72005-02-07 18:55:29 +00001967 MatchInfo mi;
sewardja3e98302005-02-01 15:55:05 +00001968//.. DECLARE_PATTERN(p_1Uto32_then_32to1);
1969//.. DECLARE_PATTERN(p_1Sto32_then_32to1);
sewardj05b3b6a2005-02-04 01:44:33 +00001970
sewardj0af46ab2005-04-26 01:52:29 +00001971 DECLARE_PATTERN(p_1Uto64_then_64to1);
1972
sewardj05b3b6a2005-02-04 01:44:33 +00001973 vassert(e);
1974 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1975
sewardj176ad2f2005-04-27 11:55:08 +00001976 /* var */
sewardjdd40fdf2006-12-24 02:20:24 +00001977 if (e->tag == Iex_RdTmp) {
1978 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj176ad2f2005-04-27 11:55:08 +00001979 HReg dst = newVRegI(env);
1980 addInstr(env, mk_iMOVsd_RR(r64,dst));
1981 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
1982 return Acc_NZ;
1983 }
1984
sewardj109e9352005-07-19 08:42:56 +00001985 /* Constant 1:Bit */
1986 if (e->tag == Iex_Const) {
1987 HReg r;
1988 vassert(e->Iex.Const.con->tag == Ico_U1);
1989 vassert(e->Iex.Const.con->Ico.U1 == True
1990 || e->Iex.Const.con->Ico.U1 == False);
1991 r = newVRegI(env);
1992 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
1993 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
1994 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
1995 }
sewardj486074e2005-02-08 20:10:04 +00001996
1997 /* Not1(...) */
1998 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1999 /* Generate code for the arg, and negate the test condition */
2000 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2001 }
2002
sewardj176ad2f2005-04-27 11:55:08 +00002003 /* --- patterns rooted at: 64to1 --- */
2004
2005 /* 64to1(1Uto64(expr1)) ==> expr1 */
2006 DEFINE_PATTERN( p_1Uto64_then_64to1,
2007 unop(Iop_64to1, unop(Iop_1Uto64, bind(0))) );
sewardj0af46ab2005-04-26 01:52:29 +00002008 if (matchIRExpr(&mi,p_1Uto64_then_64to1,e)) {
2009 IRExpr* expr1 = mi.bindee[0];
2010 return iselCondCode(env, expr1);
2011 }
2012
sewardja3e98302005-02-01 15:55:05 +00002013//.. /* 32to1(1Uto32(expr1)) -- the casts are pointless, ignore them */
2014//.. DEFINE_PATTERN(p_1Uto32_then_32to1,
2015//.. unop(Iop_32to1,unop(Iop_1Uto32,bind(0))));
2016//.. if (matchIRExpr(&mi,p_1Uto32_then_32to1,e)) {
2017//.. IRExpr* expr1 = mi.bindee[0];
2018//.. return iselCondCode(env, expr1);
2019//.. }
2020//..
2021//.. /* 32to1(1Sto32(expr1)) -- the casts are pointless, ignore them */
2022//.. DEFINE_PATTERN(p_1Sto32_then_32to1,
2023//.. unop(Iop_32to1,unop(Iop_1Sto32,bind(0))));
2024//.. if (matchIRExpr(&mi,p_1Sto32_then_32to1,e)) {
2025//.. IRExpr* expr1 = mi.bindee[0];
2026//.. return iselCondCode(env, expr1);
2027//.. }
sewardjf8c37f72005-02-07 18:55:29 +00002028
sewardj176ad2f2005-04-27 11:55:08 +00002029 /* 64to1 */
2030 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
sewardj501a3392005-05-11 15:37:50 +00002031 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2032 addInstr(env, AMD64Instr_Test64(1,reg));
sewardjf8c37f72005-02-07 18:55:29 +00002033 return Acc_NZ;
2034 }
2035
sewardj176ad2f2005-04-27 11:55:08 +00002036 /* --- patterns rooted at: CmpNEZ8 --- */
2037
2038 /* CmpNEZ8(x) */
2039 if (e->tag == Iex_Unop
2040 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2041 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002042 addInstr(env, AMD64Instr_Test64(0xFF,r));
sewardj176ad2f2005-04-27 11:55:08 +00002043 return Acc_NZ;
2044 }
2045
sewardj86ec28b2005-04-27 13:39:35 +00002046 /* --- patterns rooted at: CmpNEZ16 --- */
2047
2048 /* CmpNEZ16(x) */
2049 if (e->tag == Iex_Unop
2050 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2051 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
sewardj501a3392005-05-11 15:37:50 +00002052 addInstr(env, AMD64Instr_Test64(0xFFFF,r));
sewardj86ec28b2005-04-27 13:39:35 +00002053 return Acc_NZ;
2054 }
2055
sewardj176ad2f2005-04-27 11:55:08 +00002056 /* --- patterns rooted at: CmpNEZ32 --- */
2057
2058 /* CmpNEZ32(x) */
2059 if (e->tag == Iex_Unop
2060 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2061 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2062 HReg tmp = newVRegI(env);
2063 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2064 addInstr(env, AMD64Instr_MovZLQ(r1,tmp));
2065 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
2066 return Acc_NZ;
2067 }
2068
2069 /* --- patterns rooted at: CmpNEZ64 --- */
2070
sewardj0bc78ab2005-05-11 22:47:32 +00002071 /* CmpNEZ64(Or64(x,y)) */
2072 {
2073 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2074 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2075 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2076 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2077 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2078 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2079 HReg tmp = newVRegI(env);
2080 addInstr(env, mk_iMOVsd_RR(r0, tmp));
2081 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2082 return Acc_NZ;
2083 }
2084 }
2085
sewardj176ad2f2005-04-27 11:55:08 +00002086 /* CmpNEZ64(x) */
2087 if (e->tag == Iex_Unop
2088 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2089 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2090 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2091 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2092 return Acc_NZ;
2093 }
2094
2095 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2096
sewardj42322b52005-04-20 22:57:11 +00002097 /* CmpEQ8 / CmpNE8 */
2098 if (e->tag == Iex_Binop
2099 && (e->Iex.Binop.op == Iop_CmpEQ8
2100 || e->Iex.Binop.op == Iop_CmpNE8)) {
2101 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2102 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2103 HReg r = newVRegI(env);
2104 addInstr(env, mk_iMOVsd_RR(r1,r));
2105 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2106 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2107 switch (e->Iex.Binop.op) {
2108 case Iop_CmpEQ8: return Acc_Z;
2109 case Iop_CmpNE8: return Acc_NZ;
2110 default: vpanic("iselCondCode(amd64): CmpXX8");
2111 }
2112 }
2113
sewardj0af46ab2005-04-26 01:52:29 +00002114 /* CmpEQ16 / CmpNE16 */
2115 if (e->tag == Iex_Binop
2116 && (e->Iex.Binop.op == Iop_CmpEQ16
2117 || e->Iex.Binop.op == Iop_CmpNE16)) {
2118 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2119 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2120 HReg r = newVRegI(env);
2121 addInstr(env, mk_iMOVsd_RR(r1,r));
2122 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2123 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2124 switch (e->Iex.Binop.op) {
2125 case Iop_CmpEQ16: return Acc_Z;
2126 case Iop_CmpNE16: return Acc_NZ;
2127 default: vpanic("iselCondCode(amd64): CmpXX16");
2128 }
2129 }
2130
sewardj478fe702005-04-23 01:15:47 +00002131 /* CmpEQ32 / CmpNE32 */
2132 if (e->tag == Iex_Binop
2133 && (e->Iex.Binop.op == Iop_CmpEQ32
2134 || e->Iex.Binop.op == Iop_CmpNE32)) {
2135 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2136 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2137 HReg r = newVRegI(env);
2138 addInstr(env, mk_iMOVsd_RR(r1,r));
2139 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
sewardj501a3392005-05-11 15:37:50 +00002140 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
sewardj478fe702005-04-23 01:15:47 +00002141 switch (e->Iex.Binop.op) {
2142 case Iop_CmpEQ32: return Acc_Z;
2143 case Iop_CmpNE32: return Acc_NZ;
sewardj176ad2f2005-04-27 11:55:08 +00002144 default: vpanic("iselCondCode(amd64): CmpXX32");
sewardj42322b52005-04-20 22:57:11 +00002145 }
2146 }
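   /* The shlq $32 above throws away the irrelevant upper half, so Z
      ends up set iff the low 32 bits of the two operands agree; the
      32-bit compare thus works without a zero-extension (sketch):
         movq %r1, %r ; xorq rmi2, %r ; shlq $32, %r              */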
sewardjd0a12df2005-02-10 02:07:43 +00002147
2148 /* Cmp*64*(x,y) */
2149 if (e->tag == Iex_Binop
2150 && (e->Iex.Binop.op == Iop_CmpEQ64
2151 || e->Iex.Binop.op == Iop_CmpNE64
sewardj0af46ab2005-04-26 01:52:29 +00002152 || e->Iex.Binop.op == Iop_CmpLT64S
2153 || e->Iex.Binop.op == Iop_CmpLT64U
2154 || e->Iex.Binop.op == Iop_CmpLE64S
sewardja9e4a802005-12-26 19:33:55 +00002155 || e->Iex.Binop.op == Iop_CmpLE64U
sewardjd0a12df2005-02-10 02:07:43 +00002156 )) {
2157 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2158 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2159 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2160 switch (e->Iex.Binop.op) {
2161 case Iop_CmpEQ64: return Acc_Z;
2162 case Iop_CmpNE64: return Acc_NZ;
sewardj0af46ab2005-04-26 01:52:29 +00002163 case Iop_CmpLT64S: return Acc_L;
2164 case Iop_CmpLT64U: return Acc_B;
2165 case Iop_CmpLE64S: return Acc_LE;
sewardja9e4a802005-12-26 19:33:55 +00002166 case Iop_CmpLE64U: return Acc_BE;
sewardjd0a12df2005-02-10 02:07:43 +00002167 default: vpanic("iselCondCode(amd64): CmpXX64");
2168 }
2169 }
2170
sewardja3e98302005-02-01 15:55:05 +00002171//.. /* CmpNE64(1Sto64(b), 0) ==> b */
2172//.. {
2173//.. DECLARE_PATTERN(p_CmpNE64_1Sto64);
2174//.. DEFINE_PATTERN(
2175//.. p_CmpNE64_1Sto64,
2176//.. binop(Iop_CmpNE64, unop(Iop_1Sto64,bind(0)), mkU64(0)));
2177//.. if (matchIRExpr(&mi, p_CmpNE64_1Sto64, e)) {
2178//.. return iselCondCode(env, mi.bindee[0]);
2179//.. }
2180//.. }
2181//..
2182//.. /* CmpNE64(x, 0) */
2183//.. {
2184//.. DECLARE_PATTERN(p_CmpNE64_x_zero);
2185//.. DEFINE_PATTERN(
2186//.. p_CmpNE64_x_zero,
2187//.. binop(Iop_CmpNE64, bind(0), mkU64(0)) );
2188//.. if (matchIRExpr(&mi, p_CmpNE64_x_zero, e)) {
2189//.. HReg hi, lo;
2190//.. IRExpr* x = mi.bindee[0];
2191//.. HReg tmp = newVRegI(env);
2192//.. iselInt64Expr( &hi, &lo, env, x );
2193//.. addInstr(env, mk_iMOVsd_RR(hi, tmp));
2194//.. addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
2195//.. return Xcc_NZ;
2196//.. }
2197//.. }
2198//..
2199//.. /* CmpNE64 */
2200//.. if (e->tag == Iex_Binop
2201//.. && e->Iex.Binop.op == Iop_CmpNE64) {
2202//.. HReg hi1, hi2, lo1, lo2;
2203//.. HReg tHi = newVRegI(env);
2204//.. HReg tLo = newVRegI(env);
2205//.. iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
2206//.. iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
2207//.. addInstr(env, mk_iMOVsd_RR(hi1, tHi));
2208//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
2209//.. addInstr(env, mk_iMOVsd_RR(lo1, tLo));
2210//.. addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
2211//.. addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
2212//.. switch (e->Iex.Binop.op) {
2213//.. case Iop_CmpNE64: return Xcc_NZ;
2214//.. default: vpanic("iselCondCode(x86): CmpXX64");
2215//.. }
2216//.. }
sewardja5bd0af2005-03-24 20:40:12 +00002217
sewardj05b3b6a2005-02-04 01:44:33 +00002218 ppIRExpr(e);
2219 vpanic("iselCondCode(amd64)");
2220}
2221
2222
sewardj9b967672005-02-08 11:13:09 +00002223/*---------------------------------------------------------*/
2224/*--- ISEL: Integer expressions (128 bit) ---*/
2225/*---------------------------------------------------------*/
2226
2227/* Compute a 128-bit value into a register pair, which is returned as
2228 the first two parameters. As with iselIntExpr_R, these will be
2229 virtual regs, and they must not be changed
2230 by subsequent code emitted by the caller. */
2231
2232static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2233 ISelEnv* env, IRExpr* e )
2234{
2235 iselInt128Expr_wrk(rHi, rLo, env, e);
2236# if 0
2237 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2238# endif
2239 vassert(hregClass(*rHi) == HRcInt64);
2240 vassert(hregIsVirtual(*rHi));
2241 vassert(hregClass(*rLo) == HRcInt64);
2242 vassert(hregIsVirtual(*rLo));
2243}
2244
2245/* DO NOT CALL THIS DIRECTLY ! */
2246static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2247 ISelEnv* env, IRExpr* e )
2248{
sewardja3e98302005-02-01 15:55:05 +00002249//.. HWord fn = 0; /* helper fn for most SIMD64 stuff */
sewardj9b967672005-02-08 11:13:09 +00002250 vassert(e);
2251 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2252
sewardja3e98302005-02-01 15:55:05 +00002253//.. /* 64-bit literal */
2254//.. if (e->tag == Iex_Const) {
2255//.. ULong w64 = e->Iex.Const.con->Ico.U64;
2256//.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
2257//.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
2258//.. HReg tLo = newVRegI(env);
2259//.. HReg tHi = newVRegI(env);
2260//.. vassert(e->Iex.Const.con->tag == Ico_U64);
2261//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2262//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2263//.. *rHi = tHi;
2264//.. *rLo = tLo;
2265//.. return;
2266//.. }
sewardj9b967672005-02-08 11:13:09 +00002267
2268 /* read 128-bit IRTemp */
sewardjdd40fdf2006-12-24 02:20:24 +00002269 if (e->tag == Iex_RdTmp) {
2270 lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp);
sewardj9b967672005-02-08 11:13:09 +00002271 return;
2272 }
2273
sewardja3e98302005-02-01 15:55:05 +00002274//.. /* 64-bit load */
2275//.. if (e->tag == Iex_LDle) {
2276//.. HReg tLo, tHi;
2277//.. X86AMode *am0, *am4;
2278//.. vassert(e->Iex.LDle.ty == Ity_I64);
2279//.. tLo = newVRegI(env);
2280//.. tHi = newVRegI(env);
2281//.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2282//.. am4 = advance4(am0);
2283//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2284//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2285//.. *rHi = tHi;
2286//.. *rLo = tLo;
2287//.. return;
2288//.. }
2289//..
2290//.. /* 64-bit GET */
2291//.. if (e->tag == Iex_Get) {
2292//.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2293//.. X86AMode* am4 = advance4(am);
2294//.. HReg tLo = newVRegI(env);
2295//.. HReg tHi = newVRegI(env);
2296//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2297//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2298//.. *rHi = tHi;
2299//.. *rLo = tLo;
2300//.. return;
2301//.. }
2302//..
2303//.. /* 64-bit GETI */
2304//.. if (e->tag == Iex_GetI) {
2305//.. X86AMode* am
2306//.. = genGuestArrayOffset( env, e->Iex.GetI.descr,
2307//.. e->Iex.GetI.ix, e->Iex.GetI.bias );
2308//.. X86AMode* am4 = advance4(am);
2309//.. HReg tLo = newVRegI(env);
2310//.. HReg tHi = newVRegI(env);
2311//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2312//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2313//.. *rHi = tHi;
2314//.. *rLo = tLo;
2315//.. return;
2316//.. }
2317//..
2318//.. /* 64-bit Mux0X */
2319//.. if (e->tag == Iex_Mux0X) {
2320//.. HReg e0Lo, e0Hi, eXLo, eXHi, r8;
2321//.. HReg tLo = newVRegI(env);
2322//.. HReg tHi = newVRegI(env);
2323//.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2324//.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2325//.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2326//.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2327//.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2328//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
2329//.. /* This assumes the first cmov32 doesn't trash the condition
2330//.. codes, so they are still available for the second cmov32 */
2331//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2332//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2333//.. *rHi = tHi;
2334//.. *rLo = tLo;
2335//.. return;
2336//.. }
sewardj9b967672005-02-08 11:13:09 +00002337
2338 /* --------- BINARY ops --------- */
2339 if (e->tag == Iex_Binop) {
2340 switch (e->Iex.Binop.op) {
sewardj7de0d3c2005-02-13 02:26:41 +00002341 /* 64 x 64 -> 128 multiply */
sewardj9b967672005-02-08 11:13:09 +00002342 case Iop_MullU64:
2343 case Iop_MullS64: {
2344 /* get one operand into %rax, and the other into a R/M.
2345 Need to make an educated guess about which is better in
2346 which. */
2347 HReg tLo = newVRegI(env);
2348 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002349 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
sewardj9b967672005-02-08 11:13:09 +00002350 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2351 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2352 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
sewardj501a3392005-05-11 15:37:50 +00002353 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
sewardj9b967672005-02-08 11:13:09 +00002354 /* Result is now in RDX:RAX. Tell the caller. */
2355 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2356 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2357 *rHi = tHi;
2358 *rLo = tLo;
2359 return;
2360 }
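         /* Concretely (sketch, unsigned case):
               movq %rRight, %rax
               mulq <rmLeft>          -- RDX:RAX := RAX * rmLeft
               movq %rdx, %tHi ; movq %rax, %tLo
            with imulq instead of mulq when syned holds. */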
sewardj7de0d3c2005-02-13 02:26:41 +00002361
sewardja6b93d12005-02-17 09:28:28 +00002362 /* 128 x 64 -> (64(rem),64(div)) division */
2363 case Iop_DivModU128to64:
2364 case Iop_DivModS128to64: {
2365 /* Get the 128-bit operand into rdx:rax, and the other into
2366 any old R/M. */
2367 HReg sHi, sLo;
2368 HReg tLo = newVRegI(env);
2369 HReg tHi = newVRegI(env);
sewardj428fabd2005-03-21 03:11:17 +00002370 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
sewardja6b93d12005-02-17 09:28:28 +00002371 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2372 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2373 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2374 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2375 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2376 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2377 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2378 *rHi = tHi;
2379 *rLo = tLo;
2380 return;
2381 }
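         /* E.g. for DivModU128to64(t128, t64) (sketch): with RDX:RAX
            holding t128,
               divq <t64>
            leaves the quotient in RAX and the remainder in RDX, which
            become the low and high halves of the 128-bit result
            respectively. */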
2382
2383 /* 64HLto128(e1,e2) */
2384 case Iop_64HLto128:
2385 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2386 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2387 return;
2388
sewardja3e98302005-02-01 15:55:05 +00002389//.. /* Or64/And64/Xor64 */
2390//.. case Iop_Or64:
2391//.. case Iop_And64:
2392//.. case Iop_Xor64: {
2393//.. HReg xLo, xHi, yLo, yHi;
2394//.. HReg tLo = newVRegI(env);
2395//.. HReg tHi = newVRegI(env);
2396//.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2397//.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2398//.. : Xalu_XOR;
2399//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2400//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2401//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2402//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2403//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2404//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2405//.. *rHi = tHi;
2406//.. *rLo = tLo;
2407//.. return;
2408//.. }
2409//..
2410//.. /* Add64/Sub64 */
2411//.. case Iop_Add64:
2412//.. case Iop_Sub64: {
2413//.. HReg xLo, xHi, yLo, yHi;
2414//.. HReg tLo = newVRegI(env);
2415//.. HReg tHi = newVRegI(env);
2416//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2417//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2418//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2419//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2420//.. if (e->Iex.Binop.op==Iop_Add64) {
2421//.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2422//.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2423//.. } else {
2424//.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2425//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2426//.. }
2427//.. *rHi = tHi;
2428//.. *rLo = tLo;
2429//.. return;
2430//.. }
2431//..
2432//.. /* 32HLto64(e1,e2) */
2433//.. case Iop_32HLto64:
2434//.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2435//.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2436//.. return;
2437//..
2438//.. /* 64-bit shifts */
2439//.. case Iop_Shl64: {
2440//.. /* We use the same ingenious scheme as gcc. Put the value
2441//.. to be shifted into %hi:%lo, and the shift amount into
2442//.. %cl. Then (dsts on right, a la ATT syntax):
2443//..
2444//.. shldl %cl, %lo, %hi -- make %hi be right for the
2445//.. -- shift amt %cl % 32
2446//.. shll %cl, %lo -- make %lo be right for the
2447//.. -- shift amt %cl % 32
2448//..
2449//.. Now, if (shift amount % 64) is in the range 32 .. 63,
2450//.. we have to do a fixup, which puts the result low half
2451//.. into the result high half, and zeroes the low half:
2452//..
2453//.. testl $32, %ecx
2454//..
2455//.. cmovnz %lo, %hi
2456//.. movl $0, %tmp -- sigh; need yet another reg
2457//.. cmovnz %tmp, %lo
2458//.. */
2459//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2460//.. tLo = newVRegI(env);
2461//.. tHi = newVRegI(env);
2462//.. tTemp = newVRegI(env);
2463//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2464//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2465//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2466//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2467//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2468//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2469//.. and those regs are legitimately modifiable. */
2470//.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2471//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
2472//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2473//.. X86RM_Reg(hregX86_ECX())));
2474//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2475//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2476//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2477//.. *rHi = tHi;
2478//.. *rLo = tLo;
2479//.. return;
2480//.. }
2481//..
2482//.. case Iop_Shr64: {
2483//.. /* We use the same ingenious scheme as gcc. Put the value
2484//.. to be shifted into %hi:%lo, and the shift amount into
2485//.. %cl. Then:
2486//..
2487//.. shrdl %cl, %hi, %lo -- make %lo be right for the
2488//.. -- shift amt %cl % 32
2489//.. shrl %cl, %hi -- make %hi be right for the
2490//.. -- shift amt %cl % 32
2491//..
2492//.. Now, if (shift amount % 64) is in the range 32 .. 63,
2493//.. we have to do a fixup, which puts the result high half
2494//.. into the result low half, and zeroes the high half:
2495//..
2496//.. testl $32, %ecx
2497//..
2498//.. cmovnz %hi, %lo
2499//.. movl $0, %tmp -- sigh; need yet another reg
2500//.. cmovnz %tmp, %hi
2501//.. */
2502//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2503//.. tLo = newVRegI(env);
2504//.. tHi = newVRegI(env);
2505//.. tTemp = newVRegI(env);
2506//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2507//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2508//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2509//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2510//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2511//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2512//.. and those regs are legitimately modifiable. */
2513//.. addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2514//.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
2515//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2516//.. X86RM_Reg(hregX86_ECX())));
2517//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2518//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2519//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2520//.. *rHi = tHi;
2521//.. *rLo = tLo;
2522//.. return;
2523//.. }
2524//..
2525//.. /* F64 -> I64 */
2526//.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2527//.. case. Unfortunately I see no easy way to avoid the
2528//.. duplication. */
2529//.. case Iop_F64toI64: {
2530//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2531//.. HReg tLo = newVRegI(env);
2532//.. HReg tHi = newVRegI(env);
2533//..
2534//.. /* Used several times ... */
2535//.. /* Careful ... this sharing is only safe because
2536//.. zero_esp/four_esp do not hold any registers which the
2537//.. register allocator could attempt to swizzle later. */
2538//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2539//.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2540//..
2541//.. /* rf now holds the value to be converted, and rrm holds
2542//.. the rounding mode value, encoded as per the
2543//.. IRRoundingMode enum. The first thing to do is set the
2544//.. FPU's rounding mode accordingly. */
2545//..
2546//.. /* Create a space for the format conversion. */
2547//.. /* subl $8, %esp */
2548//.. sub_from_esp(env, 8);
2549//..
2550//.. /* Set host rounding mode */
2551//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2552//..
2553//.. /* gistll %rf, 0(%esp) */
2554//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2555//..
2556//.. /* movl 0(%esp), %dstLo */
2557//.. /* movl 4(%esp), %dstHi */
2558//.. addInstr(env, X86Instr_Alu32R(
2559//.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2560//.. addInstr(env, X86Instr_Alu32R(
2561//.. Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2562//..
2563//.. /* Restore default FPU rounding. */
2564//.. set_FPU_rounding_default( env );
2565//..
2566//.. /* addl $8, %esp */
2567//.. add_to_esp(env, 8);
2568//..
2569//.. *rHi = tHi;
2570//.. *rLo = tLo;
2571//.. return;
2572//.. }
2573//..
sewardj9b967672005-02-08 11:13:09 +00002574 default:
2575 break;
2576 }
2577 } /* if (e->tag == Iex_Binop) */
2578
2579
sewardja3e98302005-02-01 15:55:05 +00002580//.. /* --------- UNARY ops --------- */
2581//.. if (e->tag == Iex_Unop) {
2582//.. switch (e->Iex.Unop.op) {
2583//..
2584//.. /* 32Sto64(e) */
2585//.. case Iop_32Sto64: {
2586//.. HReg tLo = newVRegI(env);
2587//.. HReg tHi = newVRegI(env);
2588//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2589//.. addInstr(env, mk_iMOVsd_RR(src,tHi));
2590//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2591//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
2592//.. *rHi = tHi;
2593//.. *rLo = tLo;
2594//.. return;
2595//.. }
2596//..
2597//.. /* 32Uto64(e) */
2598//.. case Iop_32Uto64: {
2599//.. HReg tLo = newVRegI(env);
2600//.. HReg tHi = newVRegI(env);
2601//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2602//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2603//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2604//.. *rHi = tHi;
2605//.. *rLo = tLo;
2606//.. return;
2607//.. }
sewardj1a01e652005-02-23 11:39:21 +00002608
sewardja3e98302005-02-01 15:55:05 +00002609//.. /* could do better than this, but for now ... */
2610//.. case Iop_1Sto64: {
2611//.. HReg tLo = newVRegI(env);
2612//.. HReg tHi = newVRegI(env);
2613//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2614//.. addInstr(env, X86Instr_Set32(cond,tLo));
2615//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
2616//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
2617//.. addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2618//.. *rHi = tHi;
2619//.. *rLo = tLo;
2620//.. return;
2621//.. }
2622//..
2623//.. /* Not64(e) */
2624//.. case Iop_Not64: {
2625//.. HReg tLo = newVRegI(env);
2626//.. HReg tHi = newVRegI(env);
2627//.. HReg sHi, sLo;
2628//.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2629//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2630//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2631//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
2632//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
2633//.. *rHi = tHi;
2634//.. *rLo = tLo;
2635//.. return;
2636//.. }
2637//..
sewardja3e98302005-02-01 15:55:05 +00002638//.. default:
2639//.. break;
2640//.. }
2641//.. } /* if (e->tag == Iex_Unop) */
2642//..
2643//..
2644//.. /* --------- CCALL --------- */
2645//.. if (e->tag == Iex_CCall) {
2646//.. HReg tLo = newVRegI(env);
2647//.. HReg tHi = newVRegI(env);
2648//..
2649//.. /* Marshal args, do the call, clear stack. */
2650//.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2651//..
2652//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2653//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2654//.. *rHi = tHi;
2655//.. *rLo = tLo;
2656//.. return;
2657//.. }
sewardj9b967672005-02-08 11:13:09 +00002658
2659 ppIRExpr(e);
2660 vpanic("iselInt128Expr");
2661}
2662
2663
sewardj8d965312005-02-25 02:48:47 +00002664/*---------------------------------------------------------*/
2665/*--- ISEL: Floating point expressions (32 bit) ---*/
2666/*---------------------------------------------------------*/
2667
2668/* Nothing interesting here; really just wrappers for
2669 64-bit stuff. */
2670
2671static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2672{
2673 HReg r = iselFltExpr_wrk( env, e );
2674# if 0
2675 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2676# endif
2677 vassert(hregClass(r) == HRcVec128);
2678 vassert(hregIsVirtual(r));
2679 return r;
2680}
2681
2682/* DO NOT CALL THIS DIRECTLY */
2683static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2684{
2685 IRType ty = typeOfIRExpr(env->type_env,e);
2686 vassert(ty == Ity_F32);
2687
sewardjdd40fdf2006-12-24 02:20:24 +00002688 if (e->tag == Iex_RdTmp) {
2689 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardjc49ce232005-02-25 13:03:03 +00002690 }
2691
sewardjaf1ceca2005-06-30 23:31:27 +00002692 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardjc49ce232005-02-25 13:03:03 +00002693 AMD64AMode* am;
2694 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002695 vassert(e->Iex.Load.ty == Ity_F32);
2696 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardjc49ce232005-02-25 13:03:03 +00002697 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2698 return res;
2699 }
sewardj8d965312005-02-25 02:48:47 +00002700
2701 if (e->tag == Iex_Binop
2702 && e->Iex.Binop.op == Iop_F64toF32) {
2703 /* Although the result is still held in a standard SSE register,
2704 we need to round it to reflect the loss of accuracy/range
2705 entailed in casting it to a 32-bit float. */
2706 HReg dst = newVRegV(env);
2707 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2708 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2709 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2710 set_SSE_rounding_default( env );
2711 return dst;
2712 }
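   /* Example of why the mode matters: an F64 such as 0x1.00000001p0
      has no exact F32 representation, so the cvtsd2ss generated above
      must pick a neighbouring F32, and which neighbour it picks
      depends on the rounding mode requested in arg1. */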
2713
sewardjc49ce232005-02-25 13:03:03 +00002714 if (e->tag == Iex_Get) {
2715 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2716 hregAMD64_RBP() );
2717 HReg res = newVRegV(env);
2718 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2719 return res;
2720 }
2721
sewardj5992bd02005-05-11 02:13:42 +00002722 if (e->tag == Iex_Unop
2723 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2724 /* Given an I32, produce an IEEE754 float with the same bit
2725 pattern. */
2726 HReg dst = newVRegV(env);
2727 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2728 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2729 addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2730 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2731 return dst;
2732 }
sewardj8d965312005-02-25 02:48:47 +00002733
2734 ppIRExpr(e);
2735 vpanic("iselFltExpr_wrk");
2736}
sewardj18303862005-02-21 12:36:54 +00002737
2738
2739/*---------------------------------------------------------*/
2740/*--- ISEL: Floating point expressions (64 bit) ---*/
2741/*---------------------------------------------------------*/
2742
2743/* Compute a 64-bit floating point value into the lower half of an xmm
2744 register, the identity of which is returned. As with
2745 iselIntExpr_R, the returned reg will be virtual, and it must not be
2746 changed by subsequent code emitted by the caller.
2747*/
2748
2749/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2750
2751 Type S (1 bit) E (11 bits) F (52 bits)
2752 ---- --------- ----------- -----------
2753 signalling NaN u 2047 (max) .0uuuuu---u
2754 (with at least
2755 one 1 bit)
2756 quiet NaN u 2047 (max) .1uuuuu---u
2757
2758 negative infinity 1 2047 (max) .000000---0
2759
2760 positive infinity 0 2047 (max) .000000---0
2761
2762 negative zero 1 0 .000000---0
2763
2764 positive zero 0 0 .000000---0
2765*/
2766
2767static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2768{
2769 HReg r = iselDblExpr_wrk( env, e );
2770# if 0
2771 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2772# endif
2773 vassert(hregClass(r) == HRcVec128);
2774 vassert(hregIsVirtual(r));
2775 return r;
2776}
2777
2778/* DO NOT CALL THIS DIRECTLY */
2779static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2780{
2781 IRType ty = typeOfIRExpr(env->type_env,e);
2782 vassert(e);
2783 vassert(ty == Ity_F64);
2784
sewardjdd40fdf2006-12-24 02:20:24 +00002785 if (e->tag == Iex_RdTmp) {
2786 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
sewardj18303862005-02-21 12:36:54 +00002787 }
2788
sewardj8d965312005-02-25 02:48:47 +00002789 if (e->tag == Iex_Const) {
2790 union { ULong u64; Double f64; } u;
2791 HReg res = newVRegV(env);
2792 HReg tmp = newVRegI(env);
2793 vassert(sizeof(u) == 8);
2794 vassert(sizeof(u.u64) == 8);
2795 vassert(sizeof(u.f64) == 8);
2796
2797 if (e->Iex.Const.con->tag == Ico_F64) {
2798 u.f64 = e->Iex.Const.con->Ico.F64;
2799 }
2800 else if (e->Iex.Const.con->tag == Ico_F64i) {
2801 u.u64 = e->Iex.Const.con->Ico.F64i;
2802 }
2803 else
2804 vpanic("iselDblExpr(amd64): const");
2805
2806 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2807 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2808 addInstr(env, AMD64Instr_SseLdSt(
2809 True/*load*/, 8, res,
2810 AMD64AMode_IR(0, hregAMD64_RSP())
2811 ));
2812 add_to_rsp(env, 8);
2813 return res;
2814 }
sewardj9da16972005-02-21 13:58:26 +00002815
sewardjaf1ceca2005-06-30 23:31:27 +00002816 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
sewardj9da16972005-02-21 13:58:26 +00002817 AMD64AMode* am;
2818 HReg res = newVRegV(env);
sewardjaf1ceca2005-06-30 23:31:27 +00002819 vassert(e->Iex.Load.ty == Ity_F64);
2820 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
sewardj9da16972005-02-21 13:58:26 +00002821 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2822 return res;
2823 }
sewardj18303862005-02-21 12:36:54 +00002824
2825 if (e->tag == Iex_Get) {
2826 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2827 hregAMD64_RBP() );
2828 HReg res = newVRegV(env);
2829 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2830 return res;
2831 }
2832
sewardj8d965312005-02-25 02:48:47 +00002833 if (e->tag == Iex_GetI) {
2834 AMD64AMode* am
2835 = genGuestArrayOffset(
2836 env, e->Iex.GetI.descr,
2837 e->Iex.GetI.ix, e->Iex.GetI.bias );
2838 HReg res = newVRegV(env);
2839 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2840 return res;
2841 }
2842
sewardj4796d662006-02-05 16:06:26 +00002843 if (e->tag == Iex_Triop) {
sewardj137015d2005-03-27 04:01:15 +00002844 AMD64SseOp op = Asse_INVALID;
sewardj4796d662006-02-05 16:06:26 +00002845 switch (e->Iex.Triop.op) {
sewardj137015d2005-03-27 04:01:15 +00002846 case Iop_AddF64: op = Asse_ADDF; break;
2847 case Iop_SubF64: op = Asse_SUBF; break;
2848 case Iop_MulF64: op = Asse_MULF; break;
2849 case Iop_DivF64: op = Asse_DIVF; break;
2850 default: break;
2851 }
2852 if (op != Asse_INVALID) {
2853 HReg dst = newVRegV(env);
sewardj4796d662006-02-05 16:06:26 +00002854 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
2855 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
sewardj137015d2005-03-27 04:01:15 +00002856 addInstr(env, mk_vMOVsd_RR(argL, dst));
sewardj4796d662006-02-05 16:06:26 +00002857 /* XXXROUNDINGFIXME */
2858 /* set roundingmode here */
sewardj137015d2005-03-27 04:01:15 +00002859 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2860 return dst;
2861 }
2862 }
2863
sewardjb183b852006-02-03 16:08:03 +00002864 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
sewardj25a85812005-05-08 23:03:48 +00002865 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2866 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2867 HReg dst = newVRegV(env);
2868
2869 /* rf now holds the value to be rounded. The first thing to do
2870 is set the FPU's rounding mode accordingly. */
2871
2872 /* Set host x87 rounding mode */
2873 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2874
2875 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2876 addInstr(env, AMD64Instr_A87Free(1));
2877 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2878 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2879 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
2880 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2881
2882 /* Restore default x87 rounding. */
2883 set_FPU_rounding_default( env );
2884
2885 return dst;
2886 }
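   /* E.g. (illustrative) RoundF64toInt(rm, 2.5) yields 2.0 under
      round-to-nearest-even and round-to-zero, but 3.0 under
      round-towards-plus-infinity; the value stays in F64 format
      throughout. */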
2887
sewardj4796d662006-02-05 16:06:26 +00002888 if (e->tag == Iex_Triop
2889 && (e->Iex.Triop.op == Iop_ScaleF64
2890 || e->Iex.Triop.op == Iop_AtanF64
2891 || e->Iex.Triop.op == Iop_Yl2xF64
sewardjf4c803b2006-09-11 11:07:34 +00002892 || e->Iex.Triop.op == Iop_Yl2xp1F64
2893 || e->Iex.Triop.op == Iop_PRemF64)
sewardj25a85812005-05-08 23:03:48 +00002894 ) {
2895 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
sewardj4796d662006-02-05 16:06:26 +00002896 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
2897 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
sewardj25a85812005-05-08 23:03:48 +00002898 HReg dst = newVRegV(env);
sewardjf4c803b2006-09-11 11:07:34 +00002899 Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
2900 || e->Iex.Triop.op == Iop_PRemF64);
sewardj25a85812005-05-08 23:03:48 +00002901 addInstr(env, AMD64Instr_A87Free(2));
2902
2903 /* one arg -> top of x87 stack */
2904 addInstr(env, AMD64Instr_SseLdSt(
2905 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
2906 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2907
2908 /* other arg -> top of x87 stack */
2909 addInstr(env, AMD64Instr_SseLdSt(
2910 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
2911 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
2912
2913 /* do it */
sewardj4796d662006-02-05 16:06:26 +00002914 /* XXXROUNDINGFIXME */
2915 /* set roundingmode here */
2916 switch (e->Iex.Triop.op) {
sewardj25a85812005-05-08 23:03:48 +00002917 case Iop_ScaleF64:
2918 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
2919 break;
2920 case Iop_AtanF64:
2921 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
2922 break;
2923 case Iop_Yl2xF64:
2924 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
2925 break;
sewardj5e205372005-05-09 02:57:08 +00002926 case Iop_Yl2xp1F64:
2927 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
2928 break;
sewardjf4c803b2006-09-11 11:07:34 +00002929 case Iop_PRemF64:
2930 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
2931 break;
sewardj25a85812005-05-08 23:03:48 +00002932 default:
2933 vassert(0);
2934 }
2935
2936 /* save result */
2937 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
2938 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2939 return dst;
2940 }
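   /* The arg2first flag mirrors x87 operand order: for ScaleF64 and
      PRemF64 the stack ends up st0 = arg1, st1 = arg2, so e.g. fscale
      computes arg1 * 2^trunc(arg2); for fyl2x and friends the order
      is swapped, giving e.g. Yl2xF64(x,y) = x * log2(y). */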
sewardj1a01e652005-02-23 11:39:21 +00002941
2942 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
2943 HReg dst = newVRegV(env);
2944 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
2945 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2946 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
2947 set_SSE_rounding_default( env );
2948 return dst;
2949 }
2950
2951 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32toF64) {
2952 HReg dst = newVRegV(env);
2953 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2954 set_SSE_rounding_default( env );
2955 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
2956 return dst;
2957 }
2958
sewardj137015d2005-03-27 04:01:15 +00002959 if (e->tag == Iex_Unop
2960 && (e->Iex.Unop.op == Iop_NegF64
2961 || e->Iex.Unop.op == Iop_AbsF64)) {
sewardj8d965312005-02-25 02:48:47 +00002962 /* Sigh ... very rough code. Could do much better. */
sewardj137015d2005-03-27 04:01:15 +00002963 /* Get the 128-bit literal 00---0 10---0 into a register
2964 and xor/nand it with the value to be negated. */
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
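      /* dst now holds just the sign bit of the low (double) lane:
         low qword = 1<<63, high qword = 0.  XOR with it flips the
         sign (negation); ANDN (dst = ~dst & tmp) clears it (abs). */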

      if (e->Iex.Unop.op == Iop_NegF64)
         addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      else
         addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));

      add_to_rsp(env, 16);
      return dst;
   }

   if (e->tag == Iex_Binop) {
      A87FpOp fpop = Afp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: fpop = Afp_SQRT; break;
         case Iop_SinF64:  fpop = Afp_SIN;  break;
         case Iop_CosF64:  fpop = Afp_COS;  break;
         case Iop_TanF64:  fpop = Afp_TAN;  break;
         case Iop_2xm1F64: fpop = Afp_2XM1; break;
         default: break;
      }
      if (fpop != Afp_INVALID) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg        dst    = newVRegV(env);
         Int     nNeeded    = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
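         /* fptan pushes a 1.0 on top of tan(x), so the Tan case needs
            two free x87 stack slots rather than one. */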
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
         addInstr(env, AMD64Instr_A87Free(nNeeded));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
         /* XXXROUNDINGFIXME */
         /* set rounding mode here */
         addInstr(env, AMD64Instr_A87FpOp(fpop));
         if (e->Iex.Binop.op==Iop_TanF64) {
            /* get rid of the extra 1.0 that fptan pushes */
            addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
         }
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
         return dst;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//..       case Iop_I32toF64: {
//..          HReg dst = newVRegF(env);
//..          HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
//..          addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
//..          set_FPU_rounding_default(env);
//..          addInstr(env, X86Instr_FpLdStI(
//..                           True/*load*/, 4, dst,
//..                           X86AMode_IR(0, hregX86_ESP())));
//..          add_to_esp(env, 4);
//..          return dst;
//..       }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegV(env);
            AMD64RI*    src    = iselIntExpr_RI(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
            return dst;
         }
         case Iop_F32toF64: {
            HReg f32;
            HReg f64 = newVRegV(env);
            /* this shouldn't be necessary, but be paranoid ... */
            set_SSE_rounding_default(env);
            f32 = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
            return f64;
         }
         default:
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_Mux0X) {
      HReg r8, rX, r0, dst;
      vassert(ty == Ity_F64);
      vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
      r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
      rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
      r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
      dst = newVRegV(env);
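      /* Branch-free select: start with exprX, then overwrite with
         expr0 iff the low 8 bits of the condition are zero. */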
      addInstr(env, mk_vMOVsd_RR(rX,dst));
      addInstr(env, AMD64Instr_Test64(0xFF, r8));
      addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}


/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   Bool       arg1isEReg = False;
   AMD64SseOp op         = Asse_INVALID;
   IRType     ty         = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/,
                       16,
                       dst,
                       AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
                    )
             );
      return dst;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg        dst = newVRegV(env);
      AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
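      /* An Ico_V128 constant is a 16-bit mask, one bit per byte lane:
         bit i set means byte i of the vector is 0xFF.  So 0x0000 is
         all zeroes, 0x00FF makes the low 8 bytes all ones, and
         0x000F the low 4. */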
      if (e->Iex.Const.con->Ico.V128 == 0x0000) {
         dst = generate_zeroes_V128(env);
         return dst;
      } else
      if (e->Iex.Const.con->Ico.V128 == 0x00FF) {
         AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
         /* Both of these literals are sign-extended to 64 bits. */
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0xFFFFFFFF)));
         addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
         add_to_rsp(env, 16);
         return dst;
      } else
      if (e->Iex.Const.con->Ico.V128 == 0x000F) {
         HReg tmp = newVRegI(env);
         AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
         addInstr(env, AMD64Instr_Imm64(0xFFFFFFFFULL, tmp));
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
         addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
         add_to_rsp(env, 16);
         return dst;
      } else {
         goto vec_fail;
#        if 0
         addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
         return dst;
#        endif
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         case Iop_NotV128: {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            return do_sse_NotV128(env, arg);
         }

         case Iop_CmpNEZ64x2: {
            /* We can use SSE2 instructions for this. */
            /* Ideally, we want to do a 64Ix2 comparison against zero of
               the operand.  Problem is no such insn exists.  Solution
               therefore is to do a 32Ix4 comparison instead, and bitwise-
               negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
               let the not'd result of this initial comparison be a:b:c:d.
               What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
               pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
               giving the required result.

               The required selection sequence is 2,3,0,1, which
               according to Intel's documentation means the pshufd
               literal value is 0xB1, that is,
               (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
            */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp = generate_zeroes_V128(env);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
            tmp = do_sse_NotV128(env, tmp);
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
            return dst;
         }

         case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ8x16: op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
         do_CmpNEZ_vector:
         {
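            /* Lanewise compare against zero (all-ones where the lane
               is zero), then invert, so lanes that were nonzero come
               out as all-ones, as CmpNEZ requires. */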
            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp  = newVRegV(env);
            HReg zero = generate_zeroes_V128(env);
            HReg dst;
            addInstr(env, mk_vMOVsd_RR(arg, tmp));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
            dst = do_sse_NotV128(env, tmp);
            return dst;
         }

         case Iop_Recip32Fx4: op = Asse_RCPF;   goto do_32Fx4_unary;
         case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
         case Iop_Sqrt32Fx4:  op = Asse_SQRTF;  goto do_32Fx4_unary;
         do_32Fx4_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
            return dst;
         }

//..       case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
//..       case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
         case Iop_Sqrt64Fx2:  op = Asse_SQRTF;  goto do_64Fx2_unary;
         do_64Fx2_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
            return dst;
         }

         case Iop_Recip32F0x4: op = Asse_RCPF;   goto do_32F0x4_unary;
         case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
         case Iop_Sqrt32F0x4:  op = Asse_SQRTF;  goto do_32F0x4_unary;
         do_32F0x4_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper 3/4 of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper 3/4 is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
            return dst;
         }

//..       case Iop_Recip64F0x2: op = Xsse_RCPF;   goto do_64F0x2_unary;
//..       case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
         case Iop_Sqrt64F0x2:  op = Asse_SQRTF;  goto do_64F0x2_unary;
         do_64F0x2_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper half of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper half is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
            return dst;
         }

         case Iop_32UtoV128: {
            HReg        dst     = newVRegV(env);
            AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
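            /* This writes below %rsp without adjusting it first;
               AFAICS that relies on the AMD64 ABI's 128-byte red zone
               (there are no intervening pushes or calls). */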
            AMD64RI*    ri      = iselIntExpr_RI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
            addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
            return dst;
         }

         case Iop_64UtoV128: {
            HReg        dst  = newVRegV(env);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
            add_to_rsp(env, 8);
            return dst;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_SetV128lo64: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         case Iop_SetV128lo32: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         case Iop_64HLtoV128: {
            AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        dst = newVRegV(env);
            /* do this via the stack (easy, convenient, etc) */
            addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
            addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
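            /* arg1 was pushed first, so it sits at the higher address
               and hence becomes the upper 64 bits, as 64HLtoV128
               requires. */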
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
            add_to_rsp(env, 16);
            return dst;
         }

         case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
         case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
         case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
         case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
         case Iop_Add32Fx4:   op = Asse_ADDF;   goto do_32Fx4;
         case Iop_Div32Fx4:   op = Asse_DIVF;   goto do_32Fx4;
         case Iop_Max32Fx4:   op = Asse_MAXF;   goto do_32Fx4;
         case Iop_Min32Fx4:   op = Asse_MINF;   goto do_32Fx4;
         case Iop_Mul32Fx4:   op = Asse_MULF;   goto do_32Fx4;
         case Iop_Sub32Fx4:   op = Asse_SUBF;   goto do_32Fx4;
         do_32Fx4:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
         case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
         case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
         case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
         case Iop_Add64Fx2:   op = Asse_ADDF;   goto do_64Fx2;
         case Iop_Div64Fx2:   op = Asse_DIVF;   goto do_64Fx2;
         case Iop_Max64Fx2:   op = Asse_MAXF;   goto do_64Fx2;
         case Iop_Min64Fx2:   op = Asse_MINF;   goto do_64Fx2;
         case Iop_Mul64Fx2:   op = Asse_MULF;   goto do_64Fx2;
         case Iop_Sub64Fx2:   op = Asse_SUBF;   goto do_64Fx2;
         do_64Fx2:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
         case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
         case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
         case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
         case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
         case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
         case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
         case Iop_Min32F0x4:   op = Asse_MINF;   goto do_32F0x4;
         case Iop_Mul32F0x4:   op = Asse_MULF;   goto do_32F0x4;
         case Iop_Sub32F0x4:   op = Asse_SUBF;   goto do_32F0x4;
         do_32F0x4: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
         case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
         case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
         case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
         case Iop_Add64F0x2:   op = Asse_ADDF;   goto do_64F0x2;
         case Iop_Div64F0x2:   op = Asse_DIVF;   goto do_64F0x2;
         case Iop_Max64F0x2:   op = Asse_MAXF;   goto do_64F0x2;
         case Iop_Min64F0x2:   op = Asse_MINF;   goto do_64F0x2;
         case Iop_Mul64F0x2:   op = Asse_MULF;   goto do_64F0x2;
         case Iop_Sub64F0x2:   op = Asse_SUBF;   goto do_64F0x2;
         do_64F0x2: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
            return dst;
         }

         case Iop_QNarrow32Sx4:
            op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrow16Sx8:
            op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrow16Ux8:
            op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveHI8x16:
            op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI16x8:
            op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI32x4:
            op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI64x2:
            op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveLO8x16:
            op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO16x8:
            op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO32x4:
            op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO64x2:
            op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_AndV128:    op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV128:     op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV128:    op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x16:    op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x8:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x4:    op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x2:    op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx16:  op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx8:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux16:  op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux8:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux16:   op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux8:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x16:  op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x8:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x4:  op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx16: op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx8:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux16:   op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx8:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux16:   op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x8:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x16:    op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x8:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x4:    op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x2:    op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx16:  op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx8:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux16:  op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux8:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg: {
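            /* The PACK/UNPCK cases above are non-commutative, and the
               x86 two-operand encoding fixes which register gets
               overwritten; routing arg1 through the E (source)
               position is what gives the IR's operand order. */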
            HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            if (arg1isEReg) {
               addInstr(env, mk_vMOVsd_RR(arg2, dst));
               addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
            } else {
               addInstr(env, mk_vMOVsd_RR(arg1, dst));
               addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
            }
            return dst;
         }

         case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
         case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
         case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
         case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
         case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
         case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
         case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
         case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
         do_SseShift: {
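            /* SSE2 vector shifts take the count from the low 64 bits
               of an XMM register.  So build a 16-byte value on the
               stack -- zero in the upper qword, the count in the
               lower -- and load it into ereg. */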
            HReg        greg = iselVecExpr(env, e->Iex.Binop.arg1);
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            HReg        ereg = newVRegV(env);
            HReg        dst  = newVRegV(env);
            addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
            addInstr(env, mk_vMOVsd_RR(greg, dst));
            addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
            add_to_rsp(env, 16);
            return dst;
         }

         default:
            break;
      } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Mux0X) {
      HReg r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
      HReg rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
      HReg r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(rX,dst));
      addInstr(env, AMD64Instr_Test64(0xFF, r8));
      addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
      return dst;
   }

   vec_fail:
   vex_printf("iselVecExpr (amd64, hwcaps = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- STORE --------- */
   case Ist_Store: {
      AMD64AMode* am;
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
      if (tyd == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
//..       if (tyd == Ity_I64) {
//..          HReg vHi, vLo, rA;
//..          iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
//..          rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
//..          addInstr(env, X86Instr_Alu32M(
//..                           Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
//..          addInstr(env, X86Instr_Alu32M(
//..                           Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
//..          return;
//..       }
      if (tyd == Ity_V128) {
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_V128) {
         HReg        vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am  = AMD64AMode_IR(stmt->Ist.Put.offset,
                                         hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, stmt->Ist.PutI.descr,
                   stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );

      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);
      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTemp128( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRType   retty;
      IRDirty* d = stmt->Ist.Dirty.details;
      Bool     passBBP = False;

      if (d->nFxState == 0)
         vassert(!d->needsBBP);

      passBBP = toBool(d->nFxState > 0 && d->needsBBP);
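      /* passBBP: the helper claims to read or write guest state
         (nFxState > 0) and wants it, so hand it the baseblock
         pointer as an extra (first) argument. */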

      /* Marshal args, do the call, clear stack. */
      doHelperCall( env, passBBP, d->guard, d->cee, d->args );

      /* Now figure out what to do with the returned value, if any. */
      if (d->tmp == IRTemp_INVALID)
         /* No return value.  Nothing to do. */
         return;

      retty = typeOfIRTemp(env->type_env, d->tmp);
      if (retty == Ity_I64 || retty == Ity_I32
          || retty == Ity_I16 || retty == Ity_I8) {
         /* The returned value is in %rax.  Park it in the register
            associated with tmp. */
         HReg dst = lookupIRTemp(env, d->tmp);
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
         return;
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MFence:
      addInstr(env, AMD64Instr_MFence());
      return;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      AMD64RI*      dst;
      AMD64CondCode cc;
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
      dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
      cc  = iselCondCode(env,stmt->Ist.Exit.guard);
      addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
      return;
   }

   default: break;
   }
   stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   AMD64RI* ri;
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }
   ri = iselIntExpr_RI(env, next);
   addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch      arch_host,
                            VexArchInfo* archinfo_host,
                            VexAbiInfo*  vbi/*UNUSED*/ )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->hwcaps = hwcaps_host;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
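   /* An I128 temp needs a pair of 64-bit vregs (low half in vregmap,
      high half in vregmapHI); F32, F64 and V128 each live in a single
      128-bit vector-class vreg. */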
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt64, True); break;
         case Ity_I128: hreg   = mkHReg(j++, HRcInt64, True);
                        hregHI = mkHReg(j++, HRcInt64, True); break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env,bb->stmts[i]);

   iselNext(env,bb->next,bb->jumpkind);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host-amd64/isel.c ---*/
/*---------------------------------------------------------------*/