blob: e6955676ac5738711ee7868398aac410552454c7 [file] [log] [blame]
cerioncee30312004-12-17 20:30:21 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
cerioncee30312004-12-17 20:30:21 +00009
sewardje6c53e02011-10-23 07:33:43 +000010 Copyright (C) 2004-2011 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardj64733c42010-10-12 10:10:46 +000012
13 NEON support is
sewardje6c53e02011-10-23 07:33:43 +000014 Copyright (C) 2010-2011 Samsung Electronics
sewardj64733c42010-10-12 10:10:46 +000015 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
cerioncee30312004-12-17 20:30:21 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
cerioncee30312004-12-17 20:30:21 +000022
sewardj752f9062010-05-03 21:38:49 +000023 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000031 02110-1301, USA.
32
sewardj752f9062010-05-03 21:38:49 +000033 The GNU General Public License is contained in the file COPYING.
cerioncee30312004-12-17 20:30:21 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
sewardj6c60b322010-08-22 12:48:28 +000039#include "ir_match.h"
cerioncee30312004-12-17 20:30:21 +000040
sewardjcef7d3e2009-07-02 12:21:59 +000041#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
sewardje2ea1762010-09-22 00:56:37 +000044#include "host_generic_simd64.h" // for 32-bit SIMD helpers
sewardjcef7d3e2009-07-02 12:21:59 +000045#include "host_arm_defs.h"
cerioncee30312004-12-17 20:30:21 +000046
47
cerioncee30312004-12-17 20:30:21 +000048/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +000049/*--- ARMvfp control word stuff ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
56
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
59 should be zero).
60*/
61
62#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +000066/*--- ISelEnv ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
sewardj6c299f32009-12-31 18:00:12 +000082 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
87 - The name of the vreg in which we stash a copy of the link reg, so
88 helper functions don't kill it.
cerioncee30312004-12-17 20:30:21 +000089
90 - The code array, that is, the insns selected so far.
91
92 - A counter, for generating new virtual registers.
93
sewardj6c299f32009-12-31 18:00:12 +000094 - The host hardware capabilities word. This is set at the start
95 and does not change.
96
cerioncee30312004-12-17 20:30:21 +000097 Note, this is all host-independent. */
98
99typedef
100 struct {
101 IRTypeEnv* type_env;
102
103 HReg* vregmap;
sewardj6c299f32009-12-31 18:00:12 +0000104 HReg* vregmapHI;
cerioncee30312004-12-17 20:30:21 +0000105 Int n_vregmap;
106
sewardj6c299f32009-12-31 18:00:12 +0000107 HReg savedLR;
108
cerioncee30312004-12-17 20:30:21 +0000109 HInstrArray* code;
110
111 Int vreg_ctr;
sewardj6c299f32009-12-31 18:00:12 +0000112
113 UInt hwcaps;
cerioncee30312004-12-17 20:30:21 +0000114 }
115 ISelEnv;
116
117static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
118{
119 vassert(tmp >= 0);
120 vassert(tmp < env->n_vregmap);
121 return env->vregmap[tmp];
122}
123
sewardj6c299f32009-12-31 18:00:12 +0000124static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
125{
126 vassert(tmp >= 0);
127 vassert(tmp < env->n_vregmap);
128 vassert(env->vregmapHI[tmp] != INVALID_HREG);
129 *vrLO = env->vregmap[tmp];
130 *vrHI = env->vregmapHI[tmp];
131}
132
cerioncee30312004-12-17 20:30:21 +0000133static void addInstr ( ISelEnv* env, ARMInstr* instr )
134{
135 addHInstr(env->code, instr);
136 if (vex_traceflags & VEX_TRACE_VCODE) {
137 ppARMInstr(instr);
138 vex_printf("\n");
139 }
sewardj6c60b322010-08-22 12:48:28 +0000140#if 0
141 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
142 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
143 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
144 ppARMInstr(instr);
145 vex_printf("\n");
146 }
147#endif
cerioncee30312004-12-17 20:30:21 +0000148}
149
150static HReg newVRegI ( ISelEnv* env )
151{
152 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
153 env->vreg_ctr++;
154 return reg;
155}
156
sewardj6c299f32009-12-31 18:00:12 +0000157static HReg newVRegD ( ISelEnv* env )
158{
159 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
160 env->vreg_ctr++;
161 return reg;
162}
163
164static HReg newVRegF ( ISelEnv* env )
165{
166 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
167 env->vreg_ctr++;
168 return reg;
169}
cerioncee30312004-12-17 20:30:21 +0000170
sewardj6c60b322010-08-22 12:48:28 +0000171static HReg newVRegV ( ISelEnv* env )
172{
173 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
174 env->vreg_ctr++;
175 return reg;
176}
177
178/* These are duplicated in guest_arm_toIR.c */
179static IRExpr* unop ( IROp op, IRExpr* a )
180{
181 return IRExpr_Unop(op, a);
182}
183
184static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
185{
186 return IRExpr_Binop(op, a1, a2);
187}
188
sewardj6c60b322010-08-22 12:48:28 +0000189static IRExpr* bind ( Int binder )
190{
191 return IRExpr_Binder(binder);
192}
193
cerioncee30312004-12-17 20:30:21 +0000194
195/*---------------------------------------------------------*/
196/*--- ISEL: Forward declarations ---*/
197/*---------------------------------------------------------*/
198
199/* These are organised as iselXXX and iselXXX_wrk pairs. The
200 iselXXX_wrk do the real work, but are not to be called directly.
201 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
202 checks that all returned registers are virtual. You should not
203 call the _wrk version directly.
204*/
sewardj6c299f32009-12-31 18:00:12 +0000205static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
206static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000207
sewardj6c299f32009-12-31 18:00:12 +0000208static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
209static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000210
sewardj6c299f32009-12-31 18:00:12 +0000211static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
212static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000213
sewardjff7f5b72011-07-11 11:43:38 +0000214static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
215static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
sewardj6c60b322010-08-22 12:48:28 +0000216
sewardj6c299f32009-12-31 18:00:12 +0000217static ARMRI84* iselIntExpr_RI84_wrk
218 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
219static ARMRI84* iselIntExpr_RI84
220 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000221
sewardj6c299f32009-12-31 18:00:12 +0000222static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
223static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000224
sewardj6c299f32009-12-31 18:00:12 +0000225static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
226static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000227
sewardj6c299f32009-12-31 18:00:12 +0000228static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
229static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
230
231static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
232 ISelEnv* env, IRExpr* e );
233static void iselInt64Expr ( HReg* rHi, HReg* rLo,
234 ISelEnv* env, IRExpr* e );
235
236static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
237static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
238
239static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
240static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000241
sewardj6c60b322010-08-22 12:48:28 +0000242static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
243static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
244
245static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
246static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000247
248/*---------------------------------------------------------*/
249/*--- ISEL: Misc helpers ---*/
250/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000251
252static UInt ROR32 ( UInt x, UInt sh ) {
253 vassert(sh >= 0 && sh < 32);
254 if (sh == 0)
255 return x;
256 else
257 return (x << (32-sh)) | (x >> sh);
cerioncee30312004-12-17 20:30:21 +0000258}
sewardj6c299f32009-12-31 18:00:12 +0000259
260/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261 form, and if so return the components. */
262static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
263{
264 UInt i;
265 for (i = 0; i < 16; i++) {
266 if (0 == (u & 0xFFFFFF00)) {
267 *u8 = u;
268 *u4 = i;
269 return True;
270 }
271 u = ROR32(u, 30);
272 }
273 vassert(i == 16);
274 return False;
275}
cerioncee30312004-12-17 20:30:21 +0000276
277/* Make a int reg-reg move. */
sewardj6c299f32009-12-31 18:00:12 +0000278static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
cerioncee30312004-12-17 20:30:21 +0000279{
280 vassert(hregClass(src) == HRcInt32);
281 vassert(hregClass(dst) == HRcInt32);
sewardj6c299f32009-12-31 18:00:12 +0000282 return ARMInstr_Mov(dst, ARMRI84_R(src));
cerioncee30312004-12-17 20:30:21 +0000283}
284
sewardj6c299f32009-12-31 18:00:12 +0000285/* Set the VFP unit's rounding mode to default (round to nearest). */
286static void set_VFP_rounding_default ( ISelEnv* env )
cerioncee30312004-12-17 20:30:21 +0000287{
sewardj6c299f32009-12-31 18:00:12 +0000288 /* mov rTmp, #DEFAULT_FPSCR
289 fmxr fpscr, rTmp
290 */
291 HReg rTmp = newVRegI(env);
292 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
293 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
cerioncee30312004-12-17 20:30:21 +0000294}
295
sewardj6c299f32009-12-31 18:00:12 +0000296/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
297 expression denoting a value in the range 0 .. 3, indicating a round
298 mode encoded as per type IRRoundingMode. Set FPSCR to have the
299 same rounding.
300*/
301static
302void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
cerioncee30312004-12-17 20:30:21 +0000303{
sewardj6c299f32009-12-31 18:00:12 +0000304 /* This isn't simple, because 'mode' carries an IR rounding
305 encoding, and we need to translate that to an ARMvfp one:
306 The IR encoding:
307 00 to nearest (the default)
308 10 to +infinity
309 01 to -infinity
310 11 to zero
311 The ARMvfp encoding:
312 00 to nearest
313 01 to +infinity
314 10 to -infinity
315 11 to zero
316 Easy enough to do; just swap the two bits.
317 */
318 HReg irrm = iselIntExpr_R(env, mode);
319 HReg tL = newVRegI(env);
320 HReg tR = newVRegI(env);
321 HReg t3 = newVRegI(env);
322 /* tL = irrm << 1;
323 tR = irrm >> 1; if we're lucky, these will issue together
324 tL &= 2;
325 tR &= 1; ditto
326 t3 = tL | tR;
327 t3 <<= 22;
328 fmxr fpscr, t3
329 */
330 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
331 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
332 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
333 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
334 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
335 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
336 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
cerioncee30312004-12-17 20:30:21 +0000337}
cerioncee30312004-12-17 20:30:21 +0000338
cerioncee30312004-12-17 20:30:21 +0000339
sewardj6c299f32009-12-31 18:00:12 +0000340/*---------------------------------------------------------*/
341/*--- ISEL: Function call helpers ---*/
342/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +0000343
cerioncee30312004-12-17 20:30:21 +0000344/* Used only in doHelperCall. See big comment in doHelperCall re
sewardj6c299f32009-12-31 18:00:12 +0000345 handling of register-parameter args. This function figures out
346 whether evaluation of an expression might require use of a fixed
347 register. If in doubt return True (safe but suboptimal).
cerioncee30312004-12-17 20:30:21 +0000348*/
349static
350Bool mightRequireFixedRegs ( IRExpr* e )
351{
352 switch (e->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000353 case Iex_RdTmp: case Iex_Const: case Iex_Get:
354 return False;
355 default:
356 return True;
cerioncee30312004-12-17 20:30:21 +0000357 }
358}
sewardj6c299f32009-12-31 18:00:12 +0000359
cerioncee30312004-12-17 20:30:21 +0000360
361/* Do a complete function call. guard is a Ity_Bit expression
362 indicating whether or not the call happens. If guard==NULL, the
sewardj6c299f32009-12-31 18:00:12 +0000363 call is unconditional. Returns True iff it managed to handle this
364 combination of arg/return types, else returns False. */
cerioncee30312004-12-17 20:30:21 +0000365
366static
sewardj6c299f32009-12-31 18:00:12 +0000367Bool doHelperCall ( ISelEnv* env,
368 Bool passBBP,
cerioncee30312004-12-17 20:30:21 +0000369 IRExpr* guard, IRCallee* cee, IRExpr** args )
370{
cerioncee30312004-12-17 20:30:21 +0000371 ARMCondCode cc;
sewardj6c299f32009-12-31 18:00:12 +0000372 HReg argregs[ARM_N_ARGREGS];
373 HReg tmpregs[ARM_N_ARGREGS];
374 Bool go_fast;
375 Int n_args, i, nextArgReg;
376 ULong target;
cerioncee30312004-12-17 20:30:21 +0000377
sewardj6c299f32009-12-31 18:00:12 +0000378 vassert(ARM_N_ARGREGS == 4);
cerioncee30312004-12-17 20:30:21 +0000379
sewardj6c299f32009-12-31 18:00:12 +0000380 /* Marshal args for a call and do the call.
cerioncee30312004-12-17 20:30:21 +0000381
sewardj6c299f32009-12-31 18:00:12 +0000382 If passBBP is True, r8 (the baseblock pointer) is to be passed
383 as the first arg.
cerioncee30312004-12-17 20:30:21 +0000384
sewardj6c299f32009-12-31 18:00:12 +0000385 This function only deals with a tiny set of possibilities, which
386 cover all helpers in practice. The restrictions are that only
387 arguments in registers are supported, hence only ARM_N_REGPARMS
388 x 32 integer bits in total can be passed. In fact the only
389 supported arg types are I32 and I64.
cerioncee30312004-12-17 20:30:21 +0000390
sewardj6c299f32009-12-31 18:00:12 +0000391 Generating code which is both efficient and correct when
392 parameters are to be passed in registers is difficult, for the
393 reasons elaborated in detail in comments attached to
394 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
395 of the method described in those comments.
cerioncee30312004-12-17 20:30:21 +0000396
sewardj6c299f32009-12-31 18:00:12 +0000397 The problem is split into two cases: the fast scheme and the
398 slow scheme. In the fast scheme, arguments are computed
399 directly into the target (real) registers. This is only safe
400 when we can be sure that computation of each argument will not
401 trash any real registers set by computation of any other
402 argument.
cerioncee30312004-12-17 20:30:21 +0000403
sewardj6c299f32009-12-31 18:00:12 +0000404 In the slow scheme, all args are first computed into vregs, and
405 once they are all done, they are moved to the relevant real
406 regs. This always gives correct code, but it also gives a bunch
407 of vreg-to-rreg moves which are usually redundant but are hard
408 for the register allocator to get rid of.
409
410 To decide which scheme to use, all argument expressions are
411 first examined. If they are all so simple that it is clear they
412 will be evaluated without use of any fixed registers, use the
413 fast scheme, else use the slow scheme. Note also that only
414 unconditional calls may use the fast scheme, since having to
415 compute a condition expression could itself trash real
416 registers.
cerioncee30312004-12-17 20:30:21 +0000417
418 Note this requires being able to examine an expression and
419 determine whether or not evaluation of it might use a fixed
sewardj6c299f32009-12-31 18:00:12 +0000420 register. That requires knowledge of how the rest of this insn
421 selector works. Currently just the following 3 are regarded as
422 safe -- hopefully they cover the majority of arguments in
423 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
cerioncee30312004-12-17 20:30:21 +0000424 */
cerioncee30312004-12-17 20:30:21 +0000425
sewardj6c299f32009-12-31 18:00:12 +0000426 /* Note that the cee->regparms field is meaningless on ARM hosts
427 (since there is only one calling convention) and so we always
428 ignore it. */
cerioncee30312004-12-17 20:30:21 +0000429
sewardj6c299f32009-12-31 18:00:12 +0000430 n_args = 0;
431 for (i = 0; args[i]; i++)
432 n_args++;
cerioncee30312004-12-17 20:30:21 +0000433
sewardj6c299f32009-12-31 18:00:12 +0000434 argregs[0] = hregARM_R0();
435 argregs[1] = hregARM_R1();
436 argregs[2] = hregARM_R2();
437 argregs[3] = hregARM_R3();
cerioncee30312004-12-17 20:30:21 +0000438
sewardj6c299f32009-12-31 18:00:12 +0000439 tmpregs[0] = tmpregs[1] = tmpregs[2] =
440 tmpregs[3] = INVALID_HREG;
cerioncee30312004-12-17 20:30:21 +0000441
sewardj6c299f32009-12-31 18:00:12 +0000442 /* First decide which scheme (slow or fast) is to be used. First
443 assume the fast scheme, and select slow if any contraindications
444 (wow) appear. */
445
446 go_fast = True;
447
448 if (guard) {
449 if (guard->tag == Iex_Const
450 && guard->Iex.Const.con->tag == Ico_U1
451 && guard->Iex.Const.con->Ico.U1 == True) {
452 /* unconditional */
453 } else {
454 /* Not manifestly unconditional -- be conservative. */
455 go_fast = False;
456 }
cerioncee30312004-12-17 20:30:21 +0000457 }
458
sewardj6c299f32009-12-31 18:00:12 +0000459 if (go_fast) {
460 for (i = 0; i < n_args; i++) {
cerioncee30312004-12-17 20:30:21 +0000461 if (mightRequireFixedRegs(args[i])) {
sewardj6c299f32009-12-31 18:00:12 +0000462 go_fast = False;
cerioncee30312004-12-17 20:30:21 +0000463 break;
464 }
465 }
sewardj6c299f32009-12-31 18:00:12 +0000466 }
467 /* At this point the scheme to use has been established. Generate
468 code to get the arg values into the argument rregs. If we run
469 out of arg regs, give up. */
cerioncee30312004-12-17 20:30:21 +0000470
sewardj6c299f32009-12-31 18:00:12 +0000471 if (go_fast) {
cerioncee30312004-12-17 20:30:21 +0000472
sewardj6c299f32009-12-31 18:00:12 +0000473 /* FAST SCHEME */
474 nextArgReg = 0;
cerioncee30312004-12-17 20:30:21 +0000475 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000476 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
477 hregARM_R8() ));
478 nextArgReg++;
cerioncee30312004-12-17 20:30:21 +0000479 }
480
sewardj6c299f32009-12-31 18:00:12 +0000481 for (i = 0; i < n_args; i++) {
482 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
483 if (nextArgReg >= ARM_N_ARGREGS)
484 return False; /* out of argregs */
485 if (aTy == Ity_I32) {
486 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
487 iselIntExpr_R(env, args[i]) ));
488 nextArgReg++;
489 }
490 else if (aTy == Ity_I64) {
491 /* 64-bit args must be passed in an a reg-pair of the form
492 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
493 On a little-endian host, the less significant word is
494 passed in the lower-numbered register. */
495 if (nextArgReg & 1) {
496 if (nextArgReg >= ARM_N_ARGREGS)
497 return False; /* out of argregs */
498 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
499 nextArgReg++;
500 }
501 if (nextArgReg >= ARM_N_ARGREGS)
502 return False; /* out of argregs */
503 HReg raHi, raLo;
504 iselInt64Expr(&raHi, &raLo, env, args[i]);
505 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
506 nextArgReg++;
507 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
508 nextArgReg++;
509 }
510 else
511 return False; /* unhandled arg type */
512 }
513
514 /* Fast scheme only applies for unconditional calls. Hence: */
515 cc = ARMcc_AL;
cerioncee30312004-12-17 20:30:21 +0000516
517 } else {
518
sewardj6c299f32009-12-31 18:00:12 +0000519 /* SLOW SCHEME; move via temporaries */
520 nextArgReg = 0;
521
cerioncee30312004-12-17 20:30:21 +0000522 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000523 /* This is pretty stupid; better to move directly to r0
524 after the rest of the args are done. */
525 tmpregs[nextArgReg] = newVRegI(env);
526 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
527 hregARM_R8() ));
528 nextArgReg++;
529 }
530
531 for (i = 0; i < n_args; i++) {
532 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
533 if (nextArgReg >= ARM_N_ARGREGS)
534 return False; /* out of argregs */
535 if (aTy == Ity_I32) {
536 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
537 nextArgReg++;
538 }
539 else if (aTy == Ity_I64) {
540 /* Same comment applies as in the Fast-scheme case. */
541 if (nextArgReg & 1)
542 nextArgReg++;
543 if (nextArgReg + 1 >= ARM_N_ARGREGS)
544 return False; /* out of argregs */
545 HReg raHi, raLo;
546 iselInt64Expr(&raHi, &raLo, env, args[i]);
547 tmpregs[nextArgReg] = raLo;
548 nextArgReg++;
549 tmpregs[nextArgReg] = raHi;
550 nextArgReg++;
551 }
552 }
553
554 /* Now we can compute the condition. We can't do it earlier
555 because the argument computations could trash the condition
556 codes. Be a bit clever to handle the common case where the
557 guard is 1:Bit. */
558 cc = ARMcc_AL;
559 if (guard) {
560 if (guard->tag == Iex_Const
561 && guard->Iex.Const.con->tag == Ico_U1
562 && guard->Iex.Const.con->Ico.U1 == True) {
563 /* unconditional -- do nothing */
564 } else {
565 cc = iselCondCode( env, guard );
566 }
567 }
568
569 /* Move the args to their final destinations. */
570 for (i = 0; i < nextArgReg; i++) {
571 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
572 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
573 continue;
574 }
575 /* None of these insns, including any spill code that might
576 be generated, may alter the condition codes. */
577 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
cerioncee30312004-12-17 20:30:21 +0000578 }
579
580 }
581
sewardj6c299f32009-12-31 18:00:12 +0000582 /* Should be assured by checks above */
583 vassert(nextArgReg <= ARM_N_ARGREGS);
cerioncee30312004-12-17 20:30:21 +0000584
sewardj6c299f32009-12-31 18:00:12 +0000585 target = (HWord)Ptr_to_ULong(cee->addr);
cerioncee30312004-12-17 20:30:21 +0000586
sewardj6c299f32009-12-31 18:00:12 +0000587 /* nextArgReg doles out argument registers. Since these are
588 assigned in the order r0, r1, r2, r3, its numeric value at this
589 point, which must be between 0 and 4 inclusive, is going to be
590 equal to the number of arg regs in use for the call. Hence bake
591 that number into the call (we'll need to know it when doing
592 register allocation, to know what regs the call reads.)
cerioncee30312004-12-17 20:30:21 +0000593
sewardj6c299f32009-12-31 18:00:12 +0000594 There is a bit of a twist -- harmless but worth recording.
595 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
596 the first arg in r0 and the second in r3:r2, but r1 isn't used.
597 We nevertheless have nextArgReg==4 and bake that into the call
598 instruction. This will mean the register allocator wil believe
599 this insn reads r1 when in fact it doesn't. But that's
600 harmless; it just artificially extends the live range of r1
601 unnecessarily. The best fix would be to put into the
602 instruction, a bitmask indicating which of r0/1/2/3 carry live
603 values. But that's too much hassle. */
cerioncee30312004-12-17 20:30:21 +0000604
sewardj6c299f32009-12-31 18:00:12 +0000605 /* Finally, the call itself. */
606 addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
cerioncee30312004-12-17 20:30:21 +0000607
sewardj6c299f32009-12-31 18:00:12 +0000608 return True; /* success */
cerioncee30312004-12-17 20:30:21 +0000609}
610
611
612/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000613/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
cerioncee30312004-12-17 20:30:21 +0000614/*---------------------------------------------------------*/
615
sewardj6c299f32009-12-31 18:00:12 +0000616/* Select insns for an integer-typed expression, and add them to the
617 code list. Return a reg holding the result. This reg will be a
618 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
619 want to modify it, ask for a new vreg, copy it in there, and modify
620 the copy. The register allocator will do its best to map both
621 vregs to the same real register, so the copies will often disappear
622 later in the game.
cerioncee30312004-12-17 20:30:21 +0000623
sewardj6c299f32009-12-31 18:00:12 +0000624 This should handle expressions of 32, 16 and 8-bit type. All
625 results are returned in a 32-bit register. For 16- and 8-bit
626 expressions, the upper 16/24 bits are arbitrary, so you should mask
627 or sign extend partial values if necessary.
cerioncee30312004-12-17 20:30:21 +0000628*/
629
sewardj6c299f32009-12-31 18:00:12 +0000630/* --------------------- AMode1 --------------------- */
631
632/* Return an AMode1 which computes the value of the specified
633 expression, possibly also adding insns to the code list as a
634 result. The expression may only be a 32-bit one.
635*/
cerioncee30312004-12-17 20:30:21 +0000636
637static Bool sane_AMode1 ( ARMAMode1* am )
638{
sewardj6c299f32009-12-31 18:00:12 +0000639 switch (am->tag) {
640 case ARMam1_RI:
641 return
642 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
643 && (hregIsVirtual(am->ARMam1.RI.reg)
644 || am->ARMam1.RI.reg == hregARM_R8())
645 && am->ARMam1.RI.simm13 >= -4095
646 && am->ARMam1.RI.simm13 <= 4095 );
647 case ARMam1_RRS:
648 return
649 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
650 && hregIsVirtual(am->ARMam1.RRS.base)
651 && hregClass(am->ARMam1.RRS.index) == HRcInt32
652 && hregIsVirtual(am->ARMam1.RRS.index)
653 && am->ARMam1.RRS.shift >= 0
654 && am->ARMam1.RRS.shift <= 3 );
655 default:
656 vpanic("sane_AMode: unknown ARM AMode1 tag");
657 }
cerioncee30312004-12-17 20:30:21 +0000658}
659
660static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
661{
sewardj6c299f32009-12-31 18:00:12 +0000662 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
663 vassert(sane_AMode1(am));
664 return am;
cerioncee30312004-12-17 20:30:21 +0000665}
666
cerioncee30312004-12-17 20:30:21 +0000667static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
668{
sewardj6c299f32009-12-31 18:00:12 +0000669 IRType ty = typeOfIRExpr(env->type_env,e);
670 vassert(ty == Ity_I32);
cerioncee30312004-12-17 20:30:21 +0000671
sewardj6c299f32009-12-31 18:00:12 +0000672 /* FIXME: add RRS matching */
cerioncee30312004-12-17 20:30:21 +0000673
sewardj6c299f32009-12-31 18:00:12 +0000674 /* {Add32,Sub32}(expr,simm13) */
675 if (e->tag == Iex_Binop
676 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
677 && e->Iex.Binop.arg2->tag == Iex_Const
678 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
679 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
680 if (simm >= -4095 && simm <= 4095) {
681 HReg reg;
682 if (e->Iex.Binop.op == Iop_Sub32)
683 simm = -simm;
684 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
685 return ARMAMode1_RI(reg, simm);
686 }
687 }
cerioncee30312004-12-17 20:30:21 +0000688
sewardj6c299f32009-12-31 18:00:12 +0000689 /* Doesn't match anything in particular. Generate it into
690 a register and use that. */
691 {
692 HReg reg = iselIntExpr_R(env, e);
693 return ARMAMode1_RI(reg, 0);
694 }
695
cerioncee30312004-12-17 20:30:21 +0000696}
697
698
sewardj6c299f32009-12-31 18:00:12 +0000699/* --------------------- AMode2 --------------------- */
cerioncee30312004-12-17 20:30:21 +0000700
sewardj6c299f32009-12-31 18:00:12 +0000701/* Return an AMode2 which computes the value of the specified
702 expression, possibly also adding insns to the code list as a
703 result. The expression may only be a 32-bit one.
704*/
cerioncee30312004-12-17 20:30:21 +0000705
706static Bool sane_AMode2 ( ARMAMode2* am )
707{
708 switch (am->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000709 case ARMam2_RI:
710 return
711 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
712 && hregIsVirtual(am->ARMam2.RI.reg)
713 && am->ARMam2.RI.simm9 >= -255
714 && am->ARMam2.RI.simm9 <= 255 );
715 case ARMam2_RR:
716 return
717 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
718 && hregIsVirtual(am->ARMam2.RR.base)
719 && hregClass(am->ARMam2.RR.index) == HRcInt32
720 && hregIsVirtual(am->ARMam2.RR.index) );
721 default:
722 vpanic("sane_AMode: unknown ARM AMode2 tag");
cerioncee30312004-12-17 20:30:21 +0000723 }
724}
725
sewardj6c299f32009-12-31 18:00:12 +0000726static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
727{
728 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
729 vassert(sane_AMode2(am));
730 return am;
731}
732
cerioncee30312004-12-17 20:30:21 +0000733static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
734{
sewardj6c299f32009-12-31 18:00:12 +0000735 IRType ty = typeOfIRExpr(env->type_env,e);
736 vassert(ty == Ity_I32);
737
738 /* FIXME: add RR matching */
739
740 /* {Add32,Sub32}(expr,simm8) */
741 if (e->tag == Iex_Binop
742 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
743 && e->Iex.Binop.arg2->tag == Iex_Const
744 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
745 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
746 if (simm >= -255 && simm <= 255) {
747 HReg reg;
748 if (e->Iex.Binop.op == Iop_Sub32)
749 simm = -simm;
750 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
751 return ARMAMode2_RI(reg, simm);
752 }
753 }
754
755 /* Doesn't match anything in particular. Generate it into
756 a register and use that. */
757 {
758 HReg reg = iselIntExpr_R(env, e);
759 return ARMAMode2_RI(reg, 0);
760 }
761
cerioncee30312004-12-17 20:30:21 +0000762}
sewardj6c299f32009-12-31 18:00:12 +0000763
764
765/* --------------------- AModeV --------------------- */
766
767/* Return an AModeV which computes the value of the specified
768 expression, possibly also adding insns to the code list as a
769 result. The expression may only be a 32-bit one.
sewardj48b279b2007-11-16 12:43:32 +0000770*/
cerioncee30312004-12-17 20:30:21 +0000771
sewardj6c299f32009-12-31 18:00:12 +0000772static Bool sane_AModeV ( ARMAModeV* am )
773{
774 return toBool( hregClass(am->reg) == HRcInt32
775 && hregIsVirtual(am->reg)
776 && am->simm11 >= -1020 && am->simm11 <= 1020
777 && 0 == (am->simm11 & 3) );
cerioncee30312004-12-17 20:30:21 +0000778}
779
sewardj6c299f32009-12-31 18:00:12 +0000780static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000781{
sewardj6c299f32009-12-31 18:00:12 +0000782 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
783 vassert(sane_AModeV(am));
784 return am;
785}
786
787static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
788{
789 IRType ty = typeOfIRExpr(env->type_env,e);
790 vassert(ty == Ity_I32);
791
792 /* {Add32,Sub32}(expr, simm8 << 2) */
793 if (e->tag == Iex_Binop
794 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
795 && e->Iex.Binop.arg2->tag == Iex_Const
796 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
797 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
798 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
799 HReg reg;
800 if (e->Iex.Binop.op == Iop_Sub32)
801 simm = -simm;
802 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
803 return mkARMAModeV(reg, simm);
804 }
cerioncee30312004-12-17 20:30:21 +0000805 }
sewardj6c299f32009-12-31 18:00:12 +0000806
807 /* Doesn't match anything in particular. Generate it into
808 a register and use that. */
809 {
810 HReg reg = iselIntExpr_R(env, e);
811 return mkARMAModeV(reg, 0);
812 }
813
cerioncee30312004-12-17 20:30:21 +0000814}
815
sewardj6c60b322010-08-22 12:48:28 +0000816/* -------------------- AModeN -------------------- */
817
818static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
819{
820 return iselIntExpr_AModeN_wrk(env, e);
821}
822
823static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
824{
825 HReg reg = iselIntExpr_R(env, e);
826 return mkARMAModeN_R(reg);
827}
828
sewardj6c299f32009-12-31 18:00:12 +0000829
830/* --------------------- RI84 --------------------- */
831
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In this case didInv must not be NULL, and *didInv is set
   to True if such an inverted form is returned (and to False
   otherwise).  This complication is so as to allow generation of an
   RI84 which is suitable for use in either an AND or BIC instruction,
   without knowing (before this call) which one.
*/
839static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
840 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000841{
sewardj6c299f32009-12-31 18:00:12 +0000842 ARMRI84* ri;
843 if (mayInv)
844 vassert(didInv != NULL);
845 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
846 /* sanity checks ... */
847 switch (ri->tag) {
848 case ARMri84_I84:
849 return ri;
850 case ARMri84_R:
851 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
852 vassert(hregIsVirtual(ri->ARMri84.R.reg));
853 return ri;
854 default:
855 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
cerioncee30312004-12-17 20:30:21 +0000856 }
857}
858
859/* DO NOT CALL THIS DIRECTLY ! */
sewardj6c299f32009-12-31 18:00:12 +0000860static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
861 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000862{
sewardj6c299f32009-12-31 18:00:12 +0000863 IRType ty = typeOfIRExpr(env->type_env,e);
864 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
cerioncee30312004-12-17 20:30:21 +0000865
sewardj6c299f32009-12-31 18:00:12 +0000866 if (didInv) *didInv = False;
867
868 /* special case: immediate */
869 if (e->tag == Iex_Const) {
870 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
871 switch (e->Iex.Const.con->tag) {
872 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
873 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
874 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
875 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
876 }
877 if (fitsIn8x4(&u8, &u4, u)) {
878 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
879 }
880 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
881 vassert(didInv);
882 *didInv = True;
883 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
884 }
885 /* else fail, fall through to default case */
886 }
887
888 /* default case: calculate into a register and return that */
889 {
890 HReg r = iselIntExpr_R ( env, e );
891 return ARMRI84_R(r);
892 }
cerioncee30312004-12-17 20:30:21 +0000893}
894
895
sewardj6c299f32009-12-31 18:00:12 +0000896/* --------------------- RI5 --------------------- */
897
898/* Select instructions to generate 'e' into a RI5. */
899
900static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
901{
902 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
903 /* sanity checks ... */
904 switch (ri->tag) {
905 case ARMri5_I5:
906 return ri;
907 case ARMri5_R:
908 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
909 vassert(hregIsVirtual(ri->ARMri5.R.reg));
910 return ri;
911 default:
912 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
913 }
914}
915
916/* DO NOT CALL THIS DIRECTLY ! */
917static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
918{
919 IRType ty = typeOfIRExpr(env->type_env,e);
920 vassert(ty == Ity_I32 || ty == Ity_I8);
921
922 /* special case: immediate */
923 if (e->tag == Iex_Const) {
924 UInt u; /* both invalid */
925 switch (e->Iex.Const.con->tag) {
926 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
927 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
928 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
929 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
930 }
931 if (u >= 1 && u <= 31) {
932 return ARMRI5_I5(u);
933 }
934 /* else fail, fall through to default case */
935 }
936
937 /* default case: calculate into a register and return that */
938 {
939 HReg r = iselIntExpr_R ( env, e );
940 return ARMRI5_R(r);
941 }
942}
cerioncee30312004-12-17 20:30:21 +0000943
944
sewardj6c299f32009-12-31 18:00:12 +0000945/* ------------------- CondCode ------------------- */
cerioncee30312004-12-17 20:30:21 +0000946
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
950
951static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
952{
sewardj6c299f32009-12-31 18:00:12 +0000953 ARMCondCode cc = iselCondCode_wrk(env,e);
sewardj6c60b322010-08-22 12:48:28 +0000954 vassert(cc != ARMcc_NV);
sewardj6c299f32009-12-31 18:00:12 +0000955 return cc;
cerioncee30312004-12-17 20:30:21 +0000956}
957
cerioncee30312004-12-17 20:30:21 +0000958static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
959{
sewardj6c299f32009-12-31 18:00:12 +0000960 vassert(e);
961 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
cerioncee30312004-12-17 20:30:21 +0000962
sewardj6c299f32009-12-31 18:00:12 +0000963 /* var */
964 if (e->tag == Iex_RdTmp) {
965 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
966 /* CmpOrTst doesn't modify rTmp; so this is OK. */
967 ARMRI84* one = ARMRI84_I84(1,0);
968 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
969 return ARMcc_NE;
970 }
971
972 /* Not1(e) */
973 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
974 /* Generate code for the arg, and negate the test condition */
975 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
976 }
977
978 /* --- patterns rooted at: 32to1 --- */
979
980 if (e->tag == Iex_Unop
981 && e->Iex.Unop.op == Iop_32to1) {
982 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
983 ARMRI84* one = ARMRI84_I84(1,0);
984 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
985 return ARMcc_NE;
986 }
987
988 /* --- patterns rooted at: CmpNEZ8 --- */
989
990 if (e->tag == Iex_Unop
991 && e->Iex.Unop.op == Iop_CmpNEZ8) {
992 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
993 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
994 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
995 return ARMcc_NE;
996 }
997
998 /* --- patterns rooted at: CmpNEZ32 --- */
999
1000 if (e->tag == Iex_Unop
1001 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1002 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1003 ARMRI84* zero = ARMRI84_I84(0,0);
1004 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1005 return ARMcc_NE;
1006 }
1007
1008 /* --- patterns rooted at: CmpNEZ64 --- */
1009
1010 if (e->tag == Iex_Unop
1011 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1012 HReg tHi, tLo;
1013 HReg tmp = newVRegI(env);
1014 ARMRI84* zero = ARMRI84_I84(0,0);
1015 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1016 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1017 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1018 return ARMcc_NE;
1019 }
1020
1021 /* --- Cmp*32*(x,y) --- */
1022 if (e->tag == Iex_Binop
1023 && (e->Iex.Binop.op == Iop_CmpEQ32
1024 || e->Iex.Binop.op == Iop_CmpNE32
1025 || e->Iex.Binop.op == Iop_CmpLT32S
1026 || e->Iex.Binop.op == Iop_CmpLT32U
1027 || e->Iex.Binop.op == Iop_CmpLE32S
1028 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1029 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1030 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1031 env, e->Iex.Binop.arg2);
1032 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1033 switch (e->Iex.Binop.op) {
1034 case Iop_CmpEQ32: return ARMcc_EQ;
1035 case Iop_CmpNE32: return ARMcc_NE;
1036 case Iop_CmpLT32S: return ARMcc_LT;
1037 case Iop_CmpLT32U: return ARMcc_LO;
1038 case Iop_CmpLE32S: return ARMcc_LE;
1039 case Iop_CmpLE32U: return ARMcc_LS;
1040 default: vpanic("iselCondCode(arm): CmpXX32");
1041 }
1042 }
1043
sewardj6c60b322010-08-22 12:48:28 +00001044 /* --- CasCmpEQ* --- */
1045 /* Ist_Cas has a dummy argument to compare with, so comparison is
1046 always true. */
1047 if (e->tag == Iex_Binop
1048 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1049 || e->Iex.Binop.op == Iop_CasCmpEQ16
1050 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1051 return ARMcc_AL;
1052 }
1053
sewardj6c299f32009-12-31 18:00:12 +00001054 ppIRExpr(e);
1055 vpanic("iselCondCode");
cerioncee30312004-12-17 20:30:21 +00001056}
1057
1058
sewardj6c299f32009-12-31 18:00:12 +00001059/* --------------------- Reg --------------------- */
cerioncee30312004-12-17 20:30:21 +00001060
1061static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1062{
sewardj6c299f32009-12-31 18:00:12 +00001063 HReg r = iselIntExpr_R_wrk(env, e);
1064 /* sanity checks ... */
1065# if 0
1066 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1067# endif
1068 vassert(hregClass(r) == HRcInt32);
1069 vassert(hregIsVirtual(r));
1070 return r;
cerioncee30312004-12-17 20:30:21 +00001071}
1072
1073/* DO NOT CALL THIS DIRECTLY ! */
1074static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1075{
sewardj6c299f32009-12-31 18:00:12 +00001076 IRType ty = typeOfIRExpr(env->type_env,e);
1077 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c60b322010-08-22 12:48:28 +00001078// vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c299f32009-12-31 18:00:12 +00001079
1080 switch (e->tag) {
1081
1082 /* --------- TEMP --------- */
1083 case Iex_RdTmp: {
1084 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1085 }
1086
1087 /* --------- LOAD --------- */
1088 case Iex_Load: {
1089 HReg dst = newVRegI(env);
1090
1091 if (e->Iex.Load.end != Iend_LE)
1092 goto irreducible;
1093
1094 if (ty == Ity_I32) {
1095 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1096 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1097 return dst;
1098 }
1099 if (ty == Ity_I16) {
1100 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1101 addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1102 dst, amode));
1103 return dst;
1104 }
1105 if (ty == Ity_I8) {
1106 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1107 addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1108 return dst;
1109 }
1110
1111//zz if (ty == Ity_I16) {
1112//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1113//zz return dst;
1114//zz }
1115//zz if (ty == Ity_I8) {
1116//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1117//zz return dst;
1118//zz }
1119 break;
1120 }
1121
1122//zz /* --------- TERNARY OP --------- */
1123//zz case Iex_Triop: {
1124//zz /* C3210 flags following FPU partial remainder (fprem), both
1125//zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1126//zz if (e->Iex.Triop.op == Iop_PRemC3210F64
1127//zz || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1128//zz HReg junk = newVRegF(env);
1129//zz HReg dst = newVRegI(env);
1130//zz HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
1131//zz HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
1132//zz /* XXXROUNDINGFIXME */
1133//zz /* set roundingmode here */
1134//zz addInstr(env, X86Instr_FpBinary(
1135//zz e->Iex.Binop.op==Iop_PRemC3210F64
1136//zz ? Xfp_PREM : Xfp_PREM1,
1137//zz srcL,srcR,junk
1138//zz ));
1139//zz /* The previous pseudo-insn will have left the FPU's C3210
1140//zz flags set correctly. So bag them. */
1141//zz addInstr(env, X86Instr_FpStSW_AX());
1142//zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1143//zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1144//zz return dst;
1145//zz }
1146//zz
1147//zz break;
1148//zz }
1149
1150 /* --------- BINARY OP --------- */
1151 case Iex_Binop: {
1152
1153 ARMAluOp aop = 0; /* invalid */
1154 ARMShiftOp sop = 0; /* invalid */
1155
1156 /* ADD/SUB/AND/OR/XOR */
1157 switch (e->Iex.Binop.op) {
1158 case Iop_And32: {
1159 Bool didInv = False;
1160 HReg dst = newVRegI(env);
1161 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1162 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1163 env, e->Iex.Binop.arg2);
1164 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1165 dst, argL, argR));
1166 return dst;
1167 }
1168 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1169 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1170 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1171 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1172 std_binop: {
1173 HReg dst = newVRegI(env);
1174 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1176 env, e->Iex.Binop.arg2);
1177 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1178 return dst;
1179 }
1180 default: break;
1181 }
1182
1183 /* SHL/SHR/SAR */
1184 switch (e->Iex.Binop.op) {
1185 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1186 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1187 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1188 sh_binop: {
1189 HReg dst = newVRegI(env);
1190 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1191 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1192 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1193 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1194 return dst;
1195 }
1196 default: break;
1197 }
1198
1199 /* MUL */
1200 if (e->Iex.Binop.op == Iop_Mul32) {
1201 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1202 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1203 HReg dst = newVRegI(env);
1204 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1205 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1206 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1207 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1208 return dst;
1209 }
1210
1211 /* Handle misc other ops. */
1212
1213 if (e->Iex.Binop.op == Iop_Max32U) {
1214 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216 HReg dst = newVRegI(env);
sewardj6c60b322010-08-22 12:48:28 +00001217 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1218 ARMRI84_R(argR)));
sewardj6c299f32009-12-31 18:00:12 +00001219 addInstr(env, mk_iMOVds_RR(dst, argL));
1220 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1221 return dst;
1222 }
1223
1224 if (e->Iex.Binop.op == Iop_CmpF64) {
1225 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1226 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1227 HReg dst = newVRegI(env);
1228 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1229 FMSTAT, so we can examine the results directly. */
1230 addInstr(env, ARMInstr_VCmpD(dL, dR));
1231 /* Create in dst, the IRCmpF64Result encoded result. */
1232 addInstr(env, ARMInstr_Imm32(dst, 0));
1233 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1234 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1235 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1236 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1237 return dst;
1238 }
1239
1240 if (e->Iex.Binop.op == Iop_F64toI32S
1241 || e->Iex.Binop.op == Iop_F64toI32U) {
1242 /* Wretched uglyness all round, due to having to deal
1243 with rounding modes. Oh well. */
1244 /* FIXME: if arg1 is a constant indicating round-to-zero,
1245 then we could skip all this arsing around with FPSCR and
1246 simply emit FTO{S,U}IZD. */
1247 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1248 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1249 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1250 /* FTO{S,U}ID valF, valD */
1251 HReg valF = newVRegF(env);
1252 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1253 valF, valD));
1254 set_VFP_rounding_default(env);
1255 /* VMOV dst, valF */
1256 HReg dst = newVRegI(env);
1257 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1258 return dst;
1259 }
1260
sewardj6c60b322010-08-22 12:48:28 +00001261 if (e->Iex.Binop.op == Iop_GetElem8x8
1262 || e->Iex.Binop.op == Iop_GetElem16x4
1263 || e->Iex.Binop.op == Iop_GetElem32x2) {
1264 HReg res = newVRegI(env);
1265 HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
1266 UInt index, size;
1267 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1268 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1269 vpanic("ARM target supports GetElem with constant "
1270 "second argument only\n");
1271 }
1272 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1273 switch (e->Iex.Binop.op) {
1274 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1275 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1276 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1277 default: vassert(0);
1278 }
1279 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1280 mkARMNRS(ARMNRS_Reg, res, 0),
1281 mkARMNRS(ARMNRS_Scalar, arg, index),
1282 size, False));
1283 return res;
1284 }
1285
1286 if (e->Iex.Binop.op == Iop_GetElem8x16
1287 || e->Iex.Binop.op == Iop_GetElem16x8
1288 || e->Iex.Binop.op == Iop_GetElem32x4) {
1289 HReg res = newVRegI(env);
1290 HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
1291 UInt index, size;
1292 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1293 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1294 vpanic("ARM target supports GetElem with constant "
1295 "second argument only\n");
1296 }
1297 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1298 switch (e->Iex.Binop.op) {
1299 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1300 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1301 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1302 default: vassert(0);
1303 }
1304 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1305 mkARMNRS(ARMNRS_Reg, res, 0),
1306 mkARMNRS(ARMNRS_Scalar, arg, index),
1307 size, True));
1308 return res;
1309 }
1310
sewardje2ea1762010-09-22 00:56:37 +00001311 /* All cases involving host-side helper calls. */
1312 void* fn = NULL;
1313 switch (e->Iex.Binop.op) {
1314 case Iop_Add16x2:
1315 fn = &h_generic_calc_Add16x2; break;
1316 case Iop_Sub16x2:
1317 fn = &h_generic_calc_Sub16x2; break;
1318 case Iop_HAdd16Ux2:
1319 fn = &h_generic_calc_HAdd16Ux2; break;
1320 case Iop_HAdd16Sx2:
1321 fn = &h_generic_calc_HAdd16Sx2; break;
1322 case Iop_HSub16Ux2:
1323 fn = &h_generic_calc_HSub16Ux2; break;
1324 case Iop_HSub16Sx2:
1325 fn = &h_generic_calc_HSub16Sx2; break;
1326 case Iop_QAdd16Sx2:
1327 fn = &h_generic_calc_QAdd16Sx2; break;
1328 case Iop_QSub16Sx2:
1329 fn = &h_generic_calc_QSub16Sx2; break;
1330 case Iop_Add8x4:
1331 fn = &h_generic_calc_Add8x4; break;
1332 case Iop_Sub8x4:
1333 fn = &h_generic_calc_Sub8x4; break;
1334 case Iop_HAdd8Ux4:
1335 fn = &h_generic_calc_HAdd8Ux4; break;
1336 case Iop_HAdd8Sx4:
1337 fn = &h_generic_calc_HAdd8Sx4; break;
1338 case Iop_HSub8Ux4:
1339 fn = &h_generic_calc_HSub8Ux4; break;
1340 case Iop_HSub8Sx4:
1341 fn = &h_generic_calc_HSub8Sx4; break;
1342 case Iop_QAdd8Sx4:
1343 fn = &h_generic_calc_QAdd8Sx4; break;
1344 case Iop_QAdd8Ux4:
1345 fn = &h_generic_calc_QAdd8Ux4; break;
1346 case Iop_QSub8Sx4:
1347 fn = &h_generic_calc_QSub8Sx4; break;
1348 case Iop_QSub8Ux4:
1349 fn = &h_generic_calc_QSub8Ux4; break;
sewardj310d6b22010-10-18 16:29:40 +00001350 case Iop_Sad8Ux4:
1351 fn = &h_generic_calc_Sad8Ux4; break;
sewardje2ea1762010-09-22 00:56:37 +00001352 default:
1353 break;
1354 }
1355
1356 if (fn) {
1357 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1358 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1359 HReg res = newVRegI(env);
1360 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1361 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1362 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1363 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1364 return res;
1365 }
1366
sewardj6c299f32009-12-31 18:00:12 +00001367 break;
1368 }
1369
1370 /* --------- UNARY OP --------- */
1371 case Iex_Unop: {
1372
1373//zz /* 1Uto8(32to1(expr32)) */
1374//zz if (e->Iex.Unop.op == Iop_1Uto8) {
1375//zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1376//zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1377//zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1378//zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1379//zz IRExpr* expr32 = mi.bindee[0];
1380//zz HReg dst = newVRegI(env);
1381//zz HReg src = iselIntExpr_R(env, expr32);
1382//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1383//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1384//zz X86RMI_Imm(1), dst));
1385//zz return dst;
1386//zz }
1387//zz }
1388//zz
1389//zz /* 8Uto32(LDle(expr32)) */
1390//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1391//zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1392//zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1393//zz unop(Iop_8Uto32,
1394//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1395//zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1396//zz HReg dst = newVRegI(env);
1397//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1398//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1399//zz return dst;
1400//zz }
1401//zz }
1402//zz
1403//zz /* 8Sto32(LDle(expr32)) */
1404//zz if (e->Iex.Unop.op == Iop_8Sto32) {
1405//zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1406//zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1407//zz unop(Iop_8Sto32,
1408//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1409//zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1410//zz HReg dst = newVRegI(env);
1411//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1412//zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1413//zz return dst;
1414//zz }
1415//zz }
1416//zz
1417//zz /* 16Uto32(LDle(expr32)) */
1418//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1419//zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1420//zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1421//zz unop(Iop_16Uto32,
1422//zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1423//zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1424//zz HReg dst = newVRegI(env);
1425//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1426//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1427//zz return dst;
1428//zz }
1429//zz }
1430//zz
1431//zz /* 8Uto32(GET:I8) */
1432//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1433//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1434//zz HReg dst;
1435//zz X86AMode* amode;
1436//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1437//zz dst = newVRegI(env);
1438//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1439//zz hregX86_EBP());
1440//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1441//zz return dst;
1442//zz }
1443//zz }
1444//zz
1445//zz /* 16to32(GET:I16) */
1446//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1447//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1448//zz HReg dst;
1449//zz X86AMode* amode;
1450//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1451//zz dst = newVRegI(env);
1452//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1453//zz hregX86_EBP());
1454//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1455//zz return dst;
1456//zz }
1457//zz }
1458
1459 switch (e->Iex.Unop.op) {
1460 case Iop_8Uto32: {
1461 HReg dst = newVRegI(env);
1462 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1463 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1464 dst, src, ARMRI84_I84(0xFF,0)));
1465 return dst;
1466 }
1467//zz case Iop_8Uto16:
1468//zz case Iop_8Uto32:
1469//zz case Iop_16Uto32: {
1470//zz HReg dst = newVRegI(env);
1471//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1472//zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1473//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1474//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1475//zz X86RMI_Imm(mask), dst));
1476//zz return dst;
1477//zz }
1478//zz case Iop_8Sto16:
1479//zz case Iop_8Sto32:
1480 case Iop_16Uto32: {
1481 HReg dst = newVRegI(env);
1482 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1483 ARMRI5* amt = ARMRI5_I5(16);
1484 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1485 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1486 return dst;
1487 }
1488 case Iop_8Sto32:
1489 case Iop_16Sto32: {
1490 HReg dst = newVRegI(env);
1491 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1492 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1493 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1494 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1495 return dst;
1496 }
1497//zz case Iop_Not8:
1498//zz case Iop_Not16:
1499 case Iop_Not32: {
1500 HReg dst = newVRegI(env);
1501 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1502 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1503 return dst;
1504 }
1505 case Iop_64HIto32: {
1506 HReg rHi, rLo;
1507 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1508 return rHi; /* and abandon rLo .. poor wee thing :-) */
1509 }
1510 case Iop_64to32: {
1511 HReg rHi, rLo;
1512 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1513 return rLo; /* similar stupid comment to the above ... */
1514 }
sewardj6c60b322010-08-22 12:48:28 +00001515 case Iop_64to8: {
1516 HReg rHi, rLo;
1517 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1518 HReg tHi = newVRegI(env);
1519 HReg tLo = newVRegI(env);
1520 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1521 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1522 rHi = tHi;
1523 rLo = tLo;
1524 } else {
1525 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1526 }
1527 return rLo;
1528 }
sewardj6c299f32009-12-31 18:00:12 +00001529//zz case Iop_16HIto8:
1530//zz case Iop_32HIto16: {
1531//zz HReg dst = newVRegI(env);
1532//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1533//zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1534//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1535//zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1536//zz return dst;
1537//zz }
1538 case Iop_1Uto32:
1539 case Iop_1Uto8: {
1540 HReg dst = newVRegI(env);
1541 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1542 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1543 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1544 return dst;
1545 }
1546
1547 case Iop_1Sto32: {
1548 HReg dst = newVRegI(env);
1549 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1550 ARMRI5* amt = ARMRI5_I5(31);
1551 /* This is really rough. We could do much better here;
1552 perhaps mvn{cond} dst, #0 as the second insn?
1553 (same applies to 1Sto64) */
1554 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1557 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1558 return dst;
1559 }
1560
1561
1562//zz case Iop_1Sto8:
1563//zz case Iop_1Sto16:
1564//zz case Iop_1Sto32: {
1565//zz /* could do better than this, but for now ... */
1566//zz HReg dst = newVRegI(env);
1567//zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1568//zz addInstr(env, X86Instr_Set32(cond,dst));
1569//zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1570//zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1571//zz return dst;
1572//zz }
1573//zz case Iop_Ctz32: {
1574//zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1575//zz HReg dst = newVRegI(env);
1576//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1577//zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1578//zz return dst;
1579//zz }
1580 case Iop_Clz32: {
1581 /* Count leading zeroes; easy on ARM. */
1582 HReg dst = newVRegI(env);
1583 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1584 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1585 return dst;
1586 }
1587
1588 case Iop_CmpwNEZ32: {
1589 HReg dst = newVRegI(env);
1590 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1591 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1592 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1593 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1594 return dst;
1595 }
1596
1597 case Iop_Left32: {
1598 HReg dst = newVRegI(env);
1599 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1600 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1601 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1602 return dst;
1603 }
1604
1605//zz case Iop_V128to32: {
1606//zz HReg dst = newVRegI(env);
1607//zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1608//zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1609//zz sub_from_esp(env, 16);
1610//zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1611//zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1612//zz add_to_esp(env, 16);
1613//zz return dst;
1614//zz }
1615//zz
1616 case Iop_ReinterpF32asI32: {
1617 HReg dst = newVRegI(env);
1618 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1619 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1620 return dst;
1621 }
1622
1623//zz
1624//zz case Iop_16to8:
1625 case Iop_32to8:
1626 case Iop_32to16:
1627 /* These are no-ops. */
1628 return iselIntExpr_R(env, e->Iex.Unop.arg);
1629
sewardj6c60b322010-08-22 12:48:28 +00001630 default:
sewardj6c299f32009-12-31 18:00:12 +00001631 break;
1632 }
sewardje2ea1762010-09-22 00:56:37 +00001633
1634 /* All Unop cases involving host-side helper calls. */
1635 void* fn = NULL;
1636 switch (e->Iex.Unop.op) {
1637 case Iop_CmpNEZ16x2:
1638 fn = &h_generic_calc_CmpNEZ16x2; break;
1639 case Iop_CmpNEZ8x4:
1640 fn = &h_generic_calc_CmpNEZ8x4; break;
1641 default:
1642 break;
1643 }
1644
1645 if (fn) {
1646 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1647 HReg res = newVRegI(env);
1648 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1649 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1650 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1651 return res;
1652 }
1653
sewardj6c299f32009-12-31 18:00:12 +00001654 break;
1655 }
1656
1657 /* --------- GET --------- */
1658 case Iex_Get: {
1659 if (ty == Ity_I32
1660 && 0 == (e->Iex.Get.offset & 3)
1661 && e->Iex.Get.offset < 4096-4) {
1662 HReg dst = newVRegI(env);
1663 addInstr(env, ARMInstr_LdSt32(
1664 True/*isLoad*/,
1665 dst,
1666 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1667 return dst;
1668 }
1669//zz if (ty == Ity_I8 || ty == Ity_I16) {
1670//zz HReg dst = newVRegI(env);
1671//zz addInstr(env, X86Instr_LoadEX(
1672//zz toUChar(ty==Ity_I8 ? 1 : 2),
1673//zz False,
1674//zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1675//zz dst));
1676//zz return dst;
1677//zz }
1678 break;
1679 }
1680
1681//zz case Iex_GetI: {
1682//zz X86AMode* am
1683//zz = genGuestArrayOffset(
1684//zz env, e->Iex.GetI.descr,
1685//zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1686//zz HReg dst = newVRegI(env);
1687//zz if (ty == Ity_I8) {
1688//zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1689//zz return dst;
1690//zz }
1691//zz if (ty == Ity_I32) {
1692//zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1693//zz return dst;
1694//zz }
1695//zz break;
1696//zz }
1697
1698 /* --------- CCALL --------- */
1699 case Iex_CCall: {
1700 HReg dst = newVRegI(env);
1701 vassert(ty == e->Iex.CCall.retty);
1702
1703 /* be very restrictive for now. Only 32/64-bit ints allowed
1704 for args, and 32 bits for return type. */
1705 if (e->Iex.CCall.retty != Ity_I32)
1706 goto irreducible;
1707
1708 /* Marshal args, do the call, clear stack. */
1709 Bool ok = doHelperCall( env, False,
1710 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1711 if (ok) {
1712 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1713 return dst;
1714 }
1715 /* else fall through; will hit the irreducible: label */
1716 }
1717
1718 /* --------- LITERAL --------- */
1719 /* 32 literals */
1720 case Iex_Const: {
1721 UInt u = 0;
1722 HReg dst = newVRegI(env);
1723 switch (e->Iex.Const.con->tag) {
1724 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1725 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1726 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
sewardj6c60b322010-08-22 12:48:28 +00001727 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
sewardj6c299f32009-12-31 18:00:12 +00001728 }
1729 addInstr(env, ARMInstr_Imm32(dst, u));
1730 return dst;
1731 }
1732
1733 /* --------- MULTIPLEX --------- */
1734 case Iex_Mux0X: {
1735 IRExpr* cond = e->Iex.Mux0X.cond;
1736
1737 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1738 if (ty == Ity_I32
1739 && cond->tag == Iex_Unop
1740 && cond->Iex.Unop.op == Iop_32to8
1741 && cond->Iex.Unop.arg->tag == Iex_Unop
1742 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1743 ARMCondCode cc;
1744 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1745 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1746 HReg dst = newVRegI(env);
1747 addInstr(env, mk_iMOVds_RR(dst, rX));
1748 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1749 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1750 return dst;
1751 }
1752
1753 /* Mux0X(cond, expr0, exprX) (general case) */
1754 if (ty == Ity_I32) {
1755 HReg r8;
1756 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1757 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1758 HReg dst = newVRegI(env);
1759 addInstr(env, mk_iMOVds_RR(dst, rX));
1760 r8 = iselIntExpr_R(env, cond);
1761 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1762 ARMRI84_I84(0xFF,0)));
1763 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1764 return dst;
1765 }
1766 break;
1767 }
1768
1769 default:
1770 break;
1771 } /* switch (e->tag) */
1772
1773 /* We get here if no pattern matched. */
1774 irreducible:
1775 ppIRExpr(e);
1776 vpanic("iselIntExpr_R: cannot reduce tree");
cerioncee30312004-12-17 20:30:21 +00001777}
1778
1779
/* -------------------- 64-bit -------------------- */
1781
1782/* Compute a 64-bit value into a register pair, which is returned as
1783 the first two parameters. As with iselIntExpr_R, these may be
1784 either real or virtual regs; in any case they must not be changed
1785 by subsequent code emitted by the caller. */
1786
1787static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1788{
1789 iselInt64Expr_wrk(rHi, rLo, env, e);
1790# if 0
1791 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1792# endif
1793 vassert(hregClass(*rHi) == HRcInt32);
1794 vassert(hregIsVirtual(*rHi));
1795 vassert(hregClass(*rLo) == HRcInt32);
1796 vassert(hregIsVirtual(*rLo));
1797}
1798
1799/* DO NOT CALL THIS DIRECTLY ! */
1800static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1801{
1802 vassert(e);
1803 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1804
1805 /* 64-bit literal */
1806 if (e->tag == Iex_Const) {
1807 ULong w64 = e->Iex.Const.con->Ico.U64;
1808 UInt wHi = toUInt(w64 >> 32);
1809 UInt wLo = toUInt(w64);
1810 HReg tHi = newVRegI(env);
1811 HReg tLo = newVRegI(env);
1812 vassert(e->Iex.Const.con->tag == Ico_U64);
1813 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1814 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1815 *rHi = tHi;
1816 *rLo = tLo;
1817 return;
1818 }
1819
1820 /* read 64-bit IRTemp */
1821 if (e->tag == Iex_RdTmp) {
sewardj6c60b322010-08-22 12:48:28 +00001822 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1823 HReg tHi = newVRegI(env);
1824 HReg tLo = newVRegI(env);
1825 HReg tmp = iselNeon64Expr(env, e);
1826 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1827 *rHi = tHi;
1828 *rLo = tLo;
1829 } else {
1830 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1831 }
sewardj6c299f32009-12-31 18:00:12 +00001832 return;
1833 }
1834
1835 /* 64-bit load */
1836 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1837 HReg tLo, tHi, rA;
1838 vassert(e->Iex.Load.ty == Ity_I64);
1839 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1840 tHi = newVRegI(env);
1841 tLo = newVRegI(env);
1842 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1843 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1844 *rHi = tHi;
1845 *rLo = tLo;
1846 return;
1847 }
1848
1849 /* 64-bit GET */
1850 if (e->tag == Iex_Get) {
1851 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1852 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1853 HReg tHi = newVRegI(env);
1854 HReg tLo = newVRegI(env);
1855 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1856 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1857 *rHi = tHi;
1858 *rLo = tLo;
1859 return;
1860 }
1861
1862 /* --------- BINARY ops --------- */
1863 if (e->tag == Iex_Binop) {
1864 switch (e->Iex.Binop.op) {
1865
1866 /* 32 x 32 -> 64 multiply */
1867 case Iop_MullS32:
1868 case Iop_MullU32: {
1869 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1870 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1871 HReg tHi = newVRegI(env);
1872 HReg tLo = newVRegI(env);
1873 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1874 ? ARMmul_SX : ARMmul_ZX;
1875 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1876 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1877 addInstr(env, ARMInstr_Mul(mop));
1878 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1879 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1880 *rHi = tHi;
1881 *rLo = tLo;
1882 return;
1883 }
1884
1885 case Iop_Or64: {
1886 HReg xLo, xHi, yLo, yHi;
1887 HReg tHi = newVRegI(env);
1888 HReg tLo = newVRegI(env);
1889 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1890 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1891 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1892 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1893 *rHi = tHi;
1894 *rLo = tLo;
1895 return;
1896 }
1897
1898 case Iop_Add64: {
1899 HReg xLo, xHi, yLo, yHi;
1900 HReg tHi = newVRegI(env);
1901 HReg tLo = newVRegI(env);
1902 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1903 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1904 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1905 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1906 *rHi = tHi;
1907 *rLo = tLo;
1908 return;
1909 }
1910
1911 /* 32HLto64(e1,e2) */
1912 case Iop_32HLto64: {
1913 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1914 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1915 return;
1916 }
1917
1918 default:
1919 break;
1920 }
1921 }
1922
1923 /* --------- UNARY ops --------- */
1924 if (e->tag == Iex_Unop) {
1925 switch (e->Iex.Unop.op) {
1926
1927 /* ReinterpF64asI64 */
1928 case Iop_ReinterpF64asI64: {
1929 HReg dstHi = newVRegI(env);
1930 HReg dstLo = newVRegI(env);
1931 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1932 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1933 *rHi = dstHi;
1934 *rLo = dstLo;
1935 return;
1936 }
1937
1938 /* Left64(e) */
1939 case Iop_Left64: {
1940 HReg yLo, yHi;
1941 HReg tHi = newVRegI(env);
1942 HReg tLo = newVRegI(env);
1943 HReg zero = newVRegI(env);
1944 /* yHi:yLo = arg */
1945 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1946 /* zero = 0 */
1947 addInstr(env, ARMInstr_Imm32(zero, 0));
1948 /* tLo = 0 - yLo, and set carry */
sewardj6c60b322010-08-22 12:48:28 +00001949 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1950 tLo, zero, ARMRI84_R(yLo)));
sewardj6c299f32009-12-31 18:00:12 +00001951 /* tHi = 0 - yHi - carry */
sewardj6c60b322010-08-22 12:48:28 +00001952 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1953 tHi, zero, ARMRI84_R(yHi)));
sewardj6c299f32009-12-31 18:00:12 +00001954 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1955 back in, so as to give the final result
1956 tHi:tLo = arg | -arg. */
1957 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1958 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1959 *rHi = tHi;
1960 *rLo = tLo;
1961 return;
1962 }
1963
1964 /* CmpwNEZ64(e) */
1965 case Iop_CmpwNEZ64: {
1966 HReg srcLo, srcHi;
1967 HReg tmp1 = newVRegI(env);
1968 HReg tmp2 = newVRegI(env);
1969 /* srcHi:srcLo = arg */
1970 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1971 /* tmp1 = srcHi | srcLo */
1972 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1973 tmp1, srcHi, ARMRI84_R(srcLo)));
1974 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1975 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1976 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1977 tmp2, tmp2, ARMRI84_R(tmp1)));
1978 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1979 tmp2, tmp2, ARMRI5_I5(31)));
1980 *rHi = tmp2;
1981 *rLo = tmp2;
1982 return;
1983 }
1984
1985 case Iop_1Sto64: {
1986 HReg dst = newVRegI(env);
1987 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1988 ARMRI5* amt = ARMRI5_I5(31);
1989 /* This is really rough. We could do much better here;
1990 perhaps mvn{cond} dst, #0 as the second insn?
1991 (same applies to 1Sto32) */
1992 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1993 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1994 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1995 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1996 *rHi = dst;
1997 *rLo = dst;
1998 return;
1999 }
2000
2001 default:
2002 break;
2003 }
2004 } /* if (e->tag == Iex_Unop) */
2005
2006 /* --------- MULTIPLEX --------- */
2007 if (e->tag == Iex_Mux0X) {
2008 IRType ty8;
2009 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2010 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2011 vassert(ty8 == Ity_I8);
2012 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2013 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2014 dstHi = newVRegI(env);
2015 dstLo = newVRegI(env);
2016 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2017 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2018 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2019 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2020 ARMRI84_I84(0xFF,0)));
2021 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2022 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2023 *rHi = dstHi;
2024 *rLo = dstLo;
2025 return;
2026 }
2027
sewardj6c60b322010-08-22 12:48:28 +00002028 /* It is convenient sometimes to call iselInt64Expr even when we
2029 have NEON support (e.g. in do_helper_call we need 64-bit
2030 arguments as 2 x 32 regs). */
2031 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
2032 HReg tHi = newVRegI(env);
2033 HReg tLo = newVRegI(env);
2034 HReg tmp = iselNeon64Expr(env, e);
2035 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2036 *rHi = tHi;
2037 *rLo = tLo;
2038 return ;
2039 }
2040
sewardj6c299f32009-12-31 18:00:12 +00002041 ppIRExpr(e);
2042 vpanic("iselInt64Expr");
2043}
2044
2045
/*---------------------------------------------------------*/
/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
/*---------------------------------------------------------*/
2049
2050static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2051{
2052 HReg r = iselNeon64Expr_wrk( env, e );
2053 vassert(hregClass(r) == HRcFlt64);
2054 vassert(hregIsVirtual(r));
2055 return r;
2056}
2057
2058/* DO NOT CALL THIS DIRECTLY */
2059static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2060{
2061 IRType ty = typeOfIRExpr(env->type_env, e);
2062 MatchInfo mi;
2063 vassert(e);
2064 vassert(ty == Ity_I64);
2065
2066 if (e->tag == Iex_RdTmp) {
2067 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2068 }
2069
2070 if (e->tag == Iex_Const) {
2071 HReg rLo, rHi;
2072 HReg res = newVRegD(env);
2073 iselInt64Expr(&rHi, &rLo, env, e);
2074 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2075 return res;
2076 }
2077
2078 /* 64-bit load */
2079 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2080 HReg res = newVRegD(env);
2081 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2082 vassert(ty == Ity_I64);
2083 addInstr(env, ARMInstr_NLdStD(True, res, am));
2084 return res;
2085 }
2086
2087 /* 64-bit GET */
2088 if (e->tag == Iex_Get) {
2089 HReg addr = newVRegI(env);
2090 HReg res = newVRegD(env);
2091 vassert(ty == Ity_I64);
2092 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2093 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2094 return res;
2095 }
2096
2097 /* --------- BINARY ops --------- */
2098 if (e->tag == Iex_Binop) {
2099 switch (e->Iex.Binop.op) {
2100
2101 /* 32 x 32 -> 64 multiply */
2102 case Iop_MullS32:
2103 case Iop_MullU32: {
2104 HReg rLo, rHi;
2105 HReg res = newVRegD(env);
2106 iselInt64Expr(&rHi, &rLo, env, e);
2107 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2108 return res;
2109 }
2110
2111 case Iop_And64: {
2112 HReg res = newVRegD(env);
2113 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2114 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2115 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2116 res, argL, argR, 4, False));
2117 return res;
2118 }
2119 case Iop_Or64: {
2120 HReg res = newVRegD(env);
2121 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2122 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2123 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2124 res, argL, argR, 4, False));
2125 return res;
2126 }
2127 case Iop_Xor64: {
2128 HReg res = newVRegD(env);
2129 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2130 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2131 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2132 res, argL, argR, 4, False));
2133 return res;
2134 }
2135
2136 /* 32HLto64(e1,e2) */
2137 case Iop_32HLto64: {
2138 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2139 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2140 HReg res = newVRegD(env);
2141 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2142 return res;
2143 }
2144
2145 case Iop_Add8x8:
2146 case Iop_Add16x4:
2147 case Iop_Add32x2:
2148 case Iop_Add64: {
2149 HReg res = newVRegD(env);
2150 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2151 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2152 UInt size;
2153 switch (e->Iex.Binop.op) {
2154 case Iop_Add8x8: size = 0; break;
2155 case Iop_Add16x4: size = 1; break;
2156 case Iop_Add32x2: size = 2; break;
2157 case Iop_Add64: size = 3; break;
2158 default: vassert(0);
2159 }
2160 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2161 res, argL, argR, size, False));
2162 return res;
2163 }
2164 case Iop_Add32Fx2: {
2165 HReg res = newVRegD(env);
2166 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2167 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2168 UInt size = 0;
2169 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2170 res, argL, argR, size, False));
2171 return res;
2172 }
2173 case Iop_Recps32Fx2: {
2174 HReg res = newVRegD(env);
2175 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2176 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2177 UInt size = 0;
2178 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2179 res, argL, argR, size, False));
2180 return res;
2181 }
2182 case Iop_Rsqrts32Fx2: {
2183 HReg res = newVRegD(env);
2184 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2185 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2186 UInt size = 0;
2187 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2188 res, argL, argR, size, False));
2189 return res;
2190 }
2191 case Iop_InterleaveOddLanes8x8:
2192 case Iop_InterleaveOddLanes16x4:
2193 case Iop_InterleaveLO32x2:
2194 case Iop_InterleaveEvenLanes8x8:
2195 case Iop_InterleaveEvenLanes16x4:
2196 case Iop_InterleaveHI32x2: {
2197 HReg tmp = newVRegD(env);
2198 HReg res = newVRegD(env);
2199 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2200 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2201 UInt size;
2202 UInt is_lo;
2203 switch (e->Iex.Binop.op) {
2204 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2205 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2206 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2207 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2208 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2209 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2210 default: vassert(0);
2211 }
2212 if (is_lo) {
2213 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2214 tmp, argL, 4, False));
2215 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2216 res, argR, 4, False));
2217 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2218 res, tmp, size, False));
2219 } else {
2220 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2221 tmp, argR, 4, False));
2222 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2223 res, argL, 4, False));
2224 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2225 tmp, res, size, False));
2226 }
2227 return res;
2228 }
2229 case Iop_InterleaveHI8x8:
2230 case Iop_InterleaveHI16x4:
2231 case Iop_InterleaveLO8x8:
2232 case Iop_InterleaveLO16x4: {
2233 HReg tmp = newVRegD(env);
2234 HReg res = newVRegD(env);
2235 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2236 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2237 UInt size;
2238 UInt is_lo;
2239 switch (e->Iex.Binop.op) {
2240 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2241 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2242 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2243 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2244 default: vassert(0);
2245 }
2246 if (is_lo) {
2247 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2248 tmp, argL, 4, False));
2249 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2250 res, argR, 4, False));
2251 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2252 res, tmp, size, False));
2253 } else {
2254 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2255 tmp, argR, 4, False));
2256 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2257 res, argL, 4, False));
2258 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2259 tmp, res, size, False));
2260 }
2261 return res;
2262 }
2263 case Iop_CatOddLanes8x8:
2264 case Iop_CatOddLanes16x4:
2265 case Iop_CatEvenLanes8x8:
2266 case Iop_CatEvenLanes16x4: {
2267 HReg tmp = newVRegD(env);
2268 HReg res = newVRegD(env);
2269 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2270 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2271 UInt size;
2272 UInt is_lo;
2273 switch (e->Iex.Binop.op) {
2274 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2275 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2276 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2277 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2278 default: vassert(0);
2279 }
2280 if (is_lo) {
2281 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2282 tmp, argL, 4, False));
2283 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2284 res, argR, 4, False));
2285 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2286 res, tmp, size, False));
2287 } else {
2288 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2289 tmp, argR, 4, False));
2290 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2291 res, argL, 4, False));
2292 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2293 tmp, res, size, False));
2294 }
2295 return res;
2296 }
2297 case Iop_QAdd8Ux8:
2298 case Iop_QAdd16Ux4:
2299 case Iop_QAdd32Ux2:
2300 case Iop_QAdd64Ux1: {
2301 HReg res = newVRegD(env);
2302 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2303 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2304 UInt size;
2305 switch (e->Iex.Binop.op) {
2306 case Iop_QAdd8Ux8: size = 0; break;
2307 case Iop_QAdd16Ux4: size = 1; break;
2308 case Iop_QAdd32Ux2: size = 2; break;
2309 case Iop_QAdd64Ux1: size = 3; break;
2310 default: vassert(0);
2311 }
2312 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2313 res, argL, argR, size, False));
2314 return res;
2315 }
2316 case Iop_QAdd8Sx8:
2317 case Iop_QAdd16Sx4:
2318 case Iop_QAdd32Sx2:
2319 case Iop_QAdd64Sx1: {
2320 HReg res = newVRegD(env);
2321 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2322 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2323 UInt size;
2324 switch (e->Iex.Binop.op) {
2325 case Iop_QAdd8Sx8: size = 0; break;
2326 case Iop_QAdd16Sx4: size = 1; break;
2327 case Iop_QAdd32Sx2: size = 2; break;
2328 case Iop_QAdd64Sx1: size = 3; break;
2329 default: vassert(0);
2330 }
2331 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2332 res, argL, argR, size, False));
2333 return res;
2334 }
2335 case Iop_Sub8x8:
2336 case Iop_Sub16x4:
2337 case Iop_Sub32x2:
2338 case Iop_Sub64: {
2339 HReg res = newVRegD(env);
2340 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2341 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2342 UInt size;
2343 switch (e->Iex.Binop.op) {
2344 case Iop_Sub8x8: size = 0; break;
2345 case Iop_Sub16x4: size = 1; break;
2346 case Iop_Sub32x2: size = 2; break;
2347 case Iop_Sub64: size = 3; break;
2348 default: vassert(0);
2349 }
2350 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2351 res, argL, argR, size, False));
2352 return res;
2353 }
2354 case Iop_Sub32Fx2: {
2355 HReg res = newVRegD(env);
2356 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2358 UInt size = 0;
2359 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2360 res, argL, argR, size, False));
2361 return res;
2362 }
2363 case Iop_QSub8Ux8:
2364 case Iop_QSub16Ux4:
2365 case Iop_QSub32Ux2:
2366 case Iop_QSub64Ux1: {
2367 HReg res = newVRegD(env);
2368 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2369 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2370 UInt size;
2371 switch (e->Iex.Binop.op) {
2372 case Iop_QSub8Ux8: size = 0; break;
2373 case Iop_QSub16Ux4: size = 1; break;
2374 case Iop_QSub32Ux2: size = 2; break;
2375 case Iop_QSub64Ux1: size = 3; break;
2376 default: vassert(0);
2377 }
2378 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2379 res, argL, argR, size, False));
2380 return res;
2381 }
2382 case Iop_QSub8Sx8:
2383 case Iop_QSub16Sx4:
2384 case Iop_QSub32Sx2:
2385 case Iop_QSub64Sx1: {
2386 HReg res = newVRegD(env);
2387 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2389 UInt size;
2390 switch (e->Iex.Binop.op) {
2391 case Iop_QSub8Sx8: size = 0; break;
2392 case Iop_QSub16Sx4: size = 1; break;
2393 case Iop_QSub32Sx2: size = 2; break;
2394 case Iop_QSub64Sx1: size = 3; break;
2395 default: vassert(0);
2396 }
2397 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2398 res, argL, argR, size, False));
2399 return res;
2400 }
2401 case Iop_Max8Ux8:
2402 case Iop_Max16Ux4:
2403 case Iop_Max32Ux2: {
2404 HReg res = newVRegD(env);
2405 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2406 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2407 UInt size;
2408 switch (e->Iex.Binop.op) {
2409 case Iop_Max8Ux8: size = 0; break;
2410 case Iop_Max16Ux4: size = 1; break;
2411 case Iop_Max32Ux2: size = 2; break;
2412 default: vassert(0);
2413 }
2414 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2415 res, argL, argR, size, False));
2416 return res;
2417 }
2418 case Iop_Max8Sx8:
2419 case Iop_Max16Sx4:
2420 case Iop_Max32Sx2: {
2421 HReg res = newVRegD(env);
2422 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424 UInt size;
2425 switch (e->Iex.Binop.op) {
2426 case Iop_Max8Sx8: size = 0; break;
2427 case Iop_Max16Sx4: size = 1; break;
2428 case Iop_Max32Sx2: size = 2; break;
2429 default: vassert(0);
2430 }
2431 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2432 res, argL, argR, size, False));
2433 return res;
2434 }
2435 case Iop_Min8Ux8:
2436 case Iop_Min16Ux4:
2437 case Iop_Min32Ux2: {
2438 HReg res = newVRegD(env);
2439 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2441 UInt size;
2442 switch (e->Iex.Binop.op) {
2443 case Iop_Min8Ux8: size = 0; break;
2444 case Iop_Min16Ux4: size = 1; break;
2445 case Iop_Min32Ux2: size = 2; break;
2446 default: vassert(0);
2447 }
2448 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2449 res, argL, argR, size, False));
2450 return res;
2451 }
2452 case Iop_Min8Sx8:
2453 case Iop_Min16Sx4:
2454 case Iop_Min32Sx2: {
2455 HReg res = newVRegD(env);
2456 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458 UInt size;
2459 switch (e->Iex.Binop.op) {
2460 case Iop_Min8Sx8: size = 0; break;
2461 case Iop_Min16Sx4: size = 1; break;
2462 case Iop_Min32Sx2: size = 2; break;
2463 default: vassert(0);
2464 }
2465 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2466 res, argL, argR, size, False));
2467 return res;
2468 }
2469 case Iop_Sar8x8:
2470 case Iop_Sar16x4:
2471 case Iop_Sar32x2: {
2472 HReg res = newVRegD(env);
2473 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475 HReg argR2 = newVRegD(env);
2476 HReg zero = newVRegD(env);
2477 UInt size;
2478 switch (e->Iex.Binop.op) {
2479 case Iop_Sar8x8: size = 0; break;
2480 case Iop_Sar16x4: size = 1; break;
2481 case Iop_Sar32x2: size = 2; break;
2482 case Iop_Sar64: size = 3; break;
2483 default: vassert(0);
2484 }
2485 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2486 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2487 argR2, zero, argR, size, False));
2488 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2489 res, argL, argR2, size, False));
2490 return res;
2491 }
2492 case Iop_Sal8x8:
2493 case Iop_Sal16x4:
2494 case Iop_Sal32x2:
2495 case Iop_Sal64x1: {
2496 HReg res = newVRegD(env);
2497 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2498 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2499 UInt size;
2500 switch (e->Iex.Binop.op) {
2501 case Iop_Sal8x8: size = 0; break;
2502 case Iop_Sal16x4: size = 1; break;
2503 case Iop_Sal32x2: size = 2; break;
2504 case Iop_Sal64x1: size = 3; break;
2505 default: vassert(0);
2506 }
2507 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2508 res, argL, argR, size, False));
2509 return res;
2510 }
2511 case Iop_Shr8x8:
2512 case Iop_Shr16x4:
2513 case Iop_Shr32x2: {
2514 HReg res = newVRegD(env);
2515 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2516 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2517 HReg argR2 = newVRegD(env);
2518 HReg zero = newVRegD(env);
2519 UInt size;
2520 switch (e->Iex.Binop.op) {
2521 case Iop_Shr8x8: size = 0; break;
2522 case Iop_Shr16x4: size = 1; break;
2523 case Iop_Shr32x2: size = 2; break;
2524 default: vassert(0);
2525 }
2526 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2527 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2528 argR2, zero, argR, size, False));
2529 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2530 res, argL, argR2, size, False));
2531 return res;
2532 }
2533 case Iop_Shl8x8:
2534 case Iop_Shl16x4:
2535 case Iop_Shl32x2: {
2536 HReg res = newVRegD(env);
2537 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2538 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2539 UInt size;
2540 switch (e->Iex.Binop.op) {
2541 case Iop_Shl8x8: size = 0; break;
2542 case Iop_Shl16x4: size = 1; break;
2543 case Iop_Shl32x2: size = 2; break;
2544 default: vassert(0);
2545 }
2546 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547 res, argL, argR, size, False));
2548 return res;
2549 }
2550 case Iop_QShl8x8:
2551 case Iop_QShl16x4:
2552 case Iop_QShl32x2:
2553 case Iop_QShl64x1: {
2554 HReg res = newVRegD(env);
2555 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2556 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2557 UInt size;
2558 switch (e->Iex.Binop.op) {
2559 case Iop_QShl8x8: size = 0; break;
2560 case Iop_QShl16x4: size = 1; break;
2561 case Iop_QShl32x2: size = 2; break;
2562 case Iop_QShl64x1: size = 3; break;
2563 default: vassert(0);
2564 }
2565 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2566 res, argL, argR, size, False));
2567 return res;
2568 }
2569 case Iop_QSal8x8:
2570 case Iop_QSal16x4:
2571 case Iop_QSal32x2:
2572 case Iop_QSal64x1: {
2573 HReg res = newVRegD(env);
2574 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2575 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2576 UInt size;
2577 switch (e->Iex.Binop.op) {
2578 case Iop_QSal8x8: size = 0; break;
2579 case Iop_QSal16x4: size = 1; break;
2580 case Iop_QSal32x2: size = 2; break;
2581 case Iop_QSal64x1: size = 3; break;
2582 default: vassert(0);
2583 }
2584 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2585 res, argL, argR, size, False));
2586 return res;
2587 }
2588 case Iop_QShlN8x8:
2589 case Iop_QShlN16x4:
2590 case Iop_QShlN32x2:
2591 case Iop_QShlN64x1: {
2592 HReg res = newVRegD(env);
2593 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2594 UInt size, imm;
2595 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2596 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2597 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2598 "second argument only\n");
2599 }
2600 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2601 switch (e->Iex.Binop.op) {
2602 case Iop_QShlN8x8: size = 8 | imm; break;
2603 case Iop_QShlN16x4: size = 16 | imm; break;
2604 case Iop_QShlN32x2: size = 32 | imm; break;
2605 case Iop_QShlN64x1: size = 64 | imm; break;
2606 default: vassert(0);
2607 }
2608 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2609 res, argL, size, False));
2610 return res;
2611 }
2612 case Iop_QShlN8Sx8:
2613 case Iop_QShlN16Sx4:
2614 case Iop_QShlN32Sx2:
2615 case Iop_QShlN64Sx1: {
2616 HReg res = newVRegD(env);
2617 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2618 UInt size, imm;
2619 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2620 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2621 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2622 "second argument only\n");
2623 }
2624 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2625 switch (e->Iex.Binop.op) {
2626 case Iop_QShlN8Sx8: size = 8 | imm; break;
2627 case Iop_QShlN16Sx4: size = 16 | imm; break;
2628 case Iop_QShlN32Sx2: size = 32 | imm; break;
2629 case Iop_QShlN64Sx1: size = 64 | imm; break;
2630 default: vassert(0);
2631 }
2632 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2633 res, argL, size, False));
2634 return res;
2635 }
2636 case Iop_QSalN8x8:
2637 case Iop_QSalN16x4:
2638 case Iop_QSalN32x2:
2639 case Iop_QSalN64x1: {
2640 HReg res = newVRegD(env);
2641 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2642 UInt size, imm;
2643 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2644 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2645 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2646 "second argument only\n");
2647 }
2648 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2649 switch (e->Iex.Binop.op) {
2650 case Iop_QSalN8x8: size = 8 | imm; break;
2651 case Iop_QSalN16x4: size = 16 | imm; break;
2652 case Iop_QSalN32x2: size = 32 | imm; break;
2653 case Iop_QSalN64x1: size = 64 | imm; break;
2654 default: vassert(0);
2655 }
2656 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2657 res, argL, size, False));
2658 return res;
2659 }
2660 case Iop_ShrN8x8:
2661 case Iop_ShrN16x4:
2662 case Iop_ShrN32x2:
2663 case Iop_Shr64: {
2664 HReg res = newVRegD(env);
2665 HReg tmp = newVRegD(env);
2666 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2667 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2668 HReg argR2 = newVRegI(env);
2669 UInt size;
2670 switch (e->Iex.Binop.op) {
2671 case Iop_ShrN8x8: size = 0; break;
2672 case Iop_ShrN16x4: size = 1; break;
2673 case Iop_ShrN32x2: size = 2; break;
2674 case Iop_Shr64: size = 3; break;
2675 default: vassert(0);
2676 }
2677 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2678 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2679 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2680 res, argL, tmp, size, False));
2681 return res;
2682 }
2683 case Iop_ShlN8x8:
2684 case Iop_ShlN16x4:
2685 case Iop_ShlN32x2:
2686 case Iop_Shl64: {
2687 HReg res = newVRegD(env);
2688 HReg tmp = newVRegD(env);
2689 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2690 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2691 UInt size;
2692 switch (e->Iex.Binop.op) {
2693 case Iop_ShlN8x8: size = 0; break;
2694 case Iop_ShlN16x4: size = 1; break;
2695 case Iop_ShlN32x2: size = 2; break;
2696 case Iop_Shl64: size = 3; break;
2697 default: vassert(0);
2698 }
2699 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2700 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2701 res, argL, tmp, size, False));
2702 return res;
2703 }
2704 case Iop_SarN8x8:
2705 case Iop_SarN16x4:
2706 case Iop_SarN32x2:
2707 case Iop_Sar64: {
2708 HReg res = newVRegD(env);
2709 HReg tmp = newVRegD(env);
2710 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2711 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2712 HReg argR2 = newVRegI(env);
2713 UInt size;
2714 switch (e->Iex.Binop.op) {
2715 case Iop_SarN8x8: size = 0; break;
2716 case Iop_SarN16x4: size = 1; break;
2717 case Iop_SarN32x2: size = 2; break;
2718 case Iop_Sar64: size = 3; break;
2719 default: vassert(0);
2720 }
2721 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2722 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2723 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2724 res, argL, tmp, size, False));
2725 return res;
2726 }
2727 case Iop_CmpGT8Ux8:
2728 case Iop_CmpGT16Ux4:
2729 case Iop_CmpGT32Ux2: {
2730 HReg res = newVRegD(env);
2731 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2732 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2733 UInt size;
2734 switch (e->Iex.Binop.op) {
2735 case Iop_CmpGT8Ux8: size = 0; break;
2736 case Iop_CmpGT16Ux4: size = 1; break;
2737 case Iop_CmpGT32Ux2: size = 2; break;
2738 default: vassert(0);
2739 }
2740 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2741 res, argL, argR, size, False));
2742 return res;
2743 }
2744 case Iop_CmpGT8Sx8:
2745 case Iop_CmpGT16Sx4:
2746 case Iop_CmpGT32Sx2: {
2747 HReg res = newVRegD(env);
2748 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2750 UInt size;
2751 switch (e->Iex.Binop.op) {
2752 case Iop_CmpGT8Sx8: size = 0; break;
2753 case Iop_CmpGT16Sx4: size = 1; break;
2754 case Iop_CmpGT32Sx2: size = 2; break;
2755 default: vassert(0);
2756 }
2757 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2758 res, argL, argR, size, False));
2759 return res;
2760 }
2761 case Iop_CmpEQ8x8:
2762 case Iop_CmpEQ16x4:
2763 case Iop_CmpEQ32x2: {
2764 HReg res = newVRegD(env);
2765 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2767 UInt size;
2768 switch (e->Iex.Binop.op) {
2769 case Iop_CmpEQ8x8: size = 0; break;
2770 case Iop_CmpEQ16x4: size = 1; break;
2771 case Iop_CmpEQ32x2: size = 2; break;
2772 default: vassert(0);
2773 }
2774 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2775 res, argL, argR, size, False));
2776 return res;
2777 }
2778 case Iop_Mul8x8:
2779 case Iop_Mul16x4:
2780 case Iop_Mul32x2: {
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 UInt size = 0;
2785 switch(e->Iex.Binop.op) {
2786 case Iop_Mul8x8: size = 0; break;
2787 case Iop_Mul16x4: size = 1; break;
2788 case Iop_Mul32x2: size = 2; break;
2789 default: vassert(0);
2790 }
2791 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2792 res, argL, argR, size, False));
2793 return res;
2794 }
2795 case Iop_Mul32Fx2: {
2796 HReg res = newVRegD(env);
2797 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2798 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2799 UInt size = 0;
2800 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2801 res, argL, argR, size, False));
2802 return res;
2803 }
2804 case Iop_QDMulHi16Sx4:
2805 case Iop_QDMulHi32Sx2: {
2806 HReg res = newVRegD(env);
2807 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2808 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2809 UInt size = 0;
2810 switch(e->Iex.Binop.op) {
2811 case Iop_QDMulHi16Sx4: size = 1; break;
2812 case Iop_QDMulHi32Sx2: size = 2; break;
2813 default: vassert(0);
2814 }
2815 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2816 res, argL, argR, size, False));
2817 return res;
2818 }
2819
2820 case Iop_QRDMulHi16Sx4:
2821 case Iop_QRDMulHi32Sx2: {
2822 HReg res = newVRegD(env);
2823 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2825 UInt size = 0;
2826 switch(e->Iex.Binop.op) {
2827 case Iop_QRDMulHi16Sx4: size = 1; break;
2828 case Iop_QRDMulHi32Sx2: size = 2; break;
2829 default: vassert(0);
2830 }
2831 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2832 res, argL, argR, size, False));
2833 return res;
2834 }
2835
2836 case Iop_PwAdd8x8:
2837 case Iop_PwAdd16x4:
2838 case Iop_PwAdd32x2: {
2839 HReg res = newVRegD(env);
2840 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2842 UInt size = 0;
2843 switch(e->Iex.Binop.op) {
2844 case Iop_PwAdd8x8: size = 0; break;
2845 case Iop_PwAdd16x4: size = 1; break;
2846 case Iop_PwAdd32x2: size = 2; break;
2847 default: vassert(0);
2848 }
2849 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2850 res, argL, argR, size, False));
2851 return res;
2852 }
2853 case Iop_PwAdd32Fx2: {
2854 HReg res = newVRegD(env);
2855 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2856 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2857 UInt size = 0;
2858 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2859 res, argL, argR, size, False));
2860 return res;
2861 }
2862 case Iop_PwMin8Ux8:
2863 case Iop_PwMin16Ux4:
2864 case Iop_PwMin32Ux2: {
2865 HReg res = newVRegD(env);
2866 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2867 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2868 UInt size = 0;
2869 switch(e->Iex.Binop.op) {
2870 case Iop_PwMin8Ux8: size = 0; break;
2871 case Iop_PwMin16Ux4: size = 1; break;
2872 case Iop_PwMin32Ux2: size = 2; break;
2873 default: vassert(0);
2874 }
2875 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2876 res, argL, argR, size, False));
2877 return res;
2878 }
2879 case Iop_PwMin8Sx8:
2880 case Iop_PwMin16Sx4:
2881 case Iop_PwMin32Sx2: {
2882 HReg res = newVRegD(env);
2883 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2885 UInt size = 0;
2886 switch(e->Iex.Binop.op) {
2887 case Iop_PwMin8Sx8: size = 0; break;
2888 case Iop_PwMin16Sx4: size = 1; break;
2889 case Iop_PwMin32Sx2: size = 2; break;
2890 default: vassert(0);
2891 }
2892 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2893 res, argL, argR, size, False));
2894 return res;
2895 }
2896 case Iop_PwMax8Ux8:
2897 case Iop_PwMax16Ux4:
2898 case Iop_PwMax32Ux2: {
2899 HReg res = newVRegD(env);
2900 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902 UInt size = 0;
2903 switch(e->Iex.Binop.op) {
2904 case Iop_PwMax8Ux8: size = 0; break;
2905 case Iop_PwMax16Ux4: size = 1; break;
2906 case Iop_PwMax32Ux2: size = 2; break;
2907 default: vassert(0);
2908 }
2909 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2910 res, argL, argR, size, False));
2911 return res;
2912 }
2913 case Iop_PwMax8Sx8:
2914 case Iop_PwMax16Sx4:
2915 case Iop_PwMax32Sx2: {
2916 HReg res = newVRegD(env);
2917 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919 UInt size = 0;
2920 switch(e->Iex.Binop.op) {
2921 case Iop_PwMax8Sx8: size = 0; break;
2922 case Iop_PwMax16Sx4: size = 1; break;
2923 case Iop_PwMax32Sx2: size = 2; break;
2924 default: vassert(0);
2925 }
2926 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2927 res, argL, argR, size, False));
2928 return res;
2929 }
2930 case Iop_Perm8x8: {
2931 HReg res = newVRegD(env);
2932 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2933 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2934 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2935 res, argL, argR, 0, False));
2936 return res;
2937 }
2938 case Iop_PolynomialMul8x8: {
2939 HReg res = newVRegD(env);
2940 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2941 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2942 UInt size = 0;
2943 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2944 res, argL, argR, size, False));
2945 return res;
2946 }
2947 case Iop_Max32Fx2: {
2948 HReg res = newVRegD(env);
2949 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2952 res, argL, argR, 2, False));
2953 return res;
2954 }
2955 case Iop_Min32Fx2: {
2956 HReg res = newVRegD(env);
2957 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2960 res, argL, argR, 2, False));
2961 return res;
2962 }
2963 case Iop_PwMax32Fx2: {
2964 HReg res = newVRegD(env);
2965 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2968 res, argL, argR, 2, False));
2969 return res;
2970 }
2971 case Iop_PwMin32Fx2: {
2972 HReg res = newVRegD(env);
2973 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2974 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2975 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2976 res, argL, argR, 2, False));
2977 return res;
2978 }
2979 case Iop_CmpGT32Fx2: {
2980 HReg res = newVRegD(env);
2981 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2982 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2983 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2984 res, argL, argR, 2, False));
2985 return res;
2986 }
2987 case Iop_CmpGE32Fx2: {
2988 HReg res = newVRegD(env);
2989 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
2992 res, argL, argR, 2, False));
2993 return res;
2994 }
2995 case Iop_CmpEQ32Fx2: {
2996 HReg res = newVRegD(env);
2997 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3000 res, argL, argR, 2, False));
3001 return res;
3002 }
3003 case Iop_F32ToFixed32Ux2_RZ:
3004 case Iop_F32ToFixed32Sx2_RZ:
3005 case Iop_Fixed32UToF32x2_RN:
3006 case Iop_Fixed32SToF32x2_RN: {
3007 HReg res = newVRegD(env);
3008 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3009 ARMNeonUnOp op;
3010 UInt imm6;
3011 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3012 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3013 vpanic("ARM supports FP <-> Fixed conversion with constant "
3014 "second argument less than 33 only\n");
3015 }
3016 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3017 vassert(imm6 <= 32 && imm6 > 0);
3018 imm6 = 64 - imm6;
3019 switch(e->Iex.Binop.op) {
3020 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3021 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3022 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3023 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3024 default: vassert(0);
3025 }
3026 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3027 return res;
3028 }
3029 /*
3030 FIXME: is this here or not?
3031 case Iop_VDup8x8:
3032 case Iop_VDup16x4:
3033 case Iop_VDup32x2: {
3034 HReg res = newVRegD(env);
3035 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3036 UInt index;
3037 UInt imm4;
3038 UInt size = 0;
3039 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3040 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3041 vpanic("ARM supports Iop_VDup with constant "
3042 "second argument less than 16 only\n");
3043 }
3044 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3045 switch(e->Iex.Binop.op) {
3046 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3047 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3048 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3049 default: vassert(0);
3050 }
3051 if (imm4 >= 16) {
3052 vpanic("ARM supports Iop_VDup with constant "
3053 "second argument less than 16 only\n");
3054 }
3055 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3056 res, argL, imm4, False));
3057 return res;
3058 }
3059 */
3060 default:
3061 break;
3062 }
3063 }
3064
3065 /* --------- UNARY ops --------- */
3066 if (e->tag == Iex_Unop) {
3067 switch (e->Iex.Unop.op) {
3068
3069 /* ReinterpF64asI64 */
3070 case Iop_ReinterpF64asI64:
3071 /* Left64(e) */
3072 case Iop_Left64:
3073 /* CmpwNEZ64(e) */
3074 //case Iop_CmpwNEZ64:
3075 case Iop_1Sto64: {
3076 HReg rLo, rHi;
3077 HReg res = newVRegD(env);
3078 iselInt64Expr(&rHi, &rLo, env, e);
3079 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3080 return res;
3081 }
3082 case Iop_Not64: {
3083 DECLARE_PATTERN(p_veqz_8x8);
3084 DECLARE_PATTERN(p_veqz_16x4);
3085 DECLARE_PATTERN(p_veqz_32x2);
3086 DECLARE_PATTERN(p_vcge_8sx8);
3087 DECLARE_PATTERN(p_vcge_16sx4);
3088 DECLARE_PATTERN(p_vcge_32sx2);
3089 DECLARE_PATTERN(p_vcge_8ux8);
3090 DECLARE_PATTERN(p_vcge_16ux4);
3091 DECLARE_PATTERN(p_vcge_32ux2);
3092 DEFINE_PATTERN(p_veqz_8x8,
3093 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3094 DEFINE_PATTERN(p_veqz_16x4,
3095 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3096 DEFINE_PATTERN(p_veqz_32x2,
3097 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3098 DEFINE_PATTERN(p_vcge_8sx8,
3099 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3100 DEFINE_PATTERN(p_vcge_16sx4,
3101 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3102 DEFINE_PATTERN(p_vcge_32sx2,
3103 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3104 DEFINE_PATTERN(p_vcge_8ux8,
3105 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3106 DEFINE_PATTERN(p_vcge_16ux4,
3107 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3108 DEFINE_PATTERN(p_vcge_32ux2,
3109 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3110 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3111 HReg res = newVRegD(env);
3112 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3113 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3114 return res;
3115 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3116 HReg res = newVRegD(env);
3117 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3118 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3119 return res;
3120 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3121 HReg res = newVRegD(env);
3122 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3123 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3124 return res;
3125 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3126 HReg res = newVRegD(env);
3127 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3128 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3129 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3130 res, argL, argR, 0, False));
3131 return res;
3132 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3133 HReg res = newVRegD(env);
3134 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3135 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3136 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3137 res, argL, argR, 1, False));
3138 return res;
3139 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3140 HReg res = newVRegD(env);
3141 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3142 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3143 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3144 res, argL, argR, 2, False));
3145 return res;
3146 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3147 HReg res = newVRegD(env);
3148 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3149 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3150 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3151 res, argL, argR, 0, False));
3152 return res;
3153 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3154 HReg res = newVRegD(env);
3155 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3156 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3157 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3158 res, argL, argR, 1, False));
3159 return res;
3160 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3161 HReg res = newVRegD(env);
3162 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3163 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3164 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3165 res, argL, argR, 2, False));
3166 return res;
3167 } else {
3168 HReg res = newVRegD(env);
3169 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3170 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3171 return res;
3172 }
3173 }
3174 case Iop_Dup8x8:
3175 case Iop_Dup16x4:
3176 case Iop_Dup32x2: {
3177 HReg res, arg;
3178 UInt size;
3179 DECLARE_PATTERN(p_vdup_8x8);
3180 DECLARE_PATTERN(p_vdup_16x4);
3181 DECLARE_PATTERN(p_vdup_32x2);
3182 DEFINE_PATTERN(p_vdup_8x8,
3183 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3184 DEFINE_PATTERN(p_vdup_16x4,
3185 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3186 DEFINE_PATTERN(p_vdup_32x2,
3187 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3188 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3189 UInt index;
3190 UInt imm4;
3191 if (mi.bindee[1]->tag == Iex_Const &&
3192 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3193 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3194 imm4 = (index << 1) + 1;
3195 if (index < 8) {
3196 res = newVRegD(env);
3197 arg = iselNeon64Expr(env, mi.bindee[0]);
3198 addInstr(env, ARMInstr_NUnaryS(
3199 ARMneon_VDUP,
3200 mkARMNRS(ARMNRS_Reg, res, 0),
3201 mkARMNRS(ARMNRS_Scalar, arg, index),
3202 imm4, False
3203 ));
3204 return res;
3205 }
3206 }
3207 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3208 UInt index;
3209 UInt imm4;
3210 if (mi.bindee[1]->tag == Iex_Const &&
3211 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3212 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3213 imm4 = (index << 2) + 2;
3214 if (index < 4) {
3215 res = newVRegD(env);
3216 arg = iselNeon64Expr(env, mi.bindee[0]);
3217 addInstr(env, ARMInstr_NUnaryS(
3218 ARMneon_VDUP,
3219 mkARMNRS(ARMNRS_Reg, res, 0),
3220 mkARMNRS(ARMNRS_Scalar, arg, index),
3221 imm4, False
3222 ));
3223 return res;
3224 }
3225 }
3226 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3227 UInt index;
3228 UInt imm4;
3229 if (mi.bindee[1]->tag == Iex_Const &&
3230 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3231 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3232 imm4 = (index << 3) + 4;
3233 if (index < 2) {
3234 res = newVRegD(env);
3235 arg = iselNeon64Expr(env, mi.bindee[0]);
3236 addInstr(env, ARMInstr_NUnaryS(
3237 ARMneon_VDUP,
3238 mkARMNRS(ARMNRS_Reg, res, 0),
3239 mkARMNRS(ARMNRS_Scalar, arg, index),
3240 imm4, False
3241 ));
3242 return res;
3243 }
3244 }
3245 }
3246 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3247 res = newVRegD(env);
3248 switch (e->Iex.Unop.op) {
3249 case Iop_Dup8x8: size = 0; break;
3250 case Iop_Dup16x4: size = 1; break;
3251 case Iop_Dup32x2: size = 2; break;
3252 default: vassert(0);
3253 }
3254 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3255 return res;
3256 }
3257 case Iop_Abs8x8:
3258 case Iop_Abs16x4:
3259 case Iop_Abs32x2: {
3260 HReg res = newVRegD(env);
3261 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3262 UInt size = 0;
3263 switch(e->Iex.Binop.op) {
3264 case Iop_Abs8x8: size = 0; break;
3265 case Iop_Abs16x4: size = 1; break;
3266 case Iop_Abs32x2: size = 2; break;
3267 default: vassert(0);
3268 }
3269 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3270 return res;
3271 }
3272 case Iop_Reverse64_8x8:
3273 case Iop_Reverse64_16x4:
3274 case Iop_Reverse64_32x2: {
3275 HReg res = newVRegD(env);
3276 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3277 UInt size = 0;
3278 switch(e->Iex.Binop.op) {
3279 case Iop_Reverse64_8x8: size = 0; break;
3280 case Iop_Reverse64_16x4: size = 1; break;
3281 case Iop_Reverse64_32x2: size = 2; break;
3282 default: vassert(0);
3283 }
3284 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3285 res, arg, size, False));
3286 return res;
3287 }
3288 case Iop_Reverse32_8x8:
3289 case Iop_Reverse32_16x4: {
3290 HReg res = newVRegD(env);
3291 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3292 UInt size = 0;
3293 switch(e->Iex.Binop.op) {
3294 case Iop_Reverse32_8x8: size = 0; break;
3295 case Iop_Reverse32_16x4: size = 1; break;
3296 default: vassert(0);
3297 }
3298 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3299 res, arg, size, False));
3300 return res;
3301 }
3302 case Iop_Reverse16_8x8: {
3303 HReg res = newVRegD(env);
3304 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305 UInt size = 0;
3306 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3307 res, arg, size, False));
3308 return res;
3309 }
3310 case Iop_CmpwNEZ64: {
3311 HReg x_lsh = newVRegD(env);
3312 HReg x_rsh = newVRegD(env);
3313 HReg lsh_amt = newVRegD(env);
3314 HReg rsh_amt = newVRegD(env);
3315 HReg zero = newVRegD(env);
3316 HReg tmp = newVRegD(env);
3317 HReg tmp2 = newVRegD(env);
3318 HReg res = newVRegD(env);
3319 HReg x = newVRegD(env);
3320 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3321 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3322 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3323 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3324 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3325 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3326 rsh_amt, zero, lsh_amt, 2, False));
3327 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3328 x_lsh, x, lsh_amt, 3, False));
3329 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3330 x_rsh, x, rsh_amt, 3, False));
3331 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3332 tmp, x_lsh, x_rsh, 0, False));
3333 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3334 res, tmp, x, 0, False));
3335 return res;
3336 }
3337 case Iop_CmpNEZ8x8:
3338 case Iop_CmpNEZ16x4:
3339 case Iop_CmpNEZ32x2: {
3340 HReg res = newVRegD(env);
3341 HReg tmp = newVRegD(env);
3342 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3343 UInt size;
3344 switch (e->Iex.Unop.op) {
3345 case Iop_CmpNEZ8x8: size = 0; break;
3346 case Iop_CmpNEZ16x4: size = 1; break;
3347 case Iop_CmpNEZ32x2: size = 2; break;
3348 default: vassert(0);
3349 }
3350 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3351 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3352 return res;
3353 }
sewardj5f438dd2011-06-16 11:36:23 +00003354 case Iop_NarrowUn16to8x8:
3355 case Iop_NarrowUn32to16x4:
3356 case Iop_NarrowUn64to32x2: {
sewardj6c60b322010-08-22 12:48:28 +00003357 HReg res = newVRegD(env);
3358 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3359 UInt size = 0;
3360 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003361 case Iop_NarrowUn16to8x8: size = 0; break;
3362 case Iop_NarrowUn32to16x4: size = 1; break;
3363 case Iop_NarrowUn64to32x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003364 default: vassert(0);
3365 }
3366 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3367 res, arg, size, False));
3368 return res;
3369 }
sewardj5f438dd2011-06-16 11:36:23 +00003370 case Iop_QNarrowUn16Sto8Sx8:
3371 case Iop_QNarrowUn32Sto16Sx4:
3372 case Iop_QNarrowUn64Sto32Sx2: {
sewardj6c60b322010-08-22 12:48:28 +00003373 HReg res = newVRegD(env);
3374 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3375 UInt size = 0;
3376 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003377 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3378 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3379 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003380 default: vassert(0);
3381 }
3382 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3383 res, arg, size, False));
3384 return res;
3385 }
sewardj5f438dd2011-06-16 11:36:23 +00003386 case Iop_QNarrowUn16Sto8Ux8:
3387 case Iop_QNarrowUn32Sto16Ux4:
3388 case Iop_QNarrowUn64Sto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003389 HReg res = newVRegD(env);
3390 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3391 UInt size = 0;
3392 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003393 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3394 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3395 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003396 default: vassert(0);
3397 }
3398 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3399 res, arg, size, False));
3400 return res;
3401 }
sewardj5f438dd2011-06-16 11:36:23 +00003402 case Iop_QNarrowUn16Uto8Ux8:
3403 case Iop_QNarrowUn32Uto16Ux4:
3404 case Iop_QNarrowUn64Uto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003405 HReg res = newVRegD(env);
3406 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3407 UInt size = 0;
3408 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003409 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3410 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3411 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003412 default: vassert(0);
3413 }
3414 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3415 res, arg, size, False));
3416 return res;
3417 }
3418 case Iop_PwAddL8Sx8:
3419 case Iop_PwAddL16Sx4:
3420 case Iop_PwAddL32Sx2: {
3421 HReg res = newVRegD(env);
3422 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3423 UInt size = 0;
3424 switch(e->Iex.Binop.op) {
3425 case Iop_PwAddL8Sx8: size = 0; break;
3426 case Iop_PwAddL16Sx4: size = 1; break;
3427 case Iop_PwAddL32Sx2: size = 2; break;
3428 default: vassert(0);
3429 }
3430 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3431 res, arg, size, False));
3432 return res;
3433 }
3434 case Iop_PwAddL8Ux8:
3435 case Iop_PwAddL16Ux4:
3436 case Iop_PwAddL32Ux2: {
3437 HReg res = newVRegD(env);
3438 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3439 UInt size = 0;
3440 switch(e->Iex.Binop.op) {
3441 case Iop_PwAddL8Ux8: size = 0; break;
3442 case Iop_PwAddL16Ux4: size = 1; break;
3443 case Iop_PwAddL32Ux2: size = 2; break;
3444 default: vassert(0);
3445 }
3446 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3447 res, arg, size, False));
3448 return res;
3449 }
3450 case Iop_Cnt8x8: {
3451 HReg res = newVRegD(env);
3452 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3453 UInt size = 0;
3454 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3455 res, arg, size, False));
3456 return res;
3457 }
3458 case Iop_Clz8Sx8:
3459 case Iop_Clz16Sx4:
3460 case Iop_Clz32Sx2: {
3461 HReg res = newVRegD(env);
3462 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3463 UInt size = 0;
3464 switch(e->Iex.Binop.op) {
3465 case Iop_Clz8Sx8: size = 0; break;
3466 case Iop_Clz16Sx4: size = 1; break;
3467 case Iop_Clz32Sx2: size = 2; break;
3468 default: vassert(0);
3469 }
3470 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3471 res, arg, size, False));
3472 return res;
3473 }
3474 case Iop_Cls8Sx8:
3475 case Iop_Cls16Sx4:
3476 case Iop_Cls32Sx2: {
3477 HReg res = newVRegD(env);
3478 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3479 UInt size = 0;
3480 switch(e->Iex.Binop.op) {
3481 case Iop_Cls8Sx8: size = 0; break;
3482 case Iop_Cls16Sx4: size = 1; break;
3483 case Iop_Cls32Sx2: size = 2; break;
3484 default: vassert(0);
3485 }
3486 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3487 res, arg, size, False));
3488 return res;
3489 }
3490 case Iop_FtoI32Sx2_RZ: {
3491 HReg res = newVRegD(env);
3492 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3493 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3494 res, arg, 2, False));
3495 return res;
3496 }
3497 case Iop_FtoI32Ux2_RZ: {
3498 HReg res = newVRegD(env);
3499 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3500 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3501 res, arg, 2, False));
3502 return res;
3503 }
3504 case Iop_I32StoFx2: {
3505 HReg res = newVRegD(env);
3506 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3507 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3508 res, arg, 2, False));
3509 return res;
3510 }
3511 case Iop_I32UtoFx2: {
3512 HReg res = newVRegD(env);
3513 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3514 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3515 res, arg, 2, False));
3516 return res;
3517 }
3518 case Iop_F32toF16x4: {
3519 HReg res = newVRegD(env);
3520 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3521 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3522 res, arg, 2, False));
3523 return res;
3524 }
3525 case Iop_Recip32Fx2: {
3526 HReg res = newVRegD(env);
3527 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3528 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3529 res, argL, 0, False));
3530 return res;
3531 }
3532 case Iop_Recip32x2: {
3533 HReg res = newVRegD(env);
3534 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3535 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3536 res, argL, 0, False));
3537 return res;
3538 }
3539 case Iop_Abs32Fx2: {
3540 DECLARE_PATTERN(p_vabd_32fx2);
3541 DEFINE_PATTERN(p_vabd_32fx2,
3542 unop(Iop_Abs32Fx2,
3543 binop(Iop_Sub32Fx2,
3544 bind(0),
3545 bind(1))));
3546 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3547 HReg res = newVRegD(env);
3548 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3549 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3550 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3551 res, argL, argR, 0, False));
3552 return res;
3553 } else {
3554 HReg res = newVRegD(env);
3555 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3556 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3557 res, arg, 0, False));
3558 return res;
3559 }
3560 }
3561 case Iop_Rsqrte32Fx2: {
3562 HReg res = newVRegD(env);
3563 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3564 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3565 res, arg, 0, False));
3566 return res;
3567 }
3568 case Iop_Rsqrte32x2: {
3569 HReg res = newVRegD(env);
3570 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3571 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3572 res, arg, 0, False));
3573 return res;
3574 }
3575 case Iop_Neg32Fx2: {
3576 HReg res = newVRegD(env);
3577 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3578 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3579 res, arg, 0, False));
3580 return res;
3581 }
3582 default:
3583 break;
3584 }
3585 } /* if (e->tag == Iex_Unop) */
3586
3587 if (e->tag == Iex_Triop) {
3588 switch (e->Iex.Triop.op) {
3589 case Iop_Extract64: {
3590 HReg res = newVRegD(env);
3591 HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
3592 HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
3593 UInt imm4;
3594 if (e->Iex.Triop.arg3->tag != Iex_Const ||
3595 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
3596 vpanic("ARM target supports Iop_Extract64 with constant "
3597 "third argument less than 16 only\n");
3598 }
3599 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
3600 if (imm4 >= 8) {
3601 vpanic("ARM target supports Iop_Extract64 with constant "
3602 "third argument less than 16 only\n");
3603 }
3604 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3605 res, argL, argR, imm4, False));
3606 return res;
3607 }
3608 case Iop_SetElem8x8:
3609 case Iop_SetElem16x4:
3610 case Iop_SetElem32x2: {
3611 HReg res = newVRegD(env);
3612 HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
3613 HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
3614 UInt index, size;
3615 if (e->Iex.Triop.arg2->tag != Iex_Const ||
3616 typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
3617 vpanic("ARM target supports SetElem with constant "
3618 "second argument only\n");
3619 }
3620 index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
3621 switch (e->Iex.Triop.op) {
3622 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3623 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3624 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3625 default: vassert(0);
3626 }
3627 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3628 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3629 mkARMNRS(ARMNRS_Scalar, res, index),
3630 mkARMNRS(ARMNRS_Reg, arg, 0),
3631 size, False));
3632 return res;
3633 }
3634 default:
3635 break;
3636 }
3637 }
3638
3639 /* --------- MULTIPLEX --------- */
3640 if (e->tag == Iex_Mux0X) {
3641 HReg rLo, rHi;
3642 HReg res = newVRegD(env);
3643 iselInt64Expr(&rHi, &rLo, env, e);
3644 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3645 return res;
3646 }
3647
3648 ppIRExpr(e);
3649 vpanic("iselNeon64Expr");
3650}
3651
3652static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3653{
3654 HReg r = iselNeonExpr_wrk( env, e );
3655 vassert(hregClass(r) == HRcVec128);
3656 vassert(hregIsVirtual(r));
3657 return r;
3658}
3659
/* DO NOT CALL THIS DIRECTLY -- go through iselNeonExpr, which
   sanity-checks the register this worker returns. */
3661static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3662{
3663 IRType ty = typeOfIRExpr(env->type_env, e);
3664 MatchInfo mi;
3665 vassert(e);
3666 vassert(ty == Ity_V128);
3667
3668 if (e->tag == Iex_RdTmp) {
3669 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3670 }
3671
3672 if (e->tag == Iex_Const) {
3673 /* At the moment there should be no 128-bit constants in IR for ARM
3674 generated during disassemble. They are represented as Iop_64HLtoV128
3675 binary operation and are handled among binary ops. */
3676 /* But zero can be created by valgrind internal optimizer */
3677 if (e->Iex.Const.con->Ico.V128 == 0) {
3678 HReg res = newVRegV(env);
3679 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3680 return res;
3681 }
3682 ppIRExpr(e);
3683 vpanic("128-bit constant is not implemented");
3684 }
3685
3686 if (e->tag == Iex_Load) {
3687 HReg res = newVRegV(env);
3688 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3689 vassert(ty == Ity_V128);
3690 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3691 return res;
3692 }
3693
3694 if (e->tag == Iex_Get) {
3695 HReg addr = newVRegI(env);
3696 HReg res = newVRegV(env);
3697 vassert(ty == Ity_V128);
3698 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3699 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3700 return res;
3701 }
3702
3703 if (e->tag == Iex_Unop) {
3704 switch (e->Iex.Unop.op) {
3705 case Iop_NotV128: {
3706 DECLARE_PATTERN(p_veqz_8x16);
3707 DECLARE_PATTERN(p_veqz_16x8);
3708 DECLARE_PATTERN(p_veqz_32x4);
3709 DECLARE_PATTERN(p_vcge_8sx16);
3710 DECLARE_PATTERN(p_vcge_16sx8);
3711 DECLARE_PATTERN(p_vcge_32sx4);
3712 DECLARE_PATTERN(p_vcge_8ux16);
3713 DECLARE_PATTERN(p_vcge_16ux8);
3714 DECLARE_PATTERN(p_vcge_32ux4);
3715 DEFINE_PATTERN(p_veqz_8x16,
3716 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3717 DEFINE_PATTERN(p_veqz_16x8,
3718 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3719 DEFINE_PATTERN(p_veqz_32x4,
3720 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3721 DEFINE_PATTERN(p_vcge_8sx16,
3722 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3723 DEFINE_PATTERN(p_vcge_16sx8,
3724 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3725 DEFINE_PATTERN(p_vcge_32sx4,
3726 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3727 DEFINE_PATTERN(p_vcge_8ux16,
3728 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3729 DEFINE_PATTERN(p_vcge_16ux8,
3730 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3731 DEFINE_PATTERN(p_vcge_32ux4,
3732 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3733 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3734 HReg res = newVRegV(env);
3735 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3736 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3737 return res;
3738 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3739 HReg res = newVRegV(env);
3740 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3741 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3742 return res;
3743 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3744 HReg res = newVRegV(env);
3745 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3746 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3747 return res;
3748 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3749 HReg res = newVRegV(env);
3750 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3751 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3752 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3753 res, argL, argR, 0, True));
3754 return res;
3755 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3756 HReg res = newVRegV(env);
3757 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3758 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3759 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3760 res, argL, argR, 1, True));
3761 return res;
3762 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3763 HReg res = newVRegV(env);
3764 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3765 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3766 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3767 res, argL, argR, 2, True));
3768 return res;
3769 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3770 HReg res = newVRegV(env);
3771 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3772 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3773 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3774 res, argL, argR, 0, True));
3775 return res;
3776 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3777 HReg res = newVRegV(env);
3778 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3779 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3780 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3781 res, argL, argR, 1, True));
3782 return res;
3783 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3784 HReg res = newVRegV(env);
3785 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3786 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3787 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3788 res, argL, argR, 2, True));
3789 return res;
3790 } else {
3791 HReg res = newVRegV(env);
3792 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3793 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3794 return res;
3795 }
3796 }
3797 case Iop_Dup8x16:
3798 case Iop_Dup16x8:
3799 case Iop_Dup32x4: {
3800 HReg res, arg;
3801 UInt size;
3802 DECLARE_PATTERN(p_vdup_8x16);
3803 DECLARE_PATTERN(p_vdup_16x8);
3804 DECLARE_PATTERN(p_vdup_32x4);
3805 DEFINE_PATTERN(p_vdup_8x16,
3806 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3807 DEFINE_PATTERN(p_vdup_16x8,
3808 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3809 DEFINE_PATTERN(p_vdup_32x4,
3810 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3811 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3812 UInt index;
3813 UInt imm4;
3814 if (mi.bindee[1]->tag == Iex_Const &&
3815 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3816 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3817 imm4 = (index << 1) + 1;
3818 if (index < 8) {
3819 res = newVRegV(env);
3820 arg = iselNeon64Expr(env, mi.bindee[0]);
3821 addInstr(env, ARMInstr_NUnaryS(
3822 ARMneon_VDUP,
3823 mkARMNRS(ARMNRS_Reg, res, 0),
3824 mkARMNRS(ARMNRS_Scalar, arg, index),
3825 imm4, True
3826 ));
3827 return res;
3828 }
3829 }
3830 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3831 UInt index;
3832 UInt imm4;
3833 if (mi.bindee[1]->tag == Iex_Const &&
3834 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3835 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3836 imm4 = (index << 2) + 2;
3837 if (index < 4) {
3838 res = newVRegV(env);
3839 arg = iselNeon64Expr(env, mi.bindee[0]);
3840 addInstr(env, ARMInstr_NUnaryS(
3841 ARMneon_VDUP,
3842 mkARMNRS(ARMNRS_Reg, res, 0),
3843 mkARMNRS(ARMNRS_Scalar, arg, index),
3844 imm4, True
3845 ));
3846 return res;
3847 }
3848 }
3849 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3850 UInt index;
3851 UInt imm4;
3852 if (mi.bindee[1]->tag == Iex_Const &&
3853 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3854 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3855 imm4 = (index << 3) + 4;
3856 if (index < 2) {
3857 res = newVRegV(env);
3858 arg = iselNeon64Expr(env, mi.bindee[0]);
3859 addInstr(env, ARMInstr_NUnaryS(
3860 ARMneon_VDUP,
3861 mkARMNRS(ARMNRS_Reg, res, 0),
3862 mkARMNRS(ARMNRS_Scalar, arg, index),
3863 imm4, True
3864 ));
3865 return res;
3866 }
3867 }
3868 }
3869 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3870 res = newVRegV(env);
3871 switch (e->Iex.Unop.op) {
3872 case Iop_Dup8x16: size = 0; break;
3873 case Iop_Dup16x8: size = 1; break;
3874 case Iop_Dup32x4: size = 2; break;
3875 default: vassert(0);
3876 }
3877 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3878 return res;
3879 }
3880 case Iop_Abs8x16:
3881 case Iop_Abs16x8:
3882 case Iop_Abs32x4: {
3883 HReg res = newVRegV(env);
3884 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3885 UInt size = 0;
3886 switch(e->Iex.Binop.op) {
3887 case Iop_Abs8x16: size = 0; break;
3888 case Iop_Abs16x8: size = 1; break;
3889 case Iop_Abs32x4: size = 2; break;
3890 default: vassert(0);
3891 }
3892 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3893 return res;
3894 }
3895 case Iop_Reverse64_8x16:
3896 case Iop_Reverse64_16x8:
3897 case Iop_Reverse64_32x4: {
3898 HReg res = newVRegV(env);
3899 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3900 UInt size = 0;
3901 switch(e->Iex.Binop.op) {
3902 case Iop_Reverse64_8x16: size = 0; break;
3903 case Iop_Reverse64_16x8: size = 1; break;
3904 case Iop_Reverse64_32x4: size = 2; break;
3905 default: vassert(0);
3906 }
3907 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3908 res, arg, size, True));
3909 return res;
3910 }
3911 case Iop_Reverse32_8x16:
3912 case Iop_Reverse32_16x8: {
3913 HReg res = newVRegV(env);
3914 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3915 UInt size = 0;
3916 switch(e->Iex.Binop.op) {
3917 case Iop_Reverse32_8x16: size = 0; break;
3918 case Iop_Reverse32_16x8: size = 1; break;
3919 default: vassert(0);
3920 }
3921 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3922 res, arg, size, True));
3923 return res;
3924 }
3925 case Iop_Reverse16_8x16: {
3926 HReg res = newVRegV(env);
3927 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3928 UInt size = 0;
3929 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3930 res, arg, size, True));
3931 return res;
3932 }
3933 case Iop_CmpNEZ64x2: {
3934 HReg x_lsh = newVRegV(env);
3935 HReg x_rsh = newVRegV(env);
3936 HReg lsh_amt = newVRegV(env);
3937 HReg rsh_amt = newVRegV(env);
3938 HReg zero = newVRegV(env);
3939 HReg tmp = newVRegV(env);
3940 HReg tmp2 = newVRegV(env);
3941 HReg res = newVRegV(env);
3942 HReg x = newVRegV(env);
3943 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3944 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3945 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3946 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3947 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3948 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3949 rsh_amt, zero, lsh_amt, 2, True));
3950 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3951 x_lsh, x, lsh_amt, 3, True));
3952 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3953 x_rsh, x, rsh_amt, 3, True));
3954 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3955 tmp, x_lsh, x_rsh, 0, True));
3956 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3957 res, tmp, x, 0, True));
3958 return res;
3959 }
3960 case Iop_CmpNEZ8x16:
3961 case Iop_CmpNEZ16x8:
3962 case Iop_CmpNEZ32x4: {
3963 HReg res = newVRegV(env);
3964 HReg tmp = newVRegV(env);
3965 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3966 UInt size;
3967 switch (e->Iex.Unop.op) {
3968 case Iop_CmpNEZ8x16: size = 0; break;
3969 case Iop_CmpNEZ16x8: size = 1; break;
3970 case Iop_CmpNEZ32x4: size = 2; break;
3971 default: vassert(0);
3972 }
3973 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3974 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3975 return res;
3976 }
sewardj5f438dd2011-06-16 11:36:23 +00003977 case Iop_Widen8Uto16x8:
3978 case Iop_Widen16Uto32x4:
3979 case Iop_Widen32Uto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00003980 HReg res = newVRegV(env);
3981 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3982 UInt size;
3983 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003984 case Iop_Widen8Uto16x8: size = 0; break;
3985 case Iop_Widen16Uto32x4: size = 1; break;
3986 case Iop_Widen32Uto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003987 default: vassert(0);
3988 }
3989 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
3990 res, arg, size, True));
3991 return res;
3992 }
sewardj5f438dd2011-06-16 11:36:23 +00003993 case Iop_Widen8Sto16x8:
3994 case Iop_Widen16Sto32x4:
3995 case Iop_Widen32Sto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00003996 HReg res = newVRegV(env);
3997 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3998 UInt size;
3999 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004000 case Iop_Widen8Sto16x8: size = 0; break;
4001 case Iop_Widen16Sto32x4: size = 1; break;
4002 case Iop_Widen32Sto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004003 default: vassert(0);
4004 }
4005 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4006 res, arg, size, True));
4007 return res;
4008 }
4009 case Iop_PwAddL8Sx16:
4010 case Iop_PwAddL16Sx8:
4011 case Iop_PwAddL32Sx4: {
4012 HReg res = newVRegV(env);
4013 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4014 UInt size = 0;
4015 switch(e->Iex.Binop.op) {
4016 case Iop_PwAddL8Sx16: size = 0; break;
4017 case Iop_PwAddL16Sx8: size = 1; break;
4018 case Iop_PwAddL32Sx4: size = 2; break;
4019 default: vassert(0);
4020 }
4021 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4022 res, arg, size, True));
4023 return res;
4024 }
4025 case Iop_PwAddL8Ux16:
4026 case Iop_PwAddL16Ux8:
4027 case Iop_PwAddL32Ux4: {
4028 HReg res = newVRegV(env);
4029 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030 UInt size = 0;
4031 switch(e->Iex.Binop.op) {
4032 case Iop_PwAddL8Ux16: size = 0; break;
4033 case Iop_PwAddL16Ux8: size = 1; break;
4034 case Iop_PwAddL32Ux4: size = 2; break;
4035 default: vassert(0);
4036 }
4037 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4038 res, arg, size, True));
4039 return res;
4040 }
4041 case Iop_Cnt8x16: {
4042 HReg res = newVRegV(env);
4043 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4044 UInt size = 0;
4045 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4046 return res;
4047 }
4048 case Iop_Clz8Sx16:
4049 case Iop_Clz16Sx8:
4050 case Iop_Clz32Sx4: {
4051 HReg res = newVRegV(env);
4052 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4053 UInt size = 0;
4054 switch(e->Iex.Binop.op) {
4055 case Iop_Clz8Sx16: size = 0; break;
4056 case Iop_Clz16Sx8: size = 1; break;
4057 case Iop_Clz32Sx4: size = 2; break;
4058 default: vassert(0);
4059 }
4060 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4061 return res;
4062 }
4063 case Iop_Cls8Sx16:
4064 case Iop_Cls16Sx8:
4065 case Iop_Cls32Sx4: {
4066 HReg res = newVRegV(env);
4067 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4068 UInt size = 0;
4069 switch(e->Iex.Binop.op) {
4070 case Iop_Cls8Sx16: size = 0; break;
4071 case Iop_Cls16Sx8: size = 1; break;
4072 case Iop_Cls32Sx4: size = 2; break;
4073 default: vassert(0);
4074 }
4075 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4076 return res;
4077 }
4078 case Iop_FtoI32Sx4_RZ: {
4079 HReg res = newVRegV(env);
4080 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4081 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4082 res, arg, 2, True));
4083 return res;
4084 }
4085 case Iop_FtoI32Ux4_RZ: {
4086 HReg res = newVRegV(env);
4087 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4088 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4089 res, arg, 2, True));
4090 return res;
4091 }
4092 case Iop_I32StoFx4: {
4093 HReg res = newVRegV(env);
4094 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4095 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4096 res, arg, 2, True));
4097 return res;
4098 }
4099 case Iop_I32UtoFx4: {
4100 HReg res = newVRegV(env);
4101 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4102 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4103 res, arg, 2, True));
4104 return res;
4105 }
4106 case Iop_F16toF32x4: {
4107 HReg res = newVRegV(env);
4108 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4109 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4110 res, arg, 2, True));
4111 return res;
4112 }
4113 case Iop_Recip32Fx4: {
4114 HReg res = newVRegV(env);
4115 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4116 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4117 res, argL, 0, True));
4118 return res;
4119 }
4120 case Iop_Recip32x4: {
4121 HReg res = newVRegV(env);
4122 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4123 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4124 res, argL, 0, True));
4125 return res;
4126 }
4127 case Iop_Abs32Fx4: {
4128 DECLARE_PATTERN(p_vabd_32fx4);
4129 DEFINE_PATTERN(p_vabd_32fx4,
4130 unop(Iop_Abs32Fx4,
4131 binop(Iop_Sub32Fx4,
4132 bind(0),
4133 bind(1))));
4134 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4135 HReg res = newVRegV(env);
4136 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4137 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4138 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4139 res, argL, argR, 0, True));
4140 return res;
4141 } else {
4142 HReg res = newVRegV(env);
4143 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4144 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4145 res, argL, 0, True));
4146 return res;
4147 }
4148 }
4149 case Iop_Rsqrte32Fx4: {
4150 HReg res = newVRegV(env);
4151 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4152 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4153 res, argL, 0, True));
4154 return res;
4155 }
4156 case Iop_Rsqrte32x4: {
4157 HReg res = newVRegV(env);
4158 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4159 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4160 res, argL, 0, True));
4161 return res;
4162 }
4163 case Iop_Neg32Fx4: {
4164 HReg res = newVRegV(env);
4165 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4166 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4167 res, arg, 0, True));
4168 return res;
4169 }
4170 /* ... */
4171 default:
4172 break;
4173 }
4174 }
4175
4176 if (e->tag == Iex_Binop) {
4177 switch (e->Iex.Binop.op) {
4178 case Iop_64HLtoV128:
4179 /* Try to match into single "VMOV reg, imm" instruction */
4180 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4181 e->Iex.Binop.arg2->tag == Iex_Const &&
4182 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4183 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4184 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4185 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4186 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4187 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4188 if (imm) {
4189 HReg res = newVRegV(env);
4190 addInstr(env, ARMInstr_NeonImm(res, imm));
4191 return res;
4192 }
4193 if ((imm64 >> 32) == 0LL &&
4194 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4195 HReg tmp1 = newVRegV(env);
4196 HReg tmp2 = newVRegV(env);
4197 HReg res = newVRegV(env);
4198 if (imm->type < 10) {
4199 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4200 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4201 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4202 res, tmp1, tmp2, 4, True));
4203 return res;
4204 }
4205 }
4206 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4207 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4208 HReg tmp1 = newVRegV(env);
4209 HReg tmp2 = newVRegV(env);
4210 HReg res = newVRegV(env);
4211 if (imm->type < 10) {
4212 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4213 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4214 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4215 res, tmp1, tmp2, 4, True));
4216 return res;
4217 }
4218 }
4219 }
sewardj6828dc72011-09-30 08:49:02 +00004220 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4221 it the slow way. */
4222 {
4223 /* local scope */
4224 /* Done via the stack for ease of use. */
4225 /* FIXME: assumes little endian host */
4226 HReg w3, w2, w1, w0;
4227 HReg res = newVRegV(env);
4228 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
4229 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
4230 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
4231 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4232 ARMRI84* c_16 = ARMRI84_I84(16,0);
4233 /* Make space for SP */
4234 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4235 hregARM_R13(), c_16));
4236
4237 /* Store the less significant 64 bits */
4238 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4239 addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
4240 addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
4241
4242 /* Store the more significant 64 bits */
4243 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4244 addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
4245 addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
4246
4247 /* Load result back from stack. */
4248 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4249 mkARMAModeN_R(hregARM_R13())));
4250
4251 /* Restore SP */
4252 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4253 hregARM_R13(), c_16));
4254 return res;
4255 } /* local scope */
sewardj6c60b322010-08-22 12:48:28 +00004256 goto neon_expr_bad;
4257 case Iop_AndV128: {
4258 HReg res = newVRegV(env);
4259 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4260 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4261 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4262 res, argL, argR, 4, True));
4263 return res;
4264 }
4265 case Iop_OrV128: {
4266 HReg res = newVRegV(env);
4267 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4268 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4269 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4270 res, argL, argR, 4, True));
4271 return res;
4272 }
4273 case Iop_XorV128: {
4274 HReg res = newVRegV(env);
4275 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4276 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4277 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4278 res, argL, argR, 4, True));
4279 return res;
4280 }
4281 case Iop_Add8x16:
4282 case Iop_Add16x8:
4283 case Iop_Add32x4:
4284 case Iop_Add64x2: {
4285 /*
4286 FIXME: remove this if not used
4287 DECLARE_PATTERN(p_vrhadd_32sx4);
4288 ULong one = (1LL << 32) | 1LL;
4289 DEFINE_PATTERN(p_vrhadd_32sx4,
4290 binop(Iop_Add32x4,
4291 binop(Iop_Add32x4,
4292 binop(Iop_SarN32x4,
4293 bind(0),
4294 mkU8(1)),
4295 binop(Iop_SarN32x4,
4296 bind(1),
4297 mkU8(1))),
4298 binop(Iop_SarN32x4,
4299 binop(Iop_Add32x4,
4300 binop(Iop_Add32x4,
4301 binop(Iop_AndV128,
4302 bind(0),
4303 mkU128(one)),
4304 binop(Iop_AndV128,
4305 bind(1),
4306 mkU128(one))),
4307 mkU128(one)),
4308 mkU8(1))));
4309 */
4310 HReg res = newVRegV(env);
4311 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4312 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4313 UInt size;
4314 switch (e->Iex.Binop.op) {
4315 case Iop_Add8x16: size = 0; break;
4316 case Iop_Add16x8: size = 1; break;
4317 case Iop_Add32x4: size = 2; break;
4318 case Iop_Add64x2: size = 3; break;
4319 default:
4320 ppIROp(e->Iex.Binop.op);
4321 vpanic("Illegal element size in VADD");
4322 }
4323 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4324 res, argL, argR, size, True));
4325 return res;
4326 }
4327 case Iop_Add32Fx4: {
4328 HReg res = newVRegV(env);
4329 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4330 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4331 UInt size = 0;
4332 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4333 res, argL, argR, size, True));
4334 return res;
4335 }
4336 case Iop_Recps32Fx4: {
4337 HReg res = newVRegV(env);
4338 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4339 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4340 UInt size = 0;
4341 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4342 res, argL, argR, size, True));
4343 return res;
4344 }
4345 case Iop_Rsqrts32Fx4: {
4346 HReg res = newVRegV(env);
4347 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4348 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4349 UInt size = 0;
4350 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4351 res, argL, argR, size, True));
4352 return res;
4353 }
4354 case Iop_InterleaveEvenLanes8x16:
4355 case Iop_InterleaveEvenLanes16x8:
4356 case Iop_InterleaveEvenLanes32x4:
4357 case Iop_InterleaveOddLanes8x16:
4358 case Iop_InterleaveOddLanes16x8:
4359 case Iop_InterleaveOddLanes32x4: {
4360 HReg tmp = newVRegV(env);
4361 HReg res = newVRegV(env);
4362 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4363 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4364 UInt size;
4365 UInt is_lo;
4366 switch (e->Iex.Binop.op) {
4367 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4368 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4369 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4370 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4371 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4372 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4373 default:
4374 ppIROp(e->Iex.Binop.op);
4375 vpanic("Illegal element size in VTRN");
4376 }
4377 if (is_lo) {
4378 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4379 tmp, argL, 4, True));
4380 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4381 res, argR, 4, True));
4382 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4383 res, tmp, size, True));
4384 } else {
4385 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4386 tmp, argR, 4, True));
4387 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4388 res, argL, 4, True));
4389 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4390 tmp, res, size, True));
4391 }
4392 return res;
4393 }
4394 case Iop_InterleaveHI8x16:
4395 case Iop_InterleaveHI16x8:
4396 case Iop_InterleaveHI32x4:
4397 case Iop_InterleaveLO8x16:
4398 case Iop_InterleaveLO16x8:
4399 case Iop_InterleaveLO32x4: {
4400 HReg tmp = newVRegV(env);
4401 HReg res = newVRegV(env);
4402 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4403 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4404 UInt size;
4405 UInt is_lo;
4406 switch (e->Iex.Binop.op) {
4407 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4408 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4409 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4410 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4411 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4412 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4413 default:
4414 ppIROp(e->Iex.Binop.op);
4415 vpanic("Illegal element size in VZIP");
4416 }
4417 if (is_lo) {
4418 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4419 tmp, argL, 4, True));
4420 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4421 res, argR, 4, True));
4422 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4423 res, tmp, size, True));
4424 } else {
4425 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4426 tmp, argR, 4, True));
4427 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4428 res, argL, 4, True));
4429 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4430 tmp, res, size, True));
4431 }
4432 return res;
4433 }
4434 case Iop_CatOddLanes8x16:
4435 case Iop_CatOddLanes16x8:
4436 case Iop_CatOddLanes32x4:
4437 case Iop_CatEvenLanes8x16:
4438 case Iop_CatEvenLanes16x8:
4439 case Iop_CatEvenLanes32x4: {
4440 HReg tmp = newVRegV(env);
4441 HReg res = newVRegV(env);
4442 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4443 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4444 UInt size;
4445 UInt is_lo;
4446 switch (e->Iex.Binop.op) {
4447 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4448 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4449 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4450 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4451 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4452 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4453 default:
4454 ppIROp(e->Iex.Binop.op);
4455 vpanic("Illegal element size in VUZP");
4456 }
4457 if (is_lo) {
4458 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4459 tmp, argL, 4, True));
4460 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4461 res, argR, 4, True));
4462 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4463 res, tmp, size, True));
4464 } else {
4465 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4466 tmp, argR, 4, True));
4467 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4468 res, argL, 4, True));
4469 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4470 tmp, res, size, True));
4471 }
4472 return res;
4473 }
4474 case Iop_QAdd8Ux16:
4475 case Iop_QAdd16Ux8:
4476 case Iop_QAdd32Ux4:
4477 case Iop_QAdd64Ux2: {
4478 HReg res = newVRegV(env);
4479 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4480 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4481 UInt size;
4482 switch (e->Iex.Binop.op) {
4483 case Iop_QAdd8Ux16: size = 0; break;
4484 case Iop_QAdd16Ux8: size = 1; break;
4485 case Iop_QAdd32Ux4: size = 2; break;
4486 case Iop_QAdd64Ux2: size = 3; break;
4487 default:
4488 ppIROp(e->Iex.Binop.op);
4489 vpanic("Illegal element size in VQADDU");
4490 }
4491 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4492 res, argL, argR, size, True));
4493 return res;
4494 }
4495 case Iop_QAdd8Sx16:
4496 case Iop_QAdd16Sx8:
4497 case Iop_QAdd32Sx4:
4498 case Iop_QAdd64Sx2: {
4499 HReg res = newVRegV(env);
4500 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4501 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4502 UInt size;
4503 switch (e->Iex.Binop.op) {
4504 case Iop_QAdd8Sx16: size = 0; break;
4505 case Iop_QAdd16Sx8: size = 1; break;
4506 case Iop_QAdd32Sx4: size = 2; break;
4507 case Iop_QAdd64Sx2: size = 3; break;
4508 default:
4509 ppIROp(e->Iex.Binop.op);
4510 vpanic("Illegal element size in VQADDS");
4511 }
4512 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4513 res, argL, argR, size, True));
4514 return res;
4515 }
4516 case Iop_Sub8x16:
4517 case Iop_Sub16x8:
4518 case Iop_Sub32x4:
4519 case Iop_Sub64x2: {
4520 HReg res = newVRegV(env);
4521 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4522 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4523 UInt size;
4524 switch (e->Iex.Binop.op) {
4525 case Iop_Sub8x16: size = 0; break;
4526 case Iop_Sub16x8: size = 1; break;
4527 case Iop_Sub32x4: size = 2; break;
4528 case Iop_Sub64x2: size = 3; break;
4529 default:
4530 ppIROp(e->Iex.Binop.op);
4531 vpanic("Illegal element size in VSUB");
4532 }
4533 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4534 res, argL, argR, size, True));
4535 return res;
4536 }
4537 case Iop_Sub32Fx4: {
4538 HReg res = newVRegV(env);
4539 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4540 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4541 UInt size = 0;
4542 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4543 res, argL, argR, size, True));
4544 return res;
4545 }
4546 case Iop_QSub8Ux16:
4547 case Iop_QSub16Ux8:
4548 case Iop_QSub32Ux4:
4549 case Iop_QSub64Ux2: {
4550 HReg res = newVRegV(env);
4551 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4552 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4553 UInt size;
4554 switch (e->Iex.Binop.op) {
4555 case Iop_QSub8Ux16: size = 0; break;
4556 case Iop_QSub16Ux8: size = 1; break;
4557 case Iop_QSub32Ux4: size = 2; break;
4558 case Iop_QSub64Ux2: size = 3; break;
4559 default:
4560 ppIROp(e->Iex.Binop.op);
4561 vpanic("Illegal element size in VQSUBU");
4562 }
4563 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4564 res, argL, argR, size, True));
4565 return res;
4566 }
4567 case Iop_QSub8Sx16:
4568 case Iop_QSub16Sx8:
4569 case Iop_QSub32Sx4:
4570 case Iop_QSub64Sx2: {
4571 HReg res = newVRegV(env);
4572 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4573 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4574 UInt size;
4575 switch (e->Iex.Binop.op) {
4576 case Iop_QSub8Sx16: size = 0; break;
4577 case Iop_QSub16Sx8: size = 1; break;
4578 case Iop_QSub32Sx4: size = 2; break;
4579 case Iop_QSub64Sx2: size = 3; break;
4580 default:
4581 ppIROp(e->Iex.Binop.op);
4582 vpanic("Illegal element size in VQSUBS");
4583 }
4584 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4585 res, argL, argR, size, True));
4586 return res;
4587 }
4588 case Iop_Max8Ux16:
4589 case Iop_Max16Ux8:
4590 case Iop_Max32Ux4: {
4591 HReg res = newVRegV(env);
4592 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4593 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4594 UInt size;
4595 switch (e->Iex.Binop.op) {
4596 case Iop_Max8Ux16: size = 0; break;
4597 case Iop_Max16Ux8: size = 1; break;
4598 case Iop_Max32Ux4: size = 2; break;
4599 default: vpanic("Illegal element size in VMAXU");
4600 }
4601 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4602 res, argL, argR, size, True));
4603 return res;
4604 }
4605 case Iop_Max8Sx16:
4606 case Iop_Max16Sx8:
4607 case Iop_Max32Sx4: {
4608 HReg res = newVRegV(env);
4609 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4610 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4611 UInt size;
4612 switch (e->Iex.Binop.op) {
4613 case Iop_Max8Sx16: size = 0; break;
4614 case Iop_Max16Sx8: size = 1; break;
4615 case Iop_Max32Sx4: size = 2; break;
4616 default: vpanic("Illegal element size in VMAXU");
4617 }
4618 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4619 res, argL, argR, size, True));
4620 return res;
4621 }
4622 case Iop_Min8Ux16:
4623 case Iop_Min16Ux8:
4624 case Iop_Min32Ux4: {
4625 HReg res = newVRegV(env);
4626 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4627 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4628 UInt size;
4629 switch (e->Iex.Binop.op) {
4630 case Iop_Min8Ux16: size = 0; break;
4631 case Iop_Min16Ux8: size = 1; break;
4632 case Iop_Min32Ux4: size = 2; break;
4633 default: vpanic("Illegal element size in VMAXU");
4634 }
4635 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4636 res, argL, argR, size, True));
4637 return res;
4638 }
4639 case Iop_Min8Sx16:
4640 case Iop_Min16Sx8:
4641 case Iop_Min32Sx4: {
4642 HReg res = newVRegV(env);
4643 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4644 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4645 UInt size;
4646 switch (e->Iex.Binop.op) {
4647 case Iop_Min8Sx16: size = 0; break;
4648 case Iop_Min16Sx8: size = 1; break;
4649 case Iop_Min32Sx4: size = 2; break;
4650 default: vpanic("Illegal element size in VMAXU");
4651 }
4652 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4653 res, argL, argR, size, True));
4654 return res;
4655 }
4656 case Iop_Sar8x16:
4657 case Iop_Sar16x8:
4658 case Iop_Sar32x4:
4659 case Iop_Sar64x2: {
4660 HReg res = newVRegV(env);
4661 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4662 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4663 HReg argR2 = newVRegV(env);
4664 HReg zero = newVRegV(env);
4665 UInt size;
4666 switch (e->Iex.Binop.op) {
4667 case Iop_Sar8x16: size = 0; break;
4668 case Iop_Sar16x8: size = 1; break;
4669 case Iop_Sar32x4: size = 2; break;
4670 case Iop_Sar64x2: size = 3; break;
4671 default: vassert(0);
4672 }
4673 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4674 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4675 argR2, zero, argR, size, True));
4676 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4677 res, argL, argR2, size, True));
4678 return res;
4679 }
4680 case Iop_Sal8x16:
4681 case Iop_Sal16x8:
4682 case Iop_Sal32x4:
4683 case Iop_Sal64x2: {
4684 HReg res = newVRegV(env);
4685 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4686 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4687 UInt size;
4688 switch (e->Iex.Binop.op) {
4689 case Iop_Sal8x16: size = 0; break;
4690 case Iop_Sal16x8: size = 1; break;
4691 case Iop_Sal32x4: size = 2; break;
4692 case Iop_Sal64x2: size = 3; break;
4693 default: vassert(0);
4694 }
4695 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4696 res, argL, argR, size, True));
4697 return res;
4698 }
4699 case Iop_Shr8x16:
4700 case Iop_Shr16x8:
4701 case Iop_Shr32x4:
4702 case Iop_Shr64x2: {
4703 HReg res = newVRegV(env);
4704 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4705 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4706 HReg argR2 = newVRegV(env);
4707 HReg zero = newVRegV(env);
4708 UInt size;
4709 switch (e->Iex.Binop.op) {
4710 case Iop_Shr8x16: size = 0; break;
4711 case Iop_Shr16x8: size = 1; break;
4712 case Iop_Shr32x4: size = 2; break;
4713 case Iop_Shr64x2: size = 3; break;
4714 default: vassert(0);
4715 }
4716 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4717 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4718 argR2, zero, argR, size, True));
4719 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4720 res, argL, argR2, size, True));
4721 return res;
4722 }
4723 case Iop_Shl8x16:
4724 case Iop_Shl16x8:
4725 case Iop_Shl32x4:
4726 case Iop_Shl64x2: {
4727 HReg res = newVRegV(env);
4728 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4729 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4730 UInt size;
4731 switch (e->Iex.Binop.op) {
4732 case Iop_Shl8x16: size = 0; break;
4733 case Iop_Shl16x8: size = 1; break;
4734 case Iop_Shl32x4: size = 2; break;
4735 case Iop_Shl64x2: size = 3; break;
4736 default: vassert(0);
4737 }
4738 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4739 res, argL, argR, size, True));
4740 return res;
4741 }
4742 case Iop_QShl8x16:
4743 case Iop_QShl16x8:
4744 case Iop_QShl32x4:
4745 case Iop_QShl64x2: {
4746 HReg res = newVRegV(env);
4747 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4748 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4749 UInt size;
4750 switch (e->Iex.Binop.op) {
4751 case Iop_QShl8x16: size = 0; break;
4752 case Iop_QShl16x8: size = 1; break;
4753 case Iop_QShl32x4: size = 2; break;
4754 case Iop_QShl64x2: size = 3; break;
4755 default: vassert(0);
4756 }
4757 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4758 res, argL, argR, size, True));
4759 return res;
4760 }
4761 case Iop_QSal8x16:
4762 case Iop_QSal16x8:
4763 case Iop_QSal32x4:
4764 case Iop_QSal64x2: {
4765 HReg res = newVRegV(env);
4766 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4767 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4768 UInt size;
4769 switch (e->Iex.Binop.op) {
4770 case Iop_QSal8x16: size = 0; break;
4771 case Iop_QSal16x8: size = 1; break;
4772 case Iop_QSal32x4: size = 2; break;
4773 case Iop_QSal64x2: size = 3; break;
4774 default: vassert(0);
4775 }
4776 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4777 res, argL, argR, size, True));
4778 return res;
4779 }
4780 case Iop_QShlN8x16:
4781 case Iop_QShlN16x8:
4782 case Iop_QShlN32x4:
4783 case Iop_QShlN64x2: {
4784 HReg res = newVRegV(env);
4785 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4786 UInt size, imm;
4787 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4788 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4789 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4790 "second argument only\n");
4791 }
4792 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4793 switch (e->Iex.Binop.op) {
4794 case Iop_QShlN8x16: size = 8 | imm; break;
4795 case Iop_QShlN16x8: size = 16 | imm; break;
4796 case Iop_QShlN32x4: size = 32 | imm; break;
4797 case Iop_QShlN64x2: size = 64 | imm; break;
4798 default: vassert(0);
4799 }
4800 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4801 res, argL, size, True));
4802 return res;
4803 }
4804 case Iop_QShlN8Sx16:
4805 case Iop_QShlN16Sx8:
4806 case Iop_QShlN32Sx4:
4807 case Iop_QShlN64Sx2: {
4808 HReg res = newVRegV(env);
4809 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4810 UInt size, imm;
4811 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4812 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4813 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4814 "second argument only\n");
4815 }
4816 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4817 switch (e->Iex.Binop.op) {
4818 case Iop_QShlN8Sx16: size = 8 | imm; break;
4819 case Iop_QShlN16Sx8: size = 16 | imm; break;
4820 case Iop_QShlN32Sx4: size = 32 | imm; break;
4821 case Iop_QShlN64Sx2: size = 64 | imm; break;
4822 default: vassert(0);
4823 }
4824 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4825 res, argL, size, True));
4826 return res;
4827 }
4828 case Iop_QSalN8x16:
4829 case Iop_QSalN16x8:
4830 case Iop_QSalN32x4:
4831 case Iop_QSalN64x2: {
4832 HReg res = newVRegV(env);
4833 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4834 UInt size, imm;
4835 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4836 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4837 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4838 "second argument only\n");
4839 }
4840 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4841 switch (e->Iex.Binop.op) {
4842 case Iop_QSalN8x16: size = 8 | imm; break;
4843 case Iop_QSalN16x8: size = 16 | imm; break;
4844 case Iop_QSalN32x4: size = 32 | imm; break;
4845 case Iop_QSalN64x2: size = 64 | imm; break;
4846 default: vassert(0);
4847 }
4848 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4849 res, argL, size, True));
4850 return res;
4851 }
4852 case Iop_ShrN8x16:
4853 case Iop_ShrN16x8:
4854 case Iop_ShrN32x4:
4855 case Iop_ShrN64x2: {
4856 HReg res = newVRegV(env);
4857 HReg tmp = newVRegV(env);
4858 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4859 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4860 HReg argR2 = newVRegI(env);
4861 UInt size;
4862 switch (e->Iex.Binop.op) {
4863 case Iop_ShrN8x16: size = 0; break;
4864 case Iop_ShrN16x8: size = 1; break;
4865 case Iop_ShrN32x4: size = 2; break;
4866 case Iop_ShrN64x2: size = 3; break;
4867 default: vassert(0);
4868 }
4869 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4870 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4871 tmp, argR2, 0, True));
4872 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4873 res, argL, tmp, size, True));
4874 return res;
4875 }
4876 case Iop_ShlN8x16:
4877 case Iop_ShlN16x8:
4878 case Iop_ShlN32x4:
4879 case Iop_ShlN64x2: {
4880 HReg res = newVRegV(env);
4881 HReg tmp = newVRegV(env);
4882 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4883 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4884 UInt size;
4885 switch (e->Iex.Binop.op) {
4886 case Iop_ShlN8x16: size = 0; break;
4887 case Iop_ShlN16x8: size = 1; break;
4888 case Iop_ShlN32x4: size = 2; break;
4889 case Iop_ShlN64x2: size = 3; break;
4890 default: vassert(0);
4891 }
4892 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4893 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4894 res, argL, tmp, size, True));
4895 return res;
4896 }
4897 case Iop_SarN8x16:
4898 case Iop_SarN16x8:
4899 case Iop_SarN32x4:
4900 case Iop_SarN64x2: {
4901 HReg res = newVRegV(env);
4902 HReg tmp = newVRegV(env);
4903 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4904 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4905 HReg argR2 = newVRegI(env);
4906 UInt size;
4907 switch (e->Iex.Binop.op) {
4908 case Iop_SarN8x16: size = 0; break;
4909 case Iop_SarN16x8: size = 1; break;
4910 case Iop_SarN32x4: size = 2; break;
4911 case Iop_SarN64x2: size = 3; break;
4912 default: vassert(0);
4913 }
4914 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4915 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4916 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4917 res, argL, tmp, size, True));
4918 return res;
4919 }
4920 case Iop_CmpGT8Ux16:
4921 case Iop_CmpGT16Ux8:
4922 case Iop_CmpGT32Ux4: {
4923 HReg res = newVRegV(env);
4924 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4925 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4926 UInt size;
4927 switch (e->Iex.Binop.op) {
4928 case Iop_CmpGT8Ux16: size = 0; break;
4929 case Iop_CmpGT16Ux8: size = 1; break;
4930 case Iop_CmpGT32Ux4: size = 2; break;
4931 default: vassert(0);
4932 }
4933 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4934 res, argL, argR, size, True));
4935 return res;
4936 }
4937 case Iop_CmpGT8Sx16:
4938 case Iop_CmpGT16Sx8:
4939 case Iop_CmpGT32Sx4: {
4940 HReg res = newVRegV(env);
4941 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4942 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4943 UInt size;
4944 switch (e->Iex.Binop.op) {
4945 case Iop_CmpGT8Sx16: size = 0; break;
4946 case Iop_CmpGT16Sx8: size = 1; break;
4947 case Iop_CmpGT32Sx4: size = 2; break;
4948 default: vassert(0);
4949 }
4950 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4951 res, argL, argR, size, True));
4952 return res;
4953 }
4954 case Iop_CmpEQ8x16:
4955 case Iop_CmpEQ16x8:
4956 case Iop_CmpEQ32x4: {
4957 HReg res = newVRegV(env);
4958 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4959 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4960 UInt size;
4961 switch (e->Iex.Binop.op) {
4962 case Iop_CmpEQ8x16: size = 0; break;
4963 case Iop_CmpEQ16x8: size = 1; break;
4964 case Iop_CmpEQ32x4: size = 2; break;
4965 default: vassert(0);
4966 }
4967 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4968 res, argL, argR, size, True));
4969 return res;
4970 }
4971 case Iop_Mul8x16:
4972 case Iop_Mul16x8:
4973 case Iop_Mul32x4: {
4974 HReg res = newVRegV(env);
4975 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4976 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4977 UInt size = 0;
4978 switch(e->Iex.Binop.op) {
4979 case Iop_Mul8x16: size = 0; break;
4980 case Iop_Mul16x8: size = 1; break;
4981 case Iop_Mul32x4: size = 2; break;
4982 default: vassert(0);
4983 }
4984 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
4985 res, argL, argR, size, True));
4986 return res;
4987 }
4988 case Iop_Mul32Fx4: {
4989 HReg res = newVRegV(env);
4990 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4991 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4992 UInt size = 0;
4993 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
4994 res, argL, argR, size, True));
4995 return res;
4996 }
4997 case Iop_Mull8Ux8:
4998 case Iop_Mull16Ux4:
4999 case Iop_Mull32Ux2: {
5000 HReg res = newVRegV(env);
5001 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5002 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5003 UInt size = 0;
5004 switch(e->Iex.Binop.op) {
5005 case Iop_Mull8Ux8: size = 0; break;
5006 case Iop_Mull16Ux4: size = 1; break;
5007 case Iop_Mull32Ux2: size = 2; break;
5008 default: vassert(0);
5009 }
5010 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5011 res, argL, argR, size, True));
5012 return res;
5013 }
5014
5015 case Iop_Mull8Sx8:
5016 case Iop_Mull16Sx4:
5017 case Iop_Mull32Sx2: {
5018 HReg res = newVRegV(env);
5019 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5020 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5021 UInt size = 0;
5022 switch(e->Iex.Binop.op) {
5023 case Iop_Mull8Sx8: size = 0; break;
5024 case Iop_Mull16Sx4: size = 1; break;
5025 case Iop_Mull32Sx2: size = 2; break;
5026 default: vassert(0);
5027 }
5028 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5029 res, argL, argR, size, True));
5030 return res;
5031 }
5032
5033 case Iop_QDMulHi16Sx8:
5034 case Iop_QDMulHi32Sx4: {
5035 HReg res = newVRegV(env);
5036 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5037 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5038 UInt size = 0;
5039 switch(e->Iex.Binop.op) {
5040 case Iop_QDMulHi16Sx8: size = 1; break;
5041 case Iop_QDMulHi32Sx4: size = 2; break;
5042 default: vassert(0);
5043 }
5044 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5045 res, argL, argR, size, True));
5046 return res;
5047 }
5048
5049 case Iop_QRDMulHi16Sx8:
5050 case Iop_QRDMulHi32Sx4: {
5051 HReg res = newVRegV(env);
5052 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5053 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5054 UInt size = 0;
5055 switch(e->Iex.Binop.op) {
5056 case Iop_QRDMulHi16Sx8: size = 1; break;
5057 case Iop_QRDMulHi32Sx4: size = 2; break;
5058 default: vassert(0);
5059 }
5060 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5061 res, argL, argR, size, True));
5062 return res;
5063 }
5064
5065 case Iop_QDMulLong16Sx4:
5066 case Iop_QDMulLong32Sx2: {
5067 HReg res = newVRegV(env);
5068 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5069 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5070 UInt size = 0;
5071 switch(e->Iex.Binop.op) {
5072 case Iop_QDMulLong16Sx4: size = 1; break;
5073 case Iop_QDMulLong32Sx2: size = 2; break;
5074 default: vassert(0);
5075 }
5076 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5077 res, argL, argR, size, True));
5078 return res;
5079 }
5080 case Iop_PolynomialMul8x16: {
5081 HReg res = newVRegV(env);
5082 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5083 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5084 UInt size = 0;
5085 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5086 res, argL, argR, size, True));
5087 return res;
5088 }
5089 case Iop_Max32Fx4: {
5090 HReg res = newVRegV(env);
5091 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5092 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5093 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5094 res, argL, argR, 2, True));
5095 return res;
5096 }
5097 case Iop_Min32Fx4: {
5098 HReg res = newVRegV(env);
5099 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5100 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5101 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5102 res, argL, argR, 2, True));
5103 return res;
5104 }
5105 case Iop_PwMax32Fx4: {
5106 HReg res = newVRegV(env);
5107 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5108 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5109 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5110 res, argL, argR, 2, True));
5111 return res;
5112 }
5113 case Iop_PwMin32Fx4: {
5114 HReg res = newVRegV(env);
5115 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5116 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5117 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5118 res, argL, argR, 2, True));
5119 return res;
5120 }
5121 case Iop_CmpGT32Fx4: {
5122 HReg res = newVRegV(env);
5123 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5124 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5125 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5126 res, argL, argR, 2, True));
5127 return res;
5128 }
5129 case Iop_CmpGE32Fx4: {
5130 HReg res = newVRegV(env);
5131 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5132 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5133 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5134 res, argL, argR, 2, True));
5135 return res;
5136 }
5137 case Iop_CmpEQ32Fx4: {
5138 HReg res = newVRegV(env);
5139 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5140 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5141 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5142 res, argL, argR, 2, True));
5143 return res;
5144 }
5145
5146 case Iop_PolynomialMull8x8: {
5147 HReg res = newVRegV(env);
5148 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5149 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5150 UInt size = 0;
5151 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5152 res, argL, argR, size, True));
5153 return res;
5154 }
5155 case Iop_F32ToFixed32Ux4_RZ:
5156 case Iop_F32ToFixed32Sx4_RZ:
5157 case Iop_Fixed32UToF32x4_RN:
5158 case Iop_Fixed32SToF32x4_RN: {
5159 HReg res = newVRegV(env);
5160 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5161 ARMNeonUnOp op;
5162 UInt imm6;
5163 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5164 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5165 vpanic("ARM supports FP <-> Fixed conversion with constant "
5166 "second argument less than 33 only\n");
5167 }
5168 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5169 vassert(imm6 <= 32 && imm6 > 0);
5170 imm6 = 64 - imm6;
5171 switch(e->Iex.Binop.op) {
5172 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5173 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5174 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5175 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5176 default: vassert(0);
5177 }
5178 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5179 return res;
5180 }
5181 /*
5182 FIXME remove if not used
5183 case Iop_VDup8x16:
5184 case Iop_VDup16x8:
5185 case Iop_VDup32x4: {
5186 HReg res = newVRegV(env);
5187 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5188 UInt imm4;
5189 UInt index;
5190 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5191 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5192 vpanic("ARM supports Iop_VDup with constant "
5193 "second argument less than 16 only\n");
5194 }
5195 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5196 switch(e->Iex.Binop.op) {
5197 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5198 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5199 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5200 default: vassert(0);
5201 }
5202 if (imm4 >= 16) {
5203 vpanic("ARM supports Iop_VDup with constant "
5204 "second argument less than 16 only\n");
5205 }
5206 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5207 res, argL, imm4, True));
5208 return res;
5209 }
5210 */
5211 case Iop_PwAdd8x16:
5212 case Iop_PwAdd16x8:
5213 case Iop_PwAdd32x4: {
5214 HReg res = newVRegV(env);
5215 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5216 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5217 UInt size = 0;
5218 switch(e->Iex.Binop.op) {
5219 case Iop_PwAdd8x16: size = 0; break;
5220 case Iop_PwAdd16x8: size = 1; break;
5221 case Iop_PwAdd32x4: size = 2; break;
5222 default: vassert(0);
5223 }
5224 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5225 res, argL, argR, size, True));
5226 return res;
5227 }
5228 /* ... */
5229 default:
5230 break;
5231 }
5232 }
5233
5234 if (e->tag == Iex_Triop) {
5235 switch (e->Iex.Triop.op) {
5236 case Iop_ExtractV128: {
5237 HReg res = newVRegV(env);
5238 HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
5239 HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
5240 UInt imm4;
5241 if (e->Iex.Triop.arg3->tag != Iex_Const ||
5242 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
5243 vpanic("ARM target supports Iop_ExtractV128 with constant "
5244 "third argument less than 16 only\n");
5245 }
5246 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
5247 if (imm4 >= 16) {
5248 vpanic("ARM target supports Iop_ExtractV128 with constant "
5249 "third argument less than 16 only\n");
5250 }
5251 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5252 res, argL, argR, imm4, True));
5253 return res;
5254 }
5255 default:
5256 break;
5257 }
5258 }
5259
5260 if (e->tag == Iex_Mux0X) {
5261 HReg r8;
5262 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5263 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5264 HReg dst = newVRegV(env);
5265 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5266 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5267 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5268 ARMRI84_I84(0xFF,0)));
5269 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5270 return dst;
5271 }
5272
5273 neon_expr_bad:
5274 ppIRExpr(e);
5275 vpanic("iselNeonExpr_wrk");
5276}
5277
5278/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +00005279/*--- ISEL: Floating point expressions (64 bit) ---*/
5280/*---------------------------------------------------------*/
5281
5282/* Compute a 64-bit floating point value into a register, the identity
5283 of which is returned. As with iselIntExpr_R, the reg may be either
5284 real or virtual; in any case it must not be changed by subsequent
5285 code emitted by the caller. */
5286
5287static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5288{
5289 HReg r = iselDblExpr_wrk( env, e );
5290# if 0
5291 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5292# endif
5293 vassert(hregClass(r) == HRcFlt64);
5294 vassert(hregIsVirtual(r));
5295 return r;
5296}
5297
5298/* DO NOT CALL THIS DIRECTLY */
5299static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5300{
5301 IRType ty = typeOfIRExpr(env->type_env,e);
5302 vassert(e);
5303 vassert(ty == Ity_F64);
5304
5305 if (e->tag == Iex_RdTmp) {
5306 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5307 }
5308
5309 if (e->tag == Iex_Const) {
5310 /* Just handle the zero case. */
5311 IRConst* con = e->Iex.Const.con;
5312 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5313 HReg z32 = newVRegI(env);
5314 HReg dst = newVRegD(env);
5315 addInstr(env, ARMInstr_Imm32(z32, 0));
5316 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5317 return dst;
5318 }
5319 }
5320
5321 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5322 ARMAModeV* am;
5323 HReg res = newVRegD(env);
5324 vassert(e->Iex.Load.ty == Ity_F64);
5325 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5326 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5327 return res;
5328 }
5329
5330 if (e->tag == Iex_Get) {
5331 // XXX This won't work if offset > 1020 or is not 0 % 4.
5332 // In which case we'll have to generate more longwinded code.
5333 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5334 HReg res = newVRegD(env);
5335 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5336 return res;
5337 }
5338
5339 if (e->tag == Iex_Unop) {
5340 switch (e->Iex.Unop.op) {
5341 case Iop_ReinterpI64asF64: {
sewardj6c60b322010-08-22 12:48:28 +00005342 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5343 return iselNeon64Expr(env, e->Iex.Unop.arg);
5344 } else {
5345 HReg srcHi, srcLo;
5346 HReg dst = newVRegD(env);
5347 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5348 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5349 return dst;
5350 }
sewardj6c299f32009-12-31 18:00:12 +00005351 }
5352 case Iop_NegF64: {
5353 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5354 HReg dst = newVRegD(env);
5355 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5356 return dst;
5357 }
5358 case Iop_AbsF64: {
5359 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5360 HReg dst = newVRegD(env);
5361 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5362 return dst;
5363 }
5364 case Iop_F32toF64: {
5365 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5366 HReg dst = newVRegD(env);
5367 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5368 return dst;
5369 }
5370 case Iop_I32UtoF64:
5371 case Iop_I32StoF64: {
5372 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5373 HReg f32 = newVRegF(env);
5374 HReg dst = newVRegD(env);
5375 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5376 /* VMOV f32, src */
5377 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5378 /* FSITOD dst, f32 */
5379 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5380 dst, f32));
5381 return dst;
5382 }
5383 default:
5384 break;
5385 }
5386 }
5387
5388 if (e->tag == Iex_Binop) {
5389 switch (e->Iex.Binop.op) {
5390 case Iop_SqrtF64: {
5391 /* first arg is rounding mode; we ignore it. */
5392 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5393 HReg dst = newVRegD(env);
5394 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5395 return dst;
5396 }
5397 default:
5398 break;
5399 }
5400 }
5401
5402 if (e->tag == Iex_Triop) {
5403 switch (e->Iex.Triop.op) {
5404 case Iop_DivF64:
5405 case Iop_MulF64:
5406 case Iop_AddF64:
5407 case Iop_SubF64: {
5408 ARMVfpOp op = 0; /*INVALID*/
5409 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
5410 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
5411 HReg dst = newVRegD(env);
5412 switch (e->Iex.Triop.op) {
5413 case Iop_DivF64: op = ARMvfp_DIV; break;
5414 case Iop_MulF64: op = ARMvfp_MUL; break;
5415 case Iop_AddF64: op = ARMvfp_ADD; break;
5416 case Iop_SubF64: op = ARMvfp_SUB; break;
5417 default: vassert(0);
5418 }
5419 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5420 return dst;
5421 }
5422 default:
5423 break;
5424 }
5425 }
5426
5427 if (e->tag == Iex_Mux0X) {
5428 if (ty == Ity_F64
5429 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5430 HReg r8;
5431 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5432 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5433 HReg dst = newVRegD(env);
5434 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5435 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5436 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5437 ARMRI84_I84(0xFF,0)));
5438 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5439 return dst;
5440 }
5441 }
5442
5443 ppIRExpr(e);
5444 vpanic("iselDblExpr_wrk");
5445}
5446
5447
5448/*---------------------------------------------------------*/
5449/*--- ISEL: Floating point expressions (32 bit) ---*/
5450/*---------------------------------------------------------*/
5451
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */
5456
5457static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5458{
5459 HReg r = iselFltExpr_wrk( env, e );
5460# if 0
5461 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5462# endif
5463 vassert(hregClass(r) == HRcFlt32);
5464 vassert(hregIsVirtual(r));
5465 return r;
5466}
5467
5468/* DO NOT CALL THIS DIRECTLY */
5469static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5470{
5471 IRType ty = typeOfIRExpr(env->type_env,e);
5472 vassert(e);
5473 vassert(ty == Ity_F32);
5474
5475 if (e->tag == Iex_RdTmp) {
5476 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5477 }
5478
5479 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5480 ARMAModeV* am;
5481 HReg res = newVRegF(env);
5482 vassert(e->Iex.Load.ty == Ity_F32);
5483 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5484 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5485 return res;
5486 }
5487
5488 if (e->tag == Iex_Get) {
5489 // XXX This won't work if offset > 1020 or is not 0 % 4.
5490 // In which case we'll have to generate more longwinded code.
5491 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5492 HReg res = newVRegF(env);
5493 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5494 return res;
5495 }
5496
5497 if (e->tag == Iex_Unop) {
5498 switch (e->Iex.Unop.op) {
5499 case Iop_ReinterpI32asF32: {
5500 HReg dst = newVRegF(env);
5501 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5502 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5503 return dst;
5504 }
5505 case Iop_NegF32: {
5506 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5507 HReg dst = newVRegF(env);
5508 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5509 return dst;
5510 }
5511 case Iop_AbsF32: {
5512 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5513 HReg dst = newVRegF(env);
5514 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5515 return dst;
5516 }
5517 default:
5518 break;
5519 }
5520 }
5521
5522 if (e->tag == Iex_Binop) {
5523 switch (e->Iex.Binop.op) {
5524 case Iop_SqrtF32: {
5525 /* first arg is rounding mode; we ignore it. */
5526 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5527 HReg dst = newVRegF(env);
5528 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5529 return dst;
5530 }
5531 case Iop_F64toF32: {
5532 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5533 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5534 HReg valS = newVRegF(env);
5535 /* FCVTSD valS, valD */
5536 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5537 set_VFP_rounding_default(env);
5538 return valS;
5539 }
5540 default:
5541 break;
5542 }
5543 }
5544
5545 if (e->tag == Iex_Triop) {
5546 switch (e->Iex.Triop.op) {
5547 case Iop_DivF32:
5548 case Iop_MulF32:
5549 case Iop_AddF32:
5550 case Iop_SubF32: {
5551 ARMVfpOp op = 0; /*INVALID*/
5552 HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
5553 HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
5554 HReg dst = newVRegF(env);
5555 switch (e->Iex.Triop.op) {
5556 case Iop_DivF32: op = ARMvfp_DIV; break;
5557 case Iop_MulF32: op = ARMvfp_MUL; break;
5558 case Iop_AddF32: op = ARMvfp_ADD; break;
5559 case Iop_SubF32: op = ARMvfp_SUB; break;
5560 default: vassert(0);
5561 }
5562 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5563 return dst;
5564 }
5565 default:
5566 break;
5567 }
5568 }
5569
5570 if (e->tag == Iex_Mux0X) {
5571 if (ty == Ity_F32
5572 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5573 HReg r8;
5574 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5575 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5576 HReg dst = newVRegF(env);
5577 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5578 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5579 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5580 ARMRI84_I84(0xFF,0)));
5581 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5582 return dst;
5583 }
5584 }
5585
5586 ppIRExpr(e);
5587 vpanic("iselFltExpr_wrk");
5588}
5589
cerioncee30312004-12-17 20:30:21 +00005590
5591/*---------------------------------------------------------*/
5592/*--- ISEL: Statements ---*/
5593/*---------------------------------------------------------*/
5594
5595static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5596{
5597 if (vex_traceflags & VEX_TRACE_VCODE) {
5598 vex_printf("\n-- ");
5599 ppIRStmt(stmt);
5600 vex_printf("\n");
5601 }
5602 switch (stmt->tag) {
5603
5604 /* --------- STORE --------- */
5605 /* little-endian write to memory */
sewardjaf1ceca2005-06-30 23:31:27 +00005606 case Ist_Store: {
sewardj6c299f32009-12-31 18:00:12 +00005607 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5608 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5609 IREndness end = stmt->Ist.Store.end;
sewardjaf1ceca2005-06-30 23:31:27 +00005610
sewardj6c299f32009-12-31 18:00:12 +00005611 if (tya != Ity_I32 || end != Iend_LE)
5612 goto stmt_fail;
sewardjaf1ceca2005-06-30 23:31:27 +00005613
sewardj6c299f32009-12-31 18:00:12 +00005614 if (tyd == Ity_I32) {
5615 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5616 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5617 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5618 return;
5619 }
5620 if (tyd == Ity_I16) {
5621 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5622 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5623 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5624 False/*!isSignedLoad*/, rD, am));
5625 return;
5626 }
5627 if (tyd == Ity_I8) {
5628 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5629 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5630 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5631 return;
5632 }
5633 if (tyd == Ity_I64) {
sewardj6c60b322010-08-22 12:48:28 +00005634 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5635 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5636 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5637 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5638 } else {
5639 HReg rDhi, rDlo, rA;
5640 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5641 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5642 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5643 ARMAMode1_RI(rA,4)));
5644 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5645 ARMAMode1_RI(rA,0)));
5646 }
sewardj6c299f32009-12-31 18:00:12 +00005647 return;
5648 }
5649 if (tyd == Ity_F64) {
5650 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5651 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5652 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5653 return;
5654 }
5655 if (tyd == Ity_F32) {
5656 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5657 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5658 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5659 return;
5660 }
sewardj6c60b322010-08-22 12:48:28 +00005661 if (tyd == Ity_V128) {
5662 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5663 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5664 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5665 return;
5666 }
cerioncee30312004-12-17 20:30:21 +00005667
sewardj6c299f32009-12-31 18:00:12 +00005668 break;
cerioncee30312004-12-17 20:30:21 +00005669 }
5670
5671 /* --------- PUT --------- */
5672 /* write guest state, fixed offset */
5673 case Ist_Put: {
5674 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
cerioncee30312004-12-17 20:30:21 +00005675
cerioncee30312004-12-17 20:30:21 +00005676 if (tyd == Ity_I32) {
sewardj6c299f32009-12-31 18:00:12 +00005677 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5678 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5679 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5680 return;
cerioncee30312004-12-17 20:30:21 +00005681 }
sewardj6c299f32009-12-31 18:00:12 +00005682 if (tyd == Ity_I64) {
sewardj6c60b322010-08-22 12:48:28 +00005683 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5684 HReg addr = newVRegI(env);
5685 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5686 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5687 stmt->Ist.Put.offset));
5688 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5689 } else {
5690 HReg rDhi, rDlo;
5691 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5692 stmt->Ist.Put.offset + 0);
5693 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5694 stmt->Ist.Put.offset + 4);
5695 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5696 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5697 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5698 }
sewardj6c299f32009-12-31 18:00:12 +00005699 return;
cerioncee30312004-12-17 20:30:21 +00005700 }
sewardj6c299f32009-12-31 18:00:12 +00005701 if (tyd == Ity_F64) {
5702 // XXX This won't work if offset > 1020 or is not 0 % 4.
5703 // In which case we'll have to generate more longwinded code.
5704 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5705 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5706 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5707 return;
cerioncee30312004-12-17 20:30:21 +00005708 }
sewardj6c299f32009-12-31 18:00:12 +00005709 if (tyd == Ity_F32) {
5710 // XXX This won't work if offset > 1020 or is not 0 % 4.
5711 // In which case we'll have to generate more longwinded code.
5712 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5713 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5714 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5715 return;
5716 }
sewardj6c60b322010-08-22 12:48:28 +00005717 if (tyd == Ity_V128) {
5718 HReg addr = newVRegI(env);
5719 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5720 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5721 stmt->Ist.Put.offset));
5722 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5723 return;
5724 }
cerioncee30312004-12-17 20:30:21 +00005725 break;
5726 }
5727
sewardj6c299f32009-12-31 18:00:12 +00005728//zz /* --------- Indexed PUT --------- */
5729//zz /* write guest state, run-time offset */
5730//zz case Ist_PutI: {
5731//zz ARMAMode2* am2
5732//zz = genGuestArrayOffset(
5733//zz env, stmt->Ist.PutI.descr,
5734//zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5735//zz
5736//zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5737//zz
5738//zz if (tyd == Ity_I8) {
5739//zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5740//zz addInstr(env, ARMInstr_StoreB(reg, am2));
5741//zz return;
5742//zz }
5743//zz// CAB: Ity_I32, Ity_I16 ?
5744//zz break;
5745//zz }
cerioncee30312004-12-17 20:30:21 +00005746
5747 /* --------- TMP --------- */
5748 /* assign value to temporary */
sewardjdd40fdf2006-12-24 02:20:24 +00005749 case Ist_WrTmp: {
5750 IRTemp tmp = stmt->Ist.WrTmp.tmp;
cerioncee30312004-12-17 20:30:21 +00005751 IRType ty = typeOfIRTemp(env->type_env, tmp);
5752
5753 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005754 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5755 env, stmt->Ist.WrTmp.data);
5756 HReg dst = lookupIRTemp(env, tmp);
5757 addInstr(env, ARMInstr_Mov(dst,ri84));
cerioncee30312004-12-17 20:30:21 +00005758 return;
5759 }
sewardj6c299f32009-12-31 18:00:12 +00005760 if (ty == Ity_I1) {
5761 HReg dst = lookupIRTemp(env, tmp);
5762 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5763 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5764 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5765 return;
5766 }
5767 if (ty == Ity_I64) {
sewardj6c60b322010-08-22 12:48:28 +00005768 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5769 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5770 HReg dst = lookupIRTemp(env, tmp);
5771 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5772 } else {
5773 HReg rHi, rLo, dstHi, dstLo;
5774 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5775 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5776 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5777 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5778 }
sewardj6c299f32009-12-31 18:00:12 +00005779 return;
5780 }
5781 if (ty == Ity_F64) {
5782 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5783 HReg dst = lookupIRTemp(env, tmp);
5784 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5785 return;
5786 }
5787 if (ty == Ity_F32) {
5788 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5789 HReg dst = lookupIRTemp(env, tmp);
5790 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5791 return;
5792 }
sewardj6c60b322010-08-22 12:48:28 +00005793 if (ty == Ity_V128) {
5794 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5795 HReg dst = lookupIRTemp(env, tmp);
5796 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5797 return;
5798 }
cerioncee30312004-12-17 20:30:21 +00005799 break;
5800 }
5801
5802 /* --------- Call to DIRTY helper --------- */
5803 /* call complex ("dirty") helper function */
5804 case Ist_Dirty: {
sewardj6c299f32009-12-31 18:00:12 +00005805 IRType retty;
5806 IRDirty* d = stmt->Ist.Dirty.details;
5807 Bool passBBP = False;
cerioncee30312004-12-17 20:30:21 +00005808
5809 if (d->nFxState == 0)
5810 vassert(!d->needsBBP);
sewardj428fabd2005-03-21 03:11:17 +00005811
5812 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
cerioncee30312004-12-17 20:30:21 +00005813
5814 /* Marshal args, do the call, clear stack. */
sewardj6c299f32009-12-31 18:00:12 +00005815 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5816 if (!ok)
5817 break; /* will go to stmt_fail: */
cerioncee30312004-12-17 20:30:21 +00005818
5819 /* Now figure out what to do with the returned value, if any. */
5820 if (d->tmp == IRTemp_INVALID)
sewardj6c299f32009-12-31 18:00:12 +00005821 /* No return value. Nothing to do. */
5822 return;
cerioncee30312004-12-17 20:30:21 +00005823
sewardj6c299f32009-12-31 18:00:12 +00005824 retty = typeOfIRTemp(env->type_env, d->tmp);
cerioncee30312004-12-17 20:30:21 +00005825
sewardj6c299f32009-12-31 18:00:12 +00005826 if (retty == Ity_I64) {
sewardj6c60b322010-08-22 12:48:28 +00005827 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5828 HReg tmp = lookupIRTemp(env, d->tmp);
5829 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5830 hregARM_R0()));
5831 } else {
5832 HReg dstHi, dstLo;
5833 /* The returned value is in r1:r0. Park it in the
5834 register-pair associated with tmp. */
5835 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5836 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5837 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5838 }
cerioncee30312004-12-17 20:30:21 +00005839 return;
5840 }
sewardj6c299f32009-12-31 18:00:12 +00005841 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5842 /* The returned value is in r0. Park it in the register
5843 associated with tmp. */
5844 HReg dst = lookupIRTemp(env, d->tmp);
5845 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5846 return;
5847 }
5848
cerioncee30312004-12-17 20:30:21 +00005849 break;
5850 }
5851
sewardj6c299f32009-12-31 18:00:12 +00005852 /* --------- Load Linked and Store Conditional --------- */
5853 case Ist_LLSC: {
5854 if (stmt->Ist.LLSC.storedata == NULL) {
5855 /* LL */
5856 IRTemp res = stmt->Ist.LLSC.result;
5857 IRType ty = typeOfIRTemp(env->type_env, res);
sewardjff7f5b72011-07-11 11:43:38 +00005858 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005859 Int szB = 0;
5860 HReg r_dst = lookupIRTemp(env, res);
5861 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5862 switch (ty) {
5863 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005864 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005865 case Ity_I32: szB = 4; break;
5866 default: vassert(0);
5867 }
sewardjff7f5b72011-07-11 11:43:38 +00005868 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
sewardj6c299f32009-12-31 18:00:12 +00005869 addInstr(env, ARMInstr_LdrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005870 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
sewardj6c299f32009-12-31 18:00:12 +00005871 return;
5872 }
sewardjff7f5b72011-07-11 11:43:38 +00005873 if (ty == Ity_I64) {
5874 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5875 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5876 addInstr(env, ARMInstr_LdrEX(8));
5877 /* Result is in r3:r2. On a non-NEON capable CPU, we must
5878 move it into a result register pair. On a NEON capable
5879 CPU, the result register will be a 64 bit NEON
5880 register, so we must move it there instead. */
5881 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5882 HReg dst = lookupIRTemp(env, res);
5883 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
5884 hregARM_R2()));
5885 } else {
5886 HReg r_dst_hi, r_dst_lo;
5887 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
5888 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
5889 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
5890 }
5891 return;
5892 }
5893 /*NOTREACHED*/
5894 vassert(0);
sewardj6c299f32009-12-31 18:00:12 +00005895 } else {
5896 /* SC */
sewardj6c299f32009-12-31 18:00:12 +00005897 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
sewardjff7f5b72011-07-11 11:43:38 +00005898 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
5899 Int szB = 0;
5900 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5901 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
sewardj6c299f32009-12-31 18:00:12 +00005902 switch (tyd) {
5903 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005904 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005905 case Ity_I32: szB = 4; break;
5906 default: vassert(0);
5907 }
sewardjff7f5b72011-07-11 11:43:38 +00005908 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5909 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
sewardj6c299f32009-12-31 18:00:12 +00005910 addInstr(env, ARMInstr_StrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005911 } else {
5912 vassert(tyd == Ity_I64);
5913 /* This is really ugly. There is no is/is-not NEON
5914 decision akin to the case for LL, because iselInt64Expr
5915 fudges this for us, and always gets the result into two
5916 GPRs even if this means moving it from a NEON
5917 register. */
5918 HReg rDhi, rDlo;
5919 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
5920 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5921 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
5922 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
5923 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5924 addInstr(env, ARMInstr_StrEX(8));
sewardj6c299f32009-12-31 18:00:12 +00005925 }
sewardjff7f5b72011-07-11 11:43:38 +00005926 /* now r0 is 1 if failed, 0 if success. Change to IR
5927 conventions (0 is fail, 1 is success). Also transfer
5928 result to r_res. */
5929 IRTemp res = stmt->Ist.LLSC.result;
5930 IRType ty = typeOfIRTemp(env->type_env, res);
5931 HReg r_res = lookupIRTemp(env, res);
5932 ARMRI84* one = ARMRI84_I84(1,0);
5933 vassert(ty == Ity_I1);
5934 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5935 /* And be conservative -- mask off all but the lowest bit */
5936 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5937 return;
sewardj6c299f32009-12-31 18:00:12 +00005938 }
5939 break;
5940 }
5941
sewardj412098c2010-05-04 08:48:43 +00005942 /* --------- MEM FENCE --------- */
5943 case Ist_MBE:
5944 switch (stmt->Ist.MBE.event) {
5945 case Imbe_Fence:
sewardj6d615ba2011-09-26 16:19:43 +00005946 addInstr(env, ARMInstr_MFence());
5947 return;
5948 case Imbe_CancelReservation:
5949 addInstr(env, ARMInstr_CLREX());
sewardj412098c2010-05-04 08:48:43 +00005950 return;
5951 default:
5952 break;
5953 }
5954 break;
5955
sewardj6c299f32009-12-31 18:00:12 +00005956 /* --------- INSTR MARK --------- */
5957 /* Doesn't generate any executable code ... */
5958 case Ist_IMark:
5959 return;
5960
5961 /* --------- NO-OP --------- */
5962 case Ist_NoOp:
5963 return;
5964
cerioncee30312004-12-17 20:30:21 +00005965 /* --------- EXIT --------- */
cerioncee30312004-12-17 20:30:21 +00005966 case Ist_Exit: {
sewardj6c299f32009-12-31 18:00:12 +00005967 HReg gnext;
cerioncee30312004-12-17 20:30:21 +00005968 ARMCondCode cc;
5969 if (stmt->Ist.Exit.dst->tag != Ico_U32)
5970 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
sewardj6c299f32009-12-31 18:00:12 +00005971 gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5972 cc = iselCondCode(env, stmt->Ist.Exit.guard);
5973 addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5974 addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
cerioncee30312004-12-17 20:30:21 +00005975 return;
5976 }
5977
5978 default: break;
5979 }
sewardjaf1ceca2005-06-30 23:31:27 +00005980 stmt_fail:
cerioncee30312004-12-17 20:30:21 +00005981 ppIRStmt(stmt);
5982 vpanic("iselStmt");
5983}
5984
5985
5986/*---------------------------------------------------------*/
5987/*--- ISEL: Basic block terminators (Nexts) ---*/
5988/*---------------------------------------------------------*/
5989
5990static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
5991{
sewardj6c299f32009-12-31 18:00:12 +00005992 HReg rDst;
5993 if (vex_traceflags & VEX_TRACE_VCODE) {
5994 vex_printf("\n-- goto {");
5995 ppIRJumpKind(jk);
5996 vex_printf("} ");
5997 ppIRExpr(next);
5998 vex_printf("\n");
5999 }
6000 rDst = iselIntExpr_R(env, next);
6001 addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
6002 addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
cerioncee30312004-12-17 20:30:21 +00006003}
6004
6005
6006/*---------------------------------------------------------*/
6007/*--- Insn selector top-level ---*/
6008/*---------------------------------------------------------*/
6009
sewardjdd40fdf2006-12-24 02:20:24 +00006010/* Translate an entire SB to arm code. */
cerioncee30312004-12-17 20:30:21 +00006011
sewardj6c299f32009-12-31 18:00:12 +00006012HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host,
6013 VexArchInfo* archinfo_host,
6014 VexAbiInfo* vbi/*UNUSED*/ )
cerioncee30312004-12-17 20:30:21 +00006015{
sewardj6c299f32009-12-31 18:00:12 +00006016 Int i, j;
6017 HReg hreg, hregHI;
6018 ISelEnv* env;
6019 UInt hwcaps_host = archinfo_host->hwcaps;
sewardj6c60b322010-08-22 12:48:28 +00006020 static UInt counter = 0;
cerioncee30312004-12-17 20:30:21 +00006021
sewardj6c299f32009-12-31 18:00:12 +00006022 /* sanity ... */
6023 vassert(arch_host == VexArchARM);
sewardj6c60b322010-08-22 12:48:28 +00006024
6025 /* hwcaps should not change from one ISEL call to another. */
6026 arm_hwcaps = hwcaps_host;
cerioncee30312004-12-17 20:30:21 +00006027
sewardj6c299f32009-12-31 18:00:12 +00006028 /* Make up an initial environment to use. */
6029 env = LibVEX_Alloc(sizeof(ISelEnv));
6030 env->vreg_ctr = 0;
6031
6032 /* Set up output code array. */
6033 env->code = newHInstrArray();
cerioncee30312004-12-17 20:30:21 +00006034
sewardj6c299f32009-12-31 18:00:12 +00006035 /* Copy BB's type env. */
6036 env->type_env = bb->tyenv;
cerioncee30312004-12-17 20:30:21 +00006037
sewardj6c299f32009-12-31 18:00:12 +00006038 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6039 change as we go along. */
6040 env->n_vregmap = bb->tyenv->types_used;
6041 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6042 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
cerioncee30312004-12-17 20:30:21 +00006043
sewardj6c299f32009-12-31 18:00:12 +00006044 /* For each IR temporary, allocate a suitably-kinded virtual
6045 register. */
6046 j = 0;
6047 for (i = 0; i < env->n_vregmap; i++) {
6048 hregHI = hreg = INVALID_HREG;
6049 switch (bb->tyenv->types[i]) {
6050 case Ity_I1:
6051 case Ity_I8:
6052 case Ity_I16:
6053 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
sewardj6c60b322010-08-22 12:48:28 +00006054 case Ity_I64:
6055 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
6056 hreg = mkHReg(j++, HRcFlt64, True);
sewardj6c60b322010-08-22 12:48:28 +00006057 } else {
6058 hregHI = mkHReg(j++, HRcInt32, True);
6059 hreg = mkHReg(j++, HRcInt32, True);
6060 }
6061 break;
sewardj6c299f32009-12-31 18:00:12 +00006062 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
6063 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
sewardj06122e72011-03-28 12:14:48 +00006064 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardj6c299f32009-12-31 18:00:12 +00006065 default: ppIRType(bb->tyenv->types[i]);
6066 vpanic("iselBB: IRTemp type");
6067 }
6068 env->vregmap[i] = hreg;
6069 env->vregmapHI[i] = hregHI;
6070 }
6071 env->vreg_ctr = j;
cerioncee30312004-12-17 20:30:21 +00006072
sewardj6c299f32009-12-31 18:00:12 +00006073 /* Keep a copy of the link reg, since any call to a helper function
6074 will trash it, and we can't get back to the dispatcher once that
6075 happens. */
6076 env->savedLR = newVRegI(env);
6077 addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
cerioncee30312004-12-17 20:30:21 +00006078
sewardj6c299f32009-12-31 18:00:12 +00006079 /* Ok, finally we can iterate over the statements. */
6080 for (i = 0; i < bb->stmts_used; i++)
6081 iselStmt(env,bb->stmts[i]);
6082
6083 iselNext(env,bb->next,bb->jumpkind);
6084
6085 /* record the number of vregs we used. */
6086 env->code->n_vregs = env->vreg_ctr;
sewardj6c60b322010-08-22 12:48:28 +00006087 counter++;
sewardj6c299f32009-12-31 18:00:12 +00006088 return env->code;
cerioncee30312004-12-17 20:30:21 +00006089}
6090
6091
cerioncee30312004-12-17 20:30:21 +00006092/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00006093/*--- end host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00006094/*---------------------------------------------------------------*/