blob: b19e6c4efe83f3879e11384e6cc0e4c6647a1e9f [file] [log] [blame]
cerioncee30312004-12-17 20:30:21 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
cerioncee30312004-12-17 20:30:21 +00009
sewardj25e54732012-08-05 15:36:51 +000010 Copyright (C) 2004-2012 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardj64733c42010-10-12 10:10:46 +000012
13 NEON support is
sewardj25e54732012-08-05 15:36:51 +000014 Copyright (C) 2010-2012 Samsung Electronics
sewardj64733c42010-10-12 10:10:46 +000015 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
cerioncee30312004-12-17 20:30:21 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
cerioncee30312004-12-17 20:30:21 +000022
sewardj752f9062010-05-03 21:38:49 +000023 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000031 02110-1301, USA.
32
sewardj752f9062010-05-03 21:38:49 +000033 The GNU General Public License is contained in the file COPYING.
cerioncee30312004-12-17 20:30:21 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
sewardj6c60b322010-08-22 12:48:28 +000039#include "ir_match.h"
cerioncee30312004-12-17 20:30:21 +000040
sewardjcef7d3e2009-07-02 12:21:59 +000041#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
sewardje2ea1762010-09-22 00:56:37 +000044#include "host_generic_simd64.h" // for 32-bit SIMD helpers
sewardjcef7d3e2009-07-02 12:21:59 +000045#include "host_arm_defs.h"
cerioncee30312004-12-17 20:30:21 +000046
47
cerioncee30312004-12-17 20:30:21 +000048/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +000049/*--- ARMvfp control word stuff ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
56
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
59 should be zero).
60*/
61
62#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +000066/*--- ISelEnv ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
sewardj6c299f32009-12-31 18:00:12 +000082 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
cerioncee30312004-12-17 20:30:21 +000087 - The code array, that is, the insns selected so far.
88
89 - A counter, for generating new virtual registers.
90
sewardj6c299f32009-12-31 18:00:12 +000091 - The host hardware capabilities word. This is set at the start
92 and does not change.
93
sewardjc6f970f2012-04-02 21:54:49 +000094 - A Bool for indicating whether we may generate chain-me
95 instructions for control flow transfers, or whether we must use
96 XAssisted.
97
98 - The maximum guest address of any guest insn in this block.
99 Actually, the address of the highest-addressed byte from any insn
100 in this block. Is set at the start and does not change. This is
101 used for detecting jumps which are definitely forward-edges from
102 this block, and therefore can be made (chained) to the fast entry
103 point of the destination, thereby avoiding the destination's
104 event check.
105
106 Note, this is all (well, mostly) host-independent.
107*/
cerioncee30312004-12-17 20:30:21 +0000108
109typedef
110 struct {
sewardjc6f970f2012-04-02 21:54:49 +0000111 /* Constant -- are set at the start and do not change. */
cerioncee30312004-12-17 20:30:21 +0000112 IRTypeEnv* type_env;
113
114 HReg* vregmap;
sewardj6c299f32009-12-31 18:00:12 +0000115 HReg* vregmapHI;
cerioncee30312004-12-17 20:30:21 +0000116 Int n_vregmap;
117
sewardj6c299f32009-12-31 18:00:12 +0000118 UInt hwcaps;
sewardjc6f970f2012-04-02 21:54:49 +0000119
120 Bool chainingAllowed;
121 Addr64 max_ga;
122
123 /* These are modified as we go along. */
124 HInstrArray* code;
125 Int vreg_ctr;
cerioncee30312004-12-17 20:30:21 +0000126 }
127 ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131 vassert(tmp >= 0);
132 vassert(tmp < env->n_vregmap);
133 return env->vregmap[tmp];
134}
135
sewardj6c299f32009-12-31 18:00:12 +0000136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138 vassert(tmp >= 0);
139 vassert(tmp < env->n_vregmap);
140 vassert(env->vregmapHI[tmp] != INVALID_HREG);
141 *vrLO = env->vregmap[tmp];
142 *vrHI = env->vregmapHI[tmp];
143}
144
cerioncee30312004-12-17 20:30:21 +0000145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147 addHInstr(env->code, instr);
148 if (vex_traceflags & VEX_TRACE_VCODE) {
149 ppARMInstr(instr);
150 vex_printf("\n");
151 }
sewardj6c60b322010-08-22 12:48:28 +0000152#if 0
153 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
154 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
155 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
156 ppARMInstr(instr);
157 vex_printf("\n");
158 }
159#endif
cerioncee30312004-12-17 20:30:21 +0000160}
161
162static HReg newVRegI ( ISelEnv* env )
163{
164 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
165 env->vreg_ctr++;
166 return reg;
167}
168
sewardj6c299f32009-12-31 18:00:12 +0000169static HReg newVRegD ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176static HReg newVRegF ( ISelEnv* env )
177{
178 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
179 env->vreg_ctr++;
180 return reg;
181}
cerioncee30312004-12-17 20:30:21 +0000182
sewardj6c60b322010-08-22 12:48:28 +0000183static HReg newVRegV ( ISelEnv* env )
184{
185 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186 env->vreg_ctr++;
187 return reg;
188}
189
190/* These are duplicated in guest_arm_toIR.c */
191static IRExpr* unop ( IROp op, IRExpr* a )
192{
193 return IRExpr_Unop(op, a);
194}
195
196static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
197{
198 return IRExpr_Binop(op, a1, a2);
199}
200
sewardj6c60b322010-08-22 12:48:28 +0000201static IRExpr* bind ( Int binder )
202{
203 return IRExpr_Binder(binder);
204}
205
cerioncee30312004-12-17 20:30:21 +0000206
207/*---------------------------------------------------------*/
208/*--- ISEL: Forward declarations ---*/
209/*---------------------------------------------------------*/
210
211/* These are organised as iselXXX and iselXXX_wrk pairs. The
212 iselXXX_wrk do the real work, but are not to be called directly.
213 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
214 checks that all returned registers are virtual. You should not
215 call the _wrk version directly.
216*/
sewardj6c299f32009-12-31 18:00:12 +0000217static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
218static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000219
sewardj6c299f32009-12-31 18:00:12 +0000220static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
221static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000222
sewardj6c299f32009-12-31 18:00:12 +0000223static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
224static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000225
sewardjff7f5b72011-07-11 11:43:38 +0000226static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
227static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
sewardj6c60b322010-08-22 12:48:28 +0000228
sewardj6c299f32009-12-31 18:00:12 +0000229static ARMRI84* iselIntExpr_RI84_wrk
230 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
231static ARMRI84* iselIntExpr_RI84
232 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000233
sewardj6c299f32009-12-31 18:00:12 +0000234static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
235static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000236
sewardj6c299f32009-12-31 18:00:12 +0000237static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
238static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000239
sewardj6c299f32009-12-31 18:00:12 +0000240static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
241static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
242
243static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
244 ISelEnv* env, IRExpr* e );
245static void iselInt64Expr ( HReg* rHi, HReg* rLo,
246 ISelEnv* env, IRExpr* e );
247
248static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
250
251static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
252static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000253
sewardj6c60b322010-08-22 12:48:28 +0000254static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
255static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
256
257static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
258static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000259
260/*---------------------------------------------------------*/
261/*--- ISEL: Misc helpers ---*/
262/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000263
264static UInt ROR32 ( UInt x, UInt sh ) {
265 vassert(sh >= 0 && sh < 32);
266 if (sh == 0)
267 return x;
268 else
269 return (x << (32-sh)) | (x >> sh);
cerioncee30312004-12-17 20:30:21 +0000270}
sewardj6c299f32009-12-31 18:00:12 +0000271
272/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
273 form, and if so return the components. */
274static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
275{
276 UInt i;
277 for (i = 0; i < 16; i++) {
278 if (0 == (u & 0xFFFFFF00)) {
279 *u8 = u;
280 *u4 = i;
281 return True;
282 }
283 u = ROR32(u, 30);
284 }
285 vassert(i == 16);
286 return False;
287}
cerioncee30312004-12-17 20:30:21 +0000288
289/* Make a int reg-reg move. */
sewardj6c299f32009-12-31 18:00:12 +0000290static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
cerioncee30312004-12-17 20:30:21 +0000291{
292 vassert(hregClass(src) == HRcInt32);
293 vassert(hregClass(dst) == HRcInt32);
sewardj6c299f32009-12-31 18:00:12 +0000294 return ARMInstr_Mov(dst, ARMRI84_R(src));
cerioncee30312004-12-17 20:30:21 +0000295}
296
sewardj6c299f32009-12-31 18:00:12 +0000297/* Set the VFP unit's rounding mode to default (round to nearest). */
298static void set_VFP_rounding_default ( ISelEnv* env )
cerioncee30312004-12-17 20:30:21 +0000299{
sewardj6c299f32009-12-31 18:00:12 +0000300 /* mov rTmp, #DEFAULT_FPSCR
301 fmxr fpscr, rTmp
302 */
303 HReg rTmp = newVRegI(env);
304 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
305 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
cerioncee30312004-12-17 20:30:21 +0000306}
307
sewardj6c299f32009-12-31 18:00:12 +0000308/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
309 expression denoting a value in the range 0 .. 3, indicating a round
310 mode encoded as per type IRRoundingMode. Set FPSCR to have the
311 same rounding.
312*/
313static
314void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
cerioncee30312004-12-17 20:30:21 +0000315{
sewardj6c299f32009-12-31 18:00:12 +0000316 /* This isn't simple, because 'mode' carries an IR rounding
317 encoding, and we need to translate that to an ARMvfp one:
318 The IR encoding:
319 00 to nearest (the default)
320 10 to +infinity
321 01 to -infinity
322 11 to zero
323 The ARMvfp encoding:
324 00 to nearest
325 01 to +infinity
326 10 to -infinity
327 11 to zero
328 Easy enough to do; just swap the two bits.
329 */
330 HReg irrm = iselIntExpr_R(env, mode);
331 HReg tL = newVRegI(env);
332 HReg tR = newVRegI(env);
333 HReg t3 = newVRegI(env);
334 /* tL = irrm << 1;
335 tR = irrm >> 1; if we're lucky, these will issue together
336 tL &= 2;
337 tR &= 1; ditto
338 t3 = tL | tR;
339 t3 <<= 22;
340 fmxr fpscr, t3
341 */
342 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
343 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
344 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
345 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
346 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
347 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
348 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
cerioncee30312004-12-17 20:30:21 +0000349}
cerioncee30312004-12-17 20:30:21 +0000350
cerioncee30312004-12-17 20:30:21 +0000351
sewardj6c299f32009-12-31 18:00:12 +0000352/*---------------------------------------------------------*/
353/*--- ISEL: Function call helpers ---*/
354/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +0000355
cerioncee30312004-12-17 20:30:21 +0000356/* Used only in doHelperCall. See big comment in doHelperCall re
sewardj6c299f32009-12-31 18:00:12 +0000357 handling of register-parameter args. This function figures out
358 whether evaluation of an expression might require use of a fixed
359 register. If in doubt return True (safe but suboptimal).
cerioncee30312004-12-17 20:30:21 +0000360*/
361static
362Bool mightRequireFixedRegs ( IRExpr* e )
363{
364 switch (e->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000365 case Iex_RdTmp: case Iex_Const: case Iex_Get:
366 return False;
367 default:
368 return True;
cerioncee30312004-12-17 20:30:21 +0000369 }
370}
sewardj6c299f32009-12-31 18:00:12 +0000371
cerioncee30312004-12-17 20:30:21 +0000372
373/* Do a complete function call. guard is a Ity_Bit expression
374 indicating whether or not the call happens. If guard==NULL, the
sewardj6c299f32009-12-31 18:00:12 +0000375 call is unconditional. Returns True iff it managed to handle this
376 combination of arg/return types, else returns False. */
cerioncee30312004-12-17 20:30:21 +0000377
378static
sewardj6c299f32009-12-31 18:00:12 +0000379Bool doHelperCall ( ISelEnv* env,
380 Bool passBBP,
sewardjcfe046e2013-01-17 14:23:53 +0000381 IRExpr* guard, IRCallee* cee, IRExpr** args,
382 RetLoc rloc )
cerioncee30312004-12-17 20:30:21 +0000383{
cerioncee30312004-12-17 20:30:21 +0000384 ARMCondCode cc;
sewardj6c299f32009-12-31 18:00:12 +0000385 HReg argregs[ARM_N_ARGREGS];
386 HReg tmpregs[ARM_N_ARGREGS];
387 Bool go_fast;
388 Int n_args, i, nextArgReg;
389 ULong target;
cerioncee30312004-12-17 20:30:21 +0000390
sewardj6c299f32009-12-31 18:00:12 +0000391 vassert(ARM_N_ARGREGS == 4);
cerioncee30312004-12-17 20:30:21 +0000392
sewardj6c299f32009-12-31 18:00:12 +0000393 /* Marshal args for a call and do the call.
cerioncee30312004-12-17 20:30:21 +0000394
sewardj6c299f32009-12-31 18:00:12 +0000395 If passBBP is True, r8 (the baseblock pointer) is to be passed
396 as the first arg.
cerioncee30312004-12-17 20:30:21 +0000397
sewardj6c299f32009-12-31 18:00:12 +0000398 This function only deals with a tiny set of possibilities, which
399 cover all helpers in practice. The restrictions are that only
400 arguments in registers are supported, hence only ARM_N_REGPARMS
401 x 32 integer bits in total can be passed. In fact the only
402 supported arg types are I32 and I64.
cerioncee30312004-12-17 20:30:21 +0000403
sewardj6c299f32009-12-31 18:00:12 +0000404 Generating code which is both efficient and correct when
405 parameters are to be passed in registers is difficult, for the
406 reasons elaborated in detail in comments attached to
407 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
408 of the method described in those comments.
cerioncee30312004-12-17 20:30:21 +0000409
sewardj6c299f32009-12-31 18:00:12 +0000410 The problem is split into two cases: the fast scheme and the
411 slow scheme. In the fast scheme, arguments are computed
412 directly into the target (real) registers. This is only safe
413 when we can be sure that computation of each argument will not
414 trash any real registers set by computation of any other
415 argument.
cerioncee30312004-12-17 20:30:21 +0000416
sewardj6c299f32009-12-31 18:00:12 +0000417 In the slow scheme, all args are first computed into vregs, and
418 once they are all done, they are moved to the relevant real
419 regs. This always gives correct code, but it also gives a bunch
420 of vreg-to-rreg moves which are usually redundant but are hard
421 for the register allocator to get rid of.
422
423 To decide which scheme to use, all argument expressions are
424 first examined. If they are all so simple that it is clear they
425 will be evaluated without use of any fixed registers, use the
426 fast scheme, else use the slow scheme. Note also that only
427 unconditional calls may use the fast scheme, since having to
428 compute a condition expression could itself trash real
429 registers.
cerioncee30312004-12-17 20:30:21 +0000430
431 Note this requires being able to examine an expression and
432 determine whether or not evaluation of it might use a fixed
sewardj6c299f32009-12-31 18:00:12 +0000433 register. That requires knowledge of how the rest of this insn
434 selector works. Currently just the following 3 are regarded as
435 safe -- hopefully they cover the majority of arguments in
436 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
cerioncee30312004-12-17 20:30:21 +0000437 */
cerioncee30312004-12-17 20:30:21 +0000438
sewardj6c299f32009-12-31 18:00:12 +0000439 /* Note that the cee->regparms field is meaningless on ARM hosts
440 (since there is only one calling convention) and so we always
441 ignore it. */
cerioncee30312004-12-17 20:30:21 +0000442
sewardj6c299f32009-12-31 18:00:12 +0000443 n_args = 0;
444 for (i = 0; args[i]; i++)
445 n_args++;
cerioncee30312004-12-17 20:30:21 +0000446
sewardj6c299f32009-12-31 18:00:12 +0000447 argregs[0] = hregARM_R0();
448 argregs[1] = hregARM_R1();
449 argregs[2] = hregARM_R2();
450 argregs[3] = hregARM_R3();
cerioncee30312004-12-17 20:30:21 +0000451
sewardj6c299f32009-12-31 18:00:12 +0000452 tmpregs[0] = tmpregs[1] = tmpregs[2] =
453 tmpregs[3] = INVALID_HREG;
cerioncee30312004-12-17 20:30:21 +0000454
sewardj6c299f32009-12-31 18:00:12 +0000455 /* First decide which scheme (slow or fast) is to be used. First
456 assume the fast scheme, and select slow if any contraindications
457 (wow) appear. */
458
459 go_fast = True;
460
461 if (guard) {
462 if (guard->tag == Iex_Const
463 && guard->Iex.Const.con->tag == Ico_U1
464 && guard->Iex.Const.con->Ico.U1 == True) {
465 /* unconditional */
466 } else {
467 /* Not manifestly unconditional -- be conservative. */
468 go_fast = False;
469 }
cerioncee30312004-12-17 20:30:21 +0000470 }
471
sewardj6c299f32009-12-31 18:00:12 +0000472 if (go_fast) {
473 for (i = 0; i < n_args; i++) {
cerioncee30312004-12-17 20:30:21 +0000474 if (mightRequireFixedRegs(args[i])) {
sewardj6c299f32009-12-31 18:00:12 +0000475 go_fast = False;
cerioncee30312004-12-17 20:30:21 +0000476 break;
477 }
478 }
sewardj6c299f32009-12-31 18:00:12 +0000479 }
480 /* At this point the scheme to use has been established. Generate
481 code to get the arg values into the argument rregs. If we run
482 out of arg regs, give up. */
cerioncee30312004-12-17 20:30:21 +0000483
sewardj6c299f32009-12-31 18:00:12 +0000484 if (go_fast) {
cerioncee30312004-12-17 20:30:21 +0000485
sewardj6c299f32009-12-31 18:00:12 +0000486 /* FAST SCHEME */
487 nextArgReg = 0;
cerioncee30312004-12-17 20:30:21 +0000488 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000489 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
490 hregARM_R8() ));
491 nextArgReg++;
cerioncee30312004-12-17 20:30:21 +0000492 }
493
sewardj6c299f32009-12-31 18:00:12 +0000494 for (i = 0; i < n_args; i++) {
495 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
496 if (nextArgReg >= ARM_N_ARGREGS)
497 return False; /* out of argregs */
498 if (aTy == Ity_I32) {
499 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
500 iselIntExpr_R(env, args[i]) ));
501 nextArgReg++;
502 }
503 else if (aTy == Ity_I64) {
504 /* 64-bit args must be passed in an a reg-pair of the form
505 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
506 On a little-endian host, the less significant word is
507 passed in the lower-numbered register. */
508 if (nextArgReg & 1) {
509 if (nextArgReg >= ARM_N_ARGREGS)
510 return False; /* out of argregs */
511 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
512 nextArgReg++;
513 }
514 if (nextArgReg >= ARM_N_ARGREGS)
515 return False; /* out of argregs */
516 HReg raHi, raLo;
517 iselInt64Expr(&raHi, &raLo, env, args[i]);
518 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
519 nextArgReg++;
520 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
521 nextArgReg++;
522 }
523 else
524 return False; /* unhandled arg type */
525 }
526
527 /* Fast scheme only applies for unconditional calls. Hence: */
528 cc = ARMcc_AL;
cerioncee30312004-12-17 20:30:21 +0000529
530 } else {
531
sewardj6c299f32009-12-31 18:00:12 +0000532 /* SLOW SCHEME; move via temporaries */
533 nextArgReg = 0;
534
cerioncee30312004-12-17 20:30:21 +0000535 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000536 /* This is pretty stupid; better to move directly to r0
537 after the rest of the args are done. */
538 tmpregs[nextArgReg] = newVRegI(env);
539 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
540 hregARM_R8() ));
541 nextArgReg++;
542 }
543
544 for (i = 0; i < n_args; i++) {
545 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
546 if (nextArgReg >= ARM_N_ARGREGS)
547 return False; /* out of argregs */
548 if (aTy == Ity_I32) {
549 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
550 nextArgReg++;
551 }
552 else if (aTy == Ity_I64) {
553 /* Same comment applies as in the Fast-scheme case. */
554 if (nextArgReg & 1)
555 nextArgReg++;
556 if (nextArgReg + 1 >= ARM_N_ARGREGS)
557 return False; /* out of argregs */
558 HReg raHi, raLo;
559 iselInt64Expr(&raHi, &raLo, env, args[i]);
560 tmpregs[nextArgReg] = raLo;
561 nextArgReg++;
562 tmpregs[nextArgReg] = raHi;
563 nextArgReg++;
564 }
565 }
566
567 /* Now we can compute the condition. We can't do it earlier
568 because the argument computations could trash the condition
569 codes. Be a bit clever to handle the common case where the
570 guard is 1:Bit. */
571 cc = ARMcc_AL;
572 if (guard) {
573 if (guard->tag == Iex_Const
574 && guard->Iex.Const.con->tag == Ico_U1
575 && guard->Iex.Const.con->Ico.U1 == True) {
576 /* unconditional -- do nothing */
577 } else {
578 cc = iselCondCode( env, guard );
579 }
580 }
581
582 /* Move the args to their final destinations. */
583 for (i = 0; i < nextArgReg; i++) {
584 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
585 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
586 continue;
587 }
588 /* None of these insns, including any spill code that might
589 be generated, may alter the condition codes. */
590 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
cerioncee30312004-12-17 20:30:21 +0000591 }
592
593 }
594
sewardj6c299f32009-12-31 18:00:12 +0000595 /* Should be assured by checks above */
596 vassert(nextArgReg <= ARM_N_ARGREGS);
cerioncee30312004-12-17 20:30:21 +0000597
sewardj6c299f32009-12-31 18:00:12 +0000598 target = (HWord)Ptr_to_ULong(cee->addr);
cerioncee30312004-12-17 20:30:21 +0000599
sewardj6c299f32009-12-31 18:00:12 +0000600 /* nextArgReg doles out argument registers. Since these are
601 assigned in the order r0, r1, r2, r3, its numeric value at this
602 point, which must be between 0 and 4 inclusive, is going to be
603 equal to the number of arg regs in use for the call. Hence bake
604 that number into the call (we'll need to know it when doing
605 register allocation, to know what regs the call reads.)
cerioncee30312004-12-17 20:30:21 +0000606
sewardj6c299f32009-12-31 18:00:12 +0000607 There is a bit of a twist -- harmless but worth recording.
608 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
609 the first arg in r0 and the second in r3:r2, but r1 isn't used.
610 We nevertheless have nextArgReg==4 and bake that into the call
611 instruction. This will mean the register allocator wil believe
612 this insn reads r1 when in fact it doesn't. But that's
613 harmless; it just artificially extends the live range of r1
614 unnecessarily. The best fix would be to put into the
615 instruction, a bitmask indicating which of r0/1/2/3 carry live
616 values. But that's too much hassle. */
cerioncee30312004-12-17 20:30:21 +0000617
sewardj6c299f32009-12-31 18:00:12 +0000618 /* Finally, the call itself. */
sewardjcfe046e2013-01-17 14:23:53 +0000619 addInstr(env, ARMInstr_Call( cc, target, nextArgReg, rloc ));
cerioncee30312004-12-17 20:30:21 +0000620
sewardj6c299f32009-12-31 18:00:12 +0000621 return True; /* success */
cerioncee30312004-12-17 20:30:21 +0000622}
623
624
625/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000626/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
cerioncee30312004-12-17 20:30:21 +0000627/*---------------------------------------------------------*/
628
sewardj6c299f32009-12-31 18:00:12 +0000629/* Select insns for an integer-typed expression, and add them to the
630 code list. Return a reg holding the result. This reg will be a
631 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
632 want to modify it, ask for a new vreg, copy it in there, and modify
633 the copy. The register allocator will do its best to map both
634 vregs to the same real register, so the copies will often disappear
635 later in the game.
cerioncee30312004-12-17 20:30:21 +0000636
sewardj6c299f32009-12-31 18:00:12 +0000637 This should handle expressions of 32, 16 and 8-bit type. All
638 results are returned in a 32-bit register. For 16- and 8-bit
639 expressions, the upper 16/24 bits are arbitrary, so you should mask
640 or sign extend partial values if necessary.
cerioncee30312004-12-17 20:30:21 +0000641*/
642
sewardj6c299f32009-12-31 18:00:12 +0000643/* --------------------- AMode1 --------------------- */
644
645/* Return an AMode1 which computes the value of the specified
646 expression, possibly also adding insns to the code list as a
647 result. The expression may only be a 32-bit one.
648*/
cerioncee30312004-12-17 20:30:21 +0000649
650static Bool sane_AMode1 ( ARMAMode1* am )
651{
sewardj6c299f32009-12-31 18:00:12 +0000652 switch (am->tag) {
653 case ARMam1_RI:
654 return
655 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
656 && (hregIsVirtual(am->ARMam1.RI.reg)
657 || am->ARMam1.RI.reg == hregARM_R8())
658 && am->ARMam1.RI.simm13 >= -4095
659 && am->ARMam1.RI.simm13 <= 4095 );
660 case ARMam1_RRS:
661 return
662 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
663 && hregIsVirtual(am->ARMam1.RRS.base)
664 && hregClass(am->ARMam1.RRS.index) == HRcInt32
665 && hregIsVirtual(am->ARMam1.RRS.index)
666 && am->ARMam1.RRS.shift >= 0
667 && am->ARMam1.RRS.shift <= 3 );
668 default:
669 vpanic("sane_AMode: unknown ARM AMode1 tag");
670 }
cerioncee30312004-12-17 20:30:21 +0000671}
672
673static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
674{
sewardj6c299f32009-12-31 18:00:12 +0000675 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
676 vassert(sane_AMode1(am));
677 return am;
cerioncee30312004-12-17 20:30:21 +0000678}
679
cerioncee30312004-12-17 20:30:21 +0000680static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
681{
sewardj6c299f32009-12-31 18:00:12 +0000682 IRType ty = typeOfIRExpr(env->type_env,e);
683 vassert(ty == Ity_I32);
cerioncee30312004-12-17 20:30:21 +0000684
sewardj6c299f32009-12-31 18:00:12 +0000685 /* FIXME: add RRS matching */
cerioncee30312004-12-17 20:30:21 +0000686
sewardj6c299f32009-12-31 18:00:12 +0000687 /* {Add32,Sub32}(expr,simm13) */
688 if (e->tag == Iex_Binop
689 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
690 && e->Iex.Binop.arg2->tag == Iex_Const
691 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
692 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
693 if (simm >= -4095 && simm <= 4095) {
694 HReg reg;
695 if (e->Iex.Binop.op == Iop_Sub32)
696 simm = -simm;
697 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
698 return ARMAMode1_RI(reg, simm);
699 }
700 }
cerioncee30312004-12-17 20:30:21 +0000701
sewardj6c299f32009-12-31 18:00:12 +0000702 /* Doesn't match anything in particular. Generate it into
703 a register and use that. */
704 {
705 HReg reg = iselIntExpr_R(env, e);
706 return ARMAMode1_RI(reg, 0);
707 }
708
cerioncee30312004-12-17 20:30:21 +0000709}
710
711
sewardj6c299f32009-12-31 18:00:12 +0000712/* --------------------- AMode2 --------------------- */
cerioncee30312004-12-17 20:30:21 +0000713
sewardj6c299f32009-12-31 18:00:12 +0000714/* Return an AMode2 which computes the value of the specified
715 expression, possibly also adding insns to the code list as a
716 result. The expression may only be a 32-bit one.
717*/
cerioncee30312004-12-17 20:30:21 +0000718
719static Bool sane_AMode2 ( ARMAMode2* am )
720{
721 switch (am->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000722 case ARMam2_RI:
723 return
724 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
725 && hregIsVirtual(am->ARMam2.RI.reg)
726 && am->ARMam2.RI.simm9 >= -255
727 && am->ARMam2.RI.simm9 <= 255 );
728 case ARMam2_RR:
729 return
730 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
731 && hregIsVirtual(am->ARMam2.RR.base)
732 && hregClass(am->ARMam2.RR.index) == HRcInt32
733 && hregIsVirtual(am->ARMam2.RR.index) );
734 default:
735 vpanic("sane_AMode: unknown ARM AMode2 tag");
cerioncee30312004-12-17 20:30:21 +0000736 }
737}
738
sewardj6c299f32009-12-31 18:00:12 +0000739static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
740{
741 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
742 vassert(sane_AMode2(am));
743 return am;
744}
745
cerioncee30312004-12-17 20:30:21 +0000746static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
747{
sewardj6c299f32009-12-31 18:00:12 +0000748 IRType ty = typeOfIRExpr(env->type_env,e);
749 vassert(ty == Ity_I32);
750
751 /* FIXME: add RR matching */
752
753 /* {Add32,Sub32}(expr,simm8) */
754 if (e->tag == Iex_Binop
755 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
756 && e->Iex.Binop.arg2->tag == Iex_Const
757 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
758 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
759 if (simm >= -255 && simm <= 255) {
760 HReg reg;
761 if (e->Iex.Binop.op == Iop_Sub32)
762 simm = -simm;
763 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
764 return ARMAMode2_RI(reg, simm);
765 }
766 }
767
768 /* Doesn't match anything in particular. Generate it into
769 a register and use that. */
770 {
771 HReg reg = iselIntExpr_R(env, e);
772 return ARMAMode2_RI(reg, 0);
773 }
774
cerioncee30312004-12-17 20:30:21 +0000775}
sewardj6c299f32009-12-31 18:00:12 +0000776
777
778/* --------------------- AModeV --------------------- */
779
780/* Return an AModeV which computes the value of the specified
781 expression, possibly also adding insns to the code list as a
782 result. The expression may only be a 32-bit one.
sewardj48b279b2007-11-16 12:43:32 +0000783*/
cerioncee30312004-12-17 20:30:21 +0000784
sewardj6c299f32009-12-31 18:00:12 +0000785static Bool sane_AModeV ( ARMAModeV* am )
786{
787 return toBool( hregClass(am->reg) == HRcInt32
788 && hregIsVirtual(am->reg)
789 && am->simm11 >= -1020 && am->simm11 <= 1020
790 && 0 == (am->simm11 & 3) );
cerioncee30312004-12-17 20:30:21 +0000791}
792
sewardj6c299f32009-12-31 18:00:12 +0000793static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000794{
sewardj6c299f32009-12-31 18:00:12 +0000795 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
796 vassert(sane_AModeV(am));
797 return am;
798}
799
800static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
801{
802 IRType ty = typeOfIRExpr(env->type_env,e);
803 vassert(ty == Ity_I32);
804
805 /* {Add32,Sub32}(expr, simm8 << 2) */
806 if (e->tag == Iex_Binop
807 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
808 && e->Iex.Binop.arg2->tag == Iex_Const
809 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
810 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
811 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
812 HReg reg;
813 if (e->Iex.Binop.op == Iop_Sub32)
814 simm = -simm;
815 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
816 return mkARMAModeV(reg, simm);
817 }
cerioncee30312004-12-17 20:30:21 +0000818 }
sewardj6c299f32009-12-31 18:00:12 +0000819
820 /* Doesn't match anything in particular. Generate it into
821 a register and use that. */
822 {
823 HReg reg = iselIntExpr_R(env, e);
824 return mkARMAModeV(reg, 0);
825 }
826
cerioncee30312004-12-17 20:30:21 +0000827}
828
sewardj6c60b322010-08-22 12:48:28 +0000829/* -------------------- AModeN -------------------- */
830
831static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
832{
833 return iselIntExpr_AModeN_wrk(env, e);
834}
835
836static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
837{
838 HReg reg = iselIntExpr_R(env, e);
839 return mkARMAModeN_R(reg);
840}
841
sewardj6c299f32009-12-31 18:00:12 +0000842
843/* --------------------- RI84 --------------------- */
844
845/* Select instructions to generate 'e' into a RI84. If mayInv is
846 true, then the caller will also accept an I84 form that denotes
847 'not e'. In this case didInv may not be NULL, and *didInv is set
848 to True. This complication is so as to allow generation of an RI84
849 which is suitable for use in either an AND or BIC instruction,
850 without knowing (before this call) which one.
851*/
852static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
853 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000854{
sewardj6c299f32009-12-31 18:00:12 +0000855 ARMRI84* ri;
856 if (mayInv)
857 vassert(didInv != NULL);
858 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
859 /* sanity checks ... */
860 switch (ri->tag) {
861 case ARMri84_I84:
862 return ri;
863 case ARMri84_R:
864 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
865 vassert(hregIsVirtual(ri->ARMri84.R.reg));
866 return ri;
867 default:
868 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
cerioncee30312004-12-17 20:30:21 +0000869 }
870}
871
872/* DO NOT CALL THIS DIRECTLY ! */
sewardj6c299f32009-12-31 18:00:12 +0000873static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
874 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000875{
sewardj6c299f32009-12-31 18:00:12 +0000876 IRType ty = typeOfIRExpr(env->type_env,e);
877 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
cerioncee30312004-12-17 20:30:21 +0000878
sewardj6c299f32009-12-31 18:00:12 +0000879 if (didInv) *didInv = False;
880
881 /* special case: immediate */
882 if (e->tag == Iex_Const) {
883 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
884 switch (e->Iex.Const.con->tag) {
885 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
886 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
887 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
888 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
889 }
890 if (fitsIn8x4(&u8, &u4, u)) {
891 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
892 }
893 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
894 vassert(didInv);
895 *didInv = True;
896 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
897 }
898 /* else fail, fall through to default case */
899 }
900
901 /* default case: calculate into a register and return that */
902 {
903 HReg r = iselIntExpr_R ( env, e );
904 return ARMRI84_R(r);
905 }
cerioncee30312004-12-17 20:30:21 +0000906}
907
908
sewardj6c299f32009-12-31 18:00:12 +0000909/* --------------------- RI5 --------------------- */
910
911/* Select instructions to generate 'e' into a RI5. */
912
913static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
914{
915 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
916 /* sanity checks ... */
917 switch (ri->tag) {
918 case ARMri5_I5:
919 return ri;
920 case ARMri5_R:
921 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
922 vassert(hregIsVirtual(ri->ARMri5.R.reg));
923 return ri;
924 default:
925 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
926 }
927}
928
929/* DO NOT CALL THIS DIRECTLY ! */
930static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
931{
932 IRType ty = typeOfIRExpr(env->type_env,e);
933 vassert(ty == Ity_I32 || ty == Ity_I8);
934
935 /* special case: immediate */
936 if (e->tag == Iex_Const) {
937 UInt u; /* both invalid */
938 switch (e->Iex.Const.con->tag) {
939 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
940 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
941 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
942 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
943 }
944 if (u >= 1 && u <= 31) {
945 return ARMRI5_I5(u);
946 }
947 /* else fail, fall through to default case */
948 }
949
950 /* default case: calculate into a register and return that */
951 {
952 HReg r = iselIntExpr_R ( env, e );
953 return ARMRI5_R(r);
954 }
955}
cerioncee30312004-12-17 20:30:21 +0000956
957
sewardj6c299f32009-12-31 18:00:12 +0000958/* ------------------- CondCode ------------------- */
cerioncee30312004-12-17 20:30:21 +0000959
960/* Generate code to evaluated a bit-typed expression, returning the
961 condition code which would correspond when the expression would
962 notionally have returned 1. */
963
964static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
965{
sewardj6c299f32009-12-31 18:00:12 +0000966 ARMCondCode cc = iselCondCode_wrk(env,e);
sewardj6c60b322010-08-22 12:48:28 +0000967 vassert(cc != ARMcc_NV);
sewardj6c299f32009-12-31 18:00:12 +0000968 return cc;
cerioncee30312004-12-17 20:30:21 +0000969}
970
cerioncee30312004-12-17 20:30:21 +0000971static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
972{
sewardj6c299f32009-12-31 18:00:12 +0000973 vassert(e);
974 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
cerioncee30312004-12-17 20:30:21 +0000975
sewardj6c299f32009-12-31 18:00:12 +0000976 /* var */
977 if (e->tag == Iex_RdTmp) {
978 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
979 /* CmpOrTst doesn't modify rTmp; so this is OK. */
980 ARMRI84* one = ARMRI84_I84(1,0);
981 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
982 return ARMcc_NE;
983 }
984
985 /* Not1(e) */
986 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
987 /* Generate code for the arg, and negate the test condition */
988 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
989 }
990
991 /* --- patterns rooted at: 32to1 --- */
992
993 if (e->tag == Iex_Unop
994 && e->Iex.Unop.op == Iop_32to1) {
995 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
996 ARMRI84* one = ARMRI84_I84(1,0);
997 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
998 return ARMcc_NE;
999 }
1000
1001 /* --- patterns rooted at: CmpNEZ8 --- */
1002
1003 if (e->tag == Iex_Unop
1004 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1005 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1006 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
1007 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1008 return ARMcc_NE;
1009 }
1010
1011 /* --- patterns rooted at: CmpNEZ32 --- */
1012
1013 if (e->tag == Iex_Unop
1014 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1015 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1016 ARMRI84* zero = ARMRI84_I84(0,0);
1017 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1018 return ARMcc_NE;
1019 }
1020
1021 /* --- patterns rooted at: CmpNEZ64 --- */
1022
1023 if (e->tag == Iex_Unop
1024 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1025 HReg tHi, tLo;
1026 HReg tmp = newVRegI(env);
1027 ARMRI84* zero = ARMRI84_I84(0,0);
1028 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1029 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1030 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1031 return ARMcc_NE;
1032 }
1033
1034 /* --- Cmp*32*(x,y) --- */
1035 if (e->tag == Iex_Binop
1036 && (e->Iex.Binop.op == Iop_CmpEQ32
1037 || e->Iex.Binop.op == Iop_CmpNE32
1038 || e->Iex.Binop.op == Iop_CmpLT32S
1039 || e->Iex.Binop.op == Iop_CmpLT32U
1040 || e->Iex.Binop.op == Iop_CmpLE32S
1041 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1042 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1043 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1044 env, e->Iex.Binop.arg2);
1045 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1046 switch (e->Iex.Binop.op) {
1047 case Iop_CmpEQ32: return ARMcc_EQ;
1048 case Iop_CmpNE32: return ARMcc_NE;
1049 case Iop_CmpLT32S: return ARMcc_LT;
1050 case Iop_CmpLT32U: return ARMcc_LO;
1051 case Iop_CmpLE32S: return ARMcc_LE;
1052 case Iop_CmpLE32U: return ARMcc_LS;
1053 default: vpanic("iselCondCode(arm): CmpXX32");
1054 }
1055 }
1056
sewardj6c60b322010-08-22 12:48:28 +00001057 /* --- CasCmpEQ* --- */
1058 /* Ist_Cas has a dummy argument to compare with, so comparison is
1059 always true. */
1060 if (e->tag == Iex_Binop
1061 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1062 || e->Iex.Binop.op == Iop_CasCmpEQ16
1063 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1064 return ARMcc_AL;
1065 }
1066
sewardj6c299f32009-12-31 18:00:12 +00001067 ppIRExpr(e);
1068 vpanic("iselCondCode");
cerioncee30312004-12-17 20:30:21 +00001069}
1070
1071
sewardj6c299f32009-12-31 18:00:12 +00001072/* --------------------- Reg --------------------- */
cerioncee30312004-12-17 20:30:21 +00001073
1074static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1075{
sewardj6c299f32009-12-31 18:00:12 +00001076 HReg r = iselIntExpr_R_wrk(env, e);
1077 /* sanity checks ... */
1078# if 0
1079 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1080# endif
1081 vassert(hregClass(r) == HRcInt32);
1082 vassert(hregIsVirtual(r));
1083 return r;
cerioncee30312004-12-17 20:30:21 +00001084}
1085
1086/* DO NOT CALL THIS DIRECTLY ! */
1087static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1088{
sewardj6c299f32009-12-31 18:00:12 +00001089 IRType ty = typeOfIRExpr(env->type_env,e);
1090 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1091
1092 switch (e->tag) {
1093
1094 /* --------- TEMP --------- */
1095 case Iex_RdTmp: {
1096 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1097 }
1098
1099 /* --------- LOAD --------- */
1100 case Iex_Load: {
1101 HReg dst = newVRegI(env);
1102
1103 if (e->Iex.Load.end != Iend_LE)
1104 goto irreducible;
1105
1106 if (ty == Ity_I32) {
1107 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
sewardjcfe046e2013-01-17 14:23:53 +00001108 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
sewardj6c299f32009-12-31 18:00:12 +00001109 return dst;
1110 }
1111 if (ty == Ity_I16) {
1112 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
sewardjcfe046e2013-01-17 14:23:53 +00001113 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1114 True/*isLoad*/, False/*!signedLoad*/,
sewardj6c299f32009-12-31 18:00:12 +00001115 dst, amode));
1116 return dst;
1117 }
1118 if (ty == Ity_I8) {
1119 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
sewardjcfe046e2013-01-17 14:23:53 +00001120 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
sewardj6c299f32009-12-31 18:00:12 +00001121 return dst;
1122 }
sewardj6c299f32009-12-31 18:00:12 +00001123 break;
1124 }
1125
1126//zz /* --------- TERNARY OP --------- */
1127//zz case Iex_Triop: {
florian420bfa92012-06-02 20:29:22 +00001128//zz IRTriop *triop = e->Iex.Triop.details;
sewardj6c299f32009-12-31 18:00:12 +00001129//zz /* C3210 flags following FPU partial remainder (fprem), both
1130//zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
florian420bfa92012-06-02 20:29:22 +00001131//zz if (triop->op == Iop_PRemC3210F64
1132//zz || triop->op == Iop_PRem1C3210F64) {
sewardj6c299f32009-12-31 18:00:12 +00001133//zz HReg junk = newVRegF(env);
1134//zz HReg dst = newVRegI(env);
florian420bfa92012-06-02 20:29:22 +00001135//zz HReg srcL = iselDblExpr(env, triop->arg2);
1136//zz HReg srcR = iselDblExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00001137//zz /* XXXROUNDINGFIXME */
1138//zz /* set roundingmode here */
1139//zz addInstr(env, X86Instr_FpBinary(
1140//zz e->Iex.Binop.op==Iop_PRemC3210F64
1141//zz ? Xfp_PREM : Xfp_PREM1,
1142//zz srcL,srcR,junk
1143//zz ));
1144//zz /* The previous pseudo-insn will have left the FPU's C3210
1145//zz flags set correctly. So bag them. */
1146//zz addInstr(env, X86Instr_FpStSW_AX());
1147//zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1148//zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1149//zz return dst;
1150//zz }
1151//zz
1152//zz break;
1153//zz }
1154
1155 /* --------- BINARY OP --------- */
1156 case Iex_Binop: {
1157
1158 ARMAluOp aop = 0; /* invalid */
1159 ARMShiftOp sop = 0; /* invalid */
1160
1161 /* ADD/SUB/AND/OR/XOR */
1162 switch (e->Iex.Binop.op) {
1163 case Iop_And32: {
1164 Bool didInv = False;
1165 HReg dst = newVRegI(env);
1166 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1167 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1168 env, e->Iex.Binop.arg2);
1169 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1170 dst, argL, argR));
1171 return dst;
1172 }
1173 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1174 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1175 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1176 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1177 std_binop: {
1178 HReg dst = newVRegI(env);
1179 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1180 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1181 env, e->Iex.Binop.arg2);
1182 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1183 return dst;
1184 }
1185 default: break;
1186 }
1187
1188 /* SHL/SHR/SAR */
1189 switch (e->Iex.Binop.op) {
1190 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1191 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1192 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1193 sh_binop: {
1194 HReg dst = newVRegI(env);
1195 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1196 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1197 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1198 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1199 return dst;
1200 }
1201 default: break;
1202 }
1203
1204 /* MUL */
1205 if (e->Iex.Binop.op == Iop_Mul32) {
1206 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1207 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1208 HReg dst = newVRegI(env);
1209 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1210 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1211 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1212 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1213 return dst;
1214 }
1215
1216 /* Handle misc other ops. */
1217
1218 if (e->Iex.Binop.op == Iop_Max32U) {
1219 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1220 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1221 HReg dst = newVRegI(env);
sewardj6c60b322010-08-22 12:48:28 +00001222 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1223 ARMRI84_R(argR)));
sewardj6c299f32009-12-31 18:00:12 +00001224 addInstr(env, mk_iMOVds_RR(dst, argL));
1225 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1226 return dst;
1227 }
1228
1229 if (e->Iex.Binop.op == Iop_CmpF64) {
1230 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1231 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1232 HReg dst = newVRegI(env);
1233 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1234 FMSTAT, so we can examine the results directly. */
1235 addInstr(env, ARMInstr_VCmpD(dL, dR));
1236 /* Create in dst, the IRCmpF64Result encoded result. */
1237 addInstr(env, ARMInstr_Imm32(dst, 0));
1238 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1239 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1240 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1241 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1242 return dst;
1243 }
1244
1245 if (e->Iex.Binop.op == Iop_F64toI32S
1246 || e->Iex.Binop.op == Iop_F64toI32U) {
1247 /* Wretched uglyness all round, due to having to deal
1248 with rounding modes. Oh well. */
1249 /* FIXME: if arg1 is a constant indicating round-to-zero,
1250 then we could skip all this arsing around with FPSCR and
1251 simply emit FTO{S,U}IZD. */
1252 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1253 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1254 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1255 /* FTO{S,U}ID valF, valD */
1256 HReg valF = newVRegF(env);
1257 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1258 valF, valD));
1259 set_VFP_rounding_default(env);
1260 /* VMOV dst, valF */
1261 HReg dst = newVRegI(env);
1262 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1263 return dst;
1264 }
1265
sewardj6c60b322010-08-22 12:48:28 +00001266 if (e->Iex.Binop.op == Iop_GetElem8x8
1267 || e->Iex.Binop.op == Iop_GetElem16x4
1268 || e->Iex.Binop.op == Iop_GetElem32x2) {
1269 HReg res = newVRegI(env);
florian1d0da842012-06-01 22:04:27 +00001270 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
sewardj6c60b322010-08-22 12:48:28 +00001271 UInt index, size;
1272 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1273 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1274 vpanic("ARM target supports GetElem with constant "
1275 "second argument only\n");
1276 }
1277 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1278 switch (e->Iex.Binop.op) {
1279 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1280 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1281 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1282 default: vassert(0);
1283 }
1284 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1285 mkARMNRS(ARMNRS_Reg, res, 0),
1286 mkARMNRS(ARMNRS_Scalar, arg, index),
1287 size, False));
1288 return res;
1289 }
1290
1291 if (e->Iex.Binop.op == Iop_GetElem8x16
1292 || e->Iex.Binop.op == Iop_GetElem16x8
1293 || e->Iex.Binop.op == Iop_GetElem32x4) {
1294 HReg res = newVRegI(env);
florian1d0da842012-06-01 22:04:27 +00001295 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
sewardj6c60b322010-08-22 12:48:28 +00001296 UInt index, size;
1297 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1298 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1299 vpanic("ARM target supports GetElem with constant "
1300 "second argument only\n");
1301 }
1302 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1303 switch (e->Iex.Binop.op) {
1304 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1305 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1306 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1307 default: vassert(0);
1308 }
1309 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1310 mkARMNRS(ARMNRS_Reg, res, 0),
1311 mkARMNRS(ARMNRS_Scalar, arg, index),
1312 size, True));
1313 return res;
1314 }
1315
sewardje2ea1762010-09-22 00:56:37 +00001316 /* All cases involving host-side helper calls. */
1317 void* fn = NULL;
1318 switch (e->Iex.Binop.op) {
1319 case Iop_Add16x2:
1320 fn = &h_generic_calc_Add16x2; break;
1321 case Iop_Sub16x2:
1322 fn = &h_generic_calc_Sub16x2; break;
1323 case Iop_HAdd16Ux2:
1324 fn = &h_generic_calc_HAdd16Ux2; break;
1325 case Iop_HAdd16Sx2:
1326 fn = &h_generic_calc_HAdd16Sx2; break;
1327 case Iop_HSub16Ux2:
1328 fn = &h_generic_calc_HSub16Ux2; break;
1329 case Iop_HSub16Sx2:
1330 fn = &h_generic_calc_HSub16Sx2; break;
1331 case Iop_QAdd16Sx2:
1332 fn = &h_generic_calc_QAdd16Sx2; break;
1333 case Iop_QSub16Sx2:
1334 fn = &h_generic_calc_QSub16Sx2; break;
1335 case Iop_Add8x4:
1336 fn = &h_generic_calc_Add8x4; break;
1337 case Iop_Sub8x4:
1338 fn = &h_generic_calc_Sub8x4; break;
1339 case Iop_HAdd8Ux4:
1340 fn = &h_generic_calc_HAdd8Ux4; break;
1341 case Iop_HAdd8Sx4:
1342 fn = &h_generic_calc_HAdd8Sx4; break;
1343 case Iop_HSub8Ux4:
1344 fn = &h_generic_calc_HSub8Ux4; break;
1345 case Iop_HSub8Sx4:
1346 fn = &h_generic_calc_HSub8Sx4; break;
1347 case Iop_QAdd8Sx4:
1348 fn = &h_generic_calc_QAdd8Sx4; break;
1349 case Iop_QAdd8Ux4:
1350 fn = &h_generic_calc_QAdd8Ux4; break;
1351 case Iop_QSub8Sx4:
1352 fn = &h_generic_calc_QSub8Sx4; break;
1353 case Iop_QSub8Ux4:
1354 fn = &h_generic_calc_QSub8Ux4; break;
sewardj310d6b22010-10-18 16:29:40 +00001355 case Iop_Sad8Ux4:
1356 fn = &h_generic_calc_Sad8Ux4; break;
sewardj44ce46d2012-07-11 13:19:10 +00001357 case Iop_QAdd32S:
1358 fn = &h_generic_calc_QAdd32S; break;
1359 case Iop_QSub32S:
1360 fn = &h_generic_calc_QSub32S; break;
sewardj32dd5382012-09-17 15:27:58 +00001361 case Iop_QSub16Ux2:
1362 fn = &h_generic_calc_QSub16Ux2; break;
sewardje2ea1762010-09-22 00:56:37 +00001363 default:
1364 break;
1365 }
1366
1367 if (fn) {
1368 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1369 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1370 HReg res = newVRegI(env);
1371 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1372 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
sewardjcfe046e2013-01-17 14:23:53 +00001373 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
1374 2, RetLocInt ));
sewardje2ea1762010-09-22 00:56:37 +00001375 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1376 return res;
1377 }
1378
sewardj6c299f32009-12-31 18:00:12 +00001379 break;
1380 }
1381
1382 /* --------- UNARY OP --------- */
1383 case Iex_Unop: {
1384
1385//zz /* 1Uto8(32to1(expr32)) */
1386//zz if (e->Iex.Unop.op == Iop_1Uto8) {
1387//zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1388//zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1389//zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1390//zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1391//zz IRExpr* expr32 = mi.bindee[0];
1392//zz HReg dst = newVRegI(env);
1393//zz HReg src = iselIntExpr_R(env, expr32);
1394//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1395//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1396//zz X86RMI_Imm(1), dst));
1397//zz return dst;
1398//zz }
1399//zz }
1400//zz
1401//zz /* 8Uto32(LDle(expr32)) */
1402//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1403//zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1404//zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1405//zz unop(Iop_8Uto32,
1406//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1407//zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1408//zz HReg dst = newVRegI(env);
1409//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1410//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1411//zz return dst;
1412//zz }
1413//zz }
1414//zz
1415//zz /* 8Sto32(LDle(expr32)) */
1416//zz if (e->Iex.Unop.op == Iop_8Sto32) {
1417//zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1418//zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1419//zz unop(Iop_8Sto32,
1420//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1421//zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1422//zz HReg dst = newVRegI(env);
1423//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1424//zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1425//zz return dst;
1426//zz }
1427//zz }
1428//zz
1429//zz /* 16Uto32(LDle(expr32)) */
1430//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1431//zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1432//zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1433//zz unop(Iop_16Uto32,
1434//zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1435//zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1436//zz HReg dst = newVRegI(env);
1437//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1438//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1439//zz return dst;
1440//zz }
1441//zz }
1442//zz
1443//zz /* 8Uto32(GET:I8) */
1444//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1445//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1446//zz HReg dst;
1447//zz X86AMode* amode;
1448//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1449//zz dst = newVRegI(env);
1450//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1451//zz hregX86_EBP());
1452//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1453//zz return dst;
1454//zz }
1455//zz }
1456//zz
1457//zz /* 16to32(GET:I16) */
1458//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1459//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1460//zz HReg dst;
1461//zz X86AMode* amode;
1462//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1463//zz dst = newVRegI(env);
1464//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1465//zz hregX86_EBP());
1466//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1467//zz return dst;
1468//zz }
1469//zz }
1470
1471 switch (e->Iex.Unop.op) {
1472 case Iop_8Uto32: {
1473 HReg dst = newVRegI(env);
1474 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1475 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1476 dst, src, ARMRI84_I84(0xFF,0)));
1477 return dst;
1478 }
1479//zz case Iop_8Uto16:
1480//zz case Iop_8Uto32:
1481//zz case Iop_16Uto32: {
1482//zz HReg dst = newVRegI(env);
1483//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1484//zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1485//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1486//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1487//zz X86RMI_Imm(mask), dst));
1488//zz return dst;
1489//zz }
1490//zz case Iop_8Sto16:
1491//zz case Iop_8Sto32:
1492 case Iop_16Uto32: {
1493 HReg dst = newVRegI(env);
1494 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1495 ARMRI5* amt = ARMRI5_I5(16);
1496 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1497 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1498 return dst;
1499 }
1500 case Iop_8Sto32:
1501 case Iop_16Sto32: {
1502 HReg dst = newVRegI(env);
1503 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1504 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1505 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1506 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1507 return dst;
1508 }
1509//zz case Iop_Not8:
1510//zz case Iop_Not16:
1511 case Iop_Not32: {
1512 HReg dst = newVRegI(env);
1513 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1514 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1515 return dst;
1516 }
1517 case Iop_64HIto32: {
1518 HReg rHi, rLo;
1519 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1520 return rHi; /* and abandon rLo .. poor wee thing :-) */
1521 }
1522 case Iop_64to32: {
1523 HReg rHi, rLo;
1524 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1525 return rLo; /* similar stupid comment to the above ... */
1526 }
sewardj6c60b322010-08-22 12:48:28 +00001527 case Iop_64to8: {
1528 HReg rHi, rLo;
sewardjc6f970f2012-04-02 21:54:49 +00001529 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001530 HReg tHi = newVRegI(env);
1531 HReg tLo = newVRegI(env);
1532 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1533 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1534 rHi = tHi;
1535 rLo = tLo;
1536 } else {
1537 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1538 }
1539 return rLo;
1540 }
sewardj6c299f32009-12-31 18:00:12 +00001541//zz case Iop_16HIto8:
1542//zz case Iop_32HIto16: {
1543//zz HReg dst = newVRegI(env);
1544//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1545//zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1546//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1547//zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1548//zz return dst;
1549//zz }
1550 case Iop_1Uto32:
1551 case Iop_1Uto8: {
1552 HReg dst = newVRegI(env);
1553 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1554 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556 return dst;
1557 }
1558
1559 case Iop_1Sto32: {
1560 HReg dst = newVRegI(env);
1561 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1562 ARMRI5* amt = ARMRI5_I5(31);
1563 /* This is really rough. We could do much better here;
1564 perhaps mvn{cond} dst, #0 as the second insn?
1565 (same applies to 1Sto64) */
1566 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1567 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1568 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1569 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1570 return dst;
1571 }
1572
1573
1574//zz case Iop_1Sto8:
1575//zz case Iop_1Sto16:
1576//zz case Iop_1Sto32: {
1577//zz /* could do better than this, but for now ... */
1578//zz HReg dst = newVRegI(env);
1579//zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1580//zz addInstr(env, X86Instr_Set32(cond,dst));
1581//zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1582//zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1583//zz return dst;
1584//zz }
1585//zz case Iop_Ctz32: {
1586//zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1587//zz HReg dst = newVRegI(env);
1588//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1589//zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1590//zz return dst;
1591//zz }
1592 case Iop_Clz32: {
1593 /* Count leading zeroes; easy on ARM. */
1594 HReg dst = newVRegI(env);
1595 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1596 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1597 return dst;
1598 }
1599
1600 case Iop_CmpwNEZ32: {
1601 HReg dst = newVRegI(env);
1602 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1603 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1604 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1605 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1606 return dst;
1607 }
1608
1609 case Iop_Left32: {
1610 HReg dst = newVRegI(env);
1611 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1612 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1613 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1614 return dst;
1615 }
1616
1617//zz case Iop_V128to32: {
1618//zz HReg dst = newVRegI(env);
1619//zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1620//zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1621//zz sub_from_esp(env, 16);
1622//zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1623//zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1624//zz add_to_esp(env, 16);
1625//zz return dst;
1626//zz }
1627//zz
1628 case Iop_ReinterpF32asI32: {
1629 HReg dst = newVRegI(env);
1630 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1631 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1632 return dst;
1633 }
1634
1635//zz
1636//zz case Iop_16to8:
1637 case Iop_32to8:
1638 case Iop_32to16:
1639 /* These are no-ops. */
1640 return iselIntExpr_R(env, e->Iex.Unop.arg);
1641
sewardj6c60b322010-08-22 12:48:28 +00001642 default:
sewardj6c299f32009-12-31 18:00:12 +00001643 break;
1644 }
sewardje2ea1762010-09-22 00:56:37 +00001645
1646 /* All Unop cases involving host-side helper calls. */
1647 void* fn = NULL;
1648 switch (e->Iex.Unop.op) {
1649 case Iop_CmpNEZ16x2:
1650 fn = &h_generic_calc_CmpNEZ16x2; break;
1651 case Iop_CmpNEZ8x4:
1652 fn = &h_generic_calc_CmpNEZ8x4; break;
1653 default:
1654 break;
1655 }
1656
1657 if (fn) {
1658 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1659 HReg res = newVRegI(env);
1660 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
sewardjcfe046e2013-01-17 14:23:53 +00001661 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
1662 1, RetLocInt ));
sewardje2ea1762010-09-22 00:56:37 +00001663 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1664 return res;
1665 }
1666
sewardj6c299f32009-12-31 18:00:12 +00001667 break;
1668 }
1669
1670 /* --------- GET --------- */
1671 case Iex_Get: {
1672 if (ty == Ity_I32
1673 && 0 == (e->Iex.Get.offset & 3)
1674 && e->Iex.Get.offset < 4096-4) {
1675 HReg dst = newVRegI(env);
1676 addInstr(env, ARMInstr_LdSt32(
sewardjcfe046e2013-01-17 14:23:53 +00001677 ARMcc_AL, True/*isLoad*/,
sewardj6c299f32009-12-31 18:00:12 +00001678 dst,
1679 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1680 return dst;
1681 }
1682//zz if (ty == Ity_I8 || ty == Ity_I16) {
1683//zz HReg dst = newVRegI(env);
1684//zz addInstr(env, X86Instr_LoadEX(
1685//zz toUChar(ty==Ity_I8 ? 1 : 2),
1686//zz False,
1687//zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1688//zz dst));
1689//zz return dst;
1690//zz }
1691 break;
1692 }
1693
1694//zz case Iex_GetI: {
1695//zz X86AMode* am
1696//zz = genGuestArrayOffset(
1697//zz env, e->Iex.GetI.descr,
1698//zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1699//zz HReg dst = newVRegI(env);
1700//zz if (ty == Ity_I8) {
1701//zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1702//zz return dst;
1703//zz }
1704//zz if (ty == Ity_I32) {
1705//zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1706//zz return dst;
1707//zz }
1708//zz break;
1709//zz }
1710
1711 /* --------- CCALL --------- */
1712 case Iex_CCall: {
1713 HReg dst = newVRegI(env);
1714 vassert(ty == e->Iex.CCall.retty);
1715
sewardjcfe046e2013-01-17 14:23:53 +00001716 /* be very restrictive for now. Only 32/64-bit ints allowed for
1717 args, and 32 bits for return type. Don't forget to change
1718 the RetLoc if more types are allowed in future. */
sewardj6c299f32009-12-31 18:00:12 +00001719 if (e->Iex.CCall.retty != Ity_I32)
1720 goto irreducible;
1721
1722 /* Marshal args, do the call, clear stack. */
1723 Bool ok = doHelperCall( env, False,
sewardjcfe046e2013-01-17 14:23:53 +00001724 NULL, e->Iex.CCall.cee, e->Iex.CCall.args,
1725 RetLocInt );
sewardj6c299f32009-12-31 18:00:12 +00001726 if (ok) {
1727 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1728 return dst;
1729 }
1730 /* else fall through; will hit the irreducible: label */
1731 }
1732
1733 /* --------- LITERAL --------- */
1734 /* 32 literals */
1735 case Iex_Const: {
1736 UInt u = 0;
1737 HReg dst = newVRegI(env);
1738 switch (e->Iex.Const.con->tag) {
1739 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1740 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1741 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
sewardj6c60b322010-08-22 12:48:28 +00001742 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
sewardj6c299f32009-12-31 18:00:12 +00001743 }
1744 addInstr(env, ARMInstr_Imm32(dst, u));
1745 return dst;
1746 }
1747
1748 /* --------- MULTIPLEX --------- */
1749 case Iex_Mux0X: {
1750 IRExpr* cond = e->Iex.Mux0X.cond;
1751
1752 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1753 if (ty == Ity_I32
1754 && cond->tag == Iex_Unop
1755 && cond->Iex.Unop.op == Iop_32to8
1756 && cond->Iex.Unop.arg->tag == Iex_Unop
1757 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1758 ARMCondCode cc;
1759 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1760 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1761 HReg dst = newVRegI(env);
1762 addInstr(env, mk_iMOVds_RR(dst, rX));
1763 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1764 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1765 return dst;
1766 }
1767
1768 /* Mux0X(cond, expr0, exprX) (general case) */
1769 if (ty == Ity_I32) {
1770 HReg r8;
1771 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1772 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1773 HReg dst = newVRegI(env);
1774 addInstr(env, mk_iMOVds_RR(dst, rX));
1775 r8 = iselIntExpr_R(env, cond);
1776 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1777 ARMRI84_I84(0xFF,0)));
1778 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1779 return dst;
1780 }
1781 break;
1782 }
1783
1784 default:
1785 break;
1786 } /* switch (e->tag) */
1787
1788 /* We get here if no pattern matched. */
1789 irreducible:
1790 ppIRExpr(e);
1791 vpanic("iselIntExpr_R: cannot reduce tree");
cerioncee30312004-12-17 20:30:21 +00001792}
1793
1794
sewardj6c299f32009-12-31 18:00:12 +00001795/* -------------------- 64-bit -------------------- */
1796
1797/* Compute a 64-bit value into a register pair, which is returned as
1798 the first two parameters. As with iselIntExpr_R, these may be
1799 either real or virtual regs; in any case they must not be changed
1800 by subsequent code emitted by the caller. */
1801
1802static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1803{
1804 iselInt64Expr_wrk(rHi, rLo, env, e);
1805# if 0
1806 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1807# endif
1808 vassert(hregClass(*rHi) == HRcInt32);
1809 vassert(hregIsVirtual(*rHi));
1810 vassert(hregClass(*rLo) == HRcInt32);
1811 vassert(hregIsVirtual(*rLo));
1812}
1813
1814/* DO NOT CALL THIS DIRECTLY ! */
1815static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1816{
1817 vassert(e);
1818 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1819
1820 /* 64-bit literal */
1821 if (e->tag == Iex_Const) {
1822 ULong w64 = e->Iex.Const.con->Ico.U64;
1823 UInt wHi = toUInt(w64 >> 32);
1824 UInt wLo = toUInt(w64);
1825 HReg tHi = newVRegI(env);
1826 HReg tLo = newVRegI(env);
1827 vassert(e->Iex.Const.con->tag == Ico_U64);
1828 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1829 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1830 *rHi = tHi;
1831 *rLo = tLo;
1832 return;
1833 }
1834
1835 /* read 64-bit IRTemp */
1836 if (e->tag == Iex_RdTmp) {
sewardjc6f970f2012-04-02 21:54:49 +00001837 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001838 HReg tHi = newVRegI(env);
1839 HReg tLo = newVRegI(env);
1840 HReg tmp = iselNeon64Expr(env, e);
1841 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1842 *rHi = tHi;
1843 *rLo = tLo;
1844 } else {
1845 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1846 }
sewardj6c299f32009-12-31 18:00:12 +00001847 return;
1848 }
1849
1850 /* 64-bit load */
1851 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1852 HReg tLo, tHi, rA;
1853 vassert(e->Iex.Load.ty == Ity_I64);
1854 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1855 tHi = newVRegI(env);
1856 tLo = newVRegI(env);
sewardjcfe046e2013-01-17 14:23:53 +00001857 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
1858 tHi, ARMAMode1_RI(rA, 4)));
1859 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
1860 tLo, ARMAMode1_RI(rA, 0)));
sewardj6c299f32009-12-31 18:00:12 +00001861 *rHi = tHi;
1862 *rLo = tLo;
1863 return;
1864 }
1865
1866 /* 64-bit GET */
1867 if (e->tag == Iex_Get) {
1868 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1869 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1870 HReg tHi = newVRegI(env);
1871 HReg tLo = newVRegI(env);
sewardjcfe046e2013-01-17 14:23:53 +00001872 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
1873 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
sewardj6c299f32009-12-31 18:00:12 +00001874 *rHi = tHi;
1875 *rLo = tLo;
1876 return;
1877 }
1878
1879 /* --------- BINARY ops --------- */
1880 if (e->tag == Iex_Binop) {
1881 switch (e->Iex.Binop.op) {
1882
1883 /* 32 x 32 -> 64 multiply */
1884 case Iop_MullS32:
1885 case Iop_MullU32: {
1886 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1887 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1888 HReg tHi = newVRegI(env);
1889 HReg tLo = newVRegI(env);
1890 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1891 ? ARMmul_SX : ARMmul_ZX;
1892 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1893 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1894 addInstr(env, ARMInstr_Mul(mop));
1895 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1896 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1897 *rHi = tHi;
1898 *rLo = tLo;
1899 return;
1900 }
1901
1902 case Iop_Or64: {
1903 HReg xLo, xHi, yLo, yHi;
1904 HReg tHi = newVRegI(env);
1905 HReg tLo = newVRegI(env);
1906 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1907 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1908 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1909 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1910 *rHi = tHi;
1911 *rLo = tLo;
1912 return;
1913 }
1914
1915 case Iop_Add64: {
1916 HReg xLo, xHi, yLo, yHi;
1917 HReg tHi = newVRegI(env);
1918 HReg tLo = newVRegI(env);
1919 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1920 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1921 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1922 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1923 *rHi = tHi;
1924 *rLo = tLo;
1925 return;
1926 }
1927
1928 /* 32HLto64(e1,e2) */
1929 case Iop_32HLto64: {
1930 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1931 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1932 return;
1933 }
1934
1935 default:
1936 break;
1937 }
1938 }
1939
1940 /* --------- UNARY ops --------- */
1941 if (e->tag == Iex_Unop) {
1942 switch (e->Iex.Unop.op) {
1943
1944 /* ReinterpF64asI64 */
1945 case Iop_ReinterpF64asI64: {
1946 HReg dstHi = newVRegI(env);
1947 HReg dstLo = newVRegI(env);
1948 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1949 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1950 *rHi = dstHi;
1951 *rLo = dstLo;
1952 return;
1953 }
1954
1955 /* Left64(e) */
1956 case Iop_Left64: {
1957 HReg yLo, yHi;
1958 HReg tHi = newVRegI(env);
1959 HReg tLo = newVRegI(env);
1960 HReg zero = newVRegI(env);
1961 /* yHi:yLo = arg */
1962 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1963 /* zero = 0 */
1964 addInstr(env, ARMInstr_Imm32(zero, 0));
1965 /* tLo = 0 - yLo, and set carry */
sewardj6c60b322010-08-22 12:48:28 +00001966 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1967 tLo, zero, ARMRI84_R(yLo)));
sewardj6c299f32009-12-31 18:00:12 +00001968 /* tHi = 0 - yHi - carry */
sewardj6c60b322010-08-22 12:48:28 +00001969 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1970 tHi, zero, ARMRI84_R(yHi)));
sewardj6c299f32009-12-31 18:00:12 +00001971 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1972 back in, so as to give the final result
1973 tHi:tLo = arg | -arg. */
1974 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1975 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1976 *rHi = tHi;
1977 *rLo = tLo;
1978 return;
1979 }
1980
1981 /* CmpwNEZ64(e) */
1982 case Iop_CmpwNEZ64: {
1983 HReg srcLo, srcHi;
1984 HReg tmp1 = newVRegI(env);
1985 HReg tmp2 = newVRegI(env);
1986 /* srcHi:srcLo = arg */
1987 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1988 /* tmp1 = srcHi | srcLo */
1989 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1990 tmp1, srcHi, ARMRI84_R(srcLo)));
1991 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1992 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1993 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1994 tmp2, tmp2, ARMRI84_R(tmp1)));
1995 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1996 tmp2, tmp2, ARMRI5_I5(31)));
1997 *rHi = tmp2;
1998 *rLo = tmp2;
1999 return;
2000 }
2001
2002 case Iop_1Sto64: {
2003 HReg dst = newVRegI(env);
2004 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2005 ARMRI5* amt = ARMRI5_I5(31);
2006 /* This is really rough. We could do much better here;
2007 perhaps mvn{cond} dst, #0 as the second insn?
2008 (same applies to 1Sto32) */
2009 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2010 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2011 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2012 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2013 *rHi = dst;
2014 *rLo = dst;
2015 return;
2016 }
2017
2018 default:
2019 break;
2020 }
2021 } /* if (e->tag == Iex_Unop) */
2022
2023 /* --------- MULTIPLEX --------- */
2024 if (e->tag == Iex_Mux0X) {
2025 IRType ty8;
2026 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2027 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2028 vassert(ty8 == Ity_I8);
2029 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2030 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2031 dstHi = newVRegI(env);
2032 dstLo = newVRegI(env);
2033 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2034 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2035 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2036 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2037 ARMRI84_I84(0xFF,0)));
2038 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2039 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2040 *rHi = dstHi;
2041 *rLo = dstLo;
2042 return;
2043 }
2044
sewardj6c60b322010-08-22 12:48:28 +00002045 /* It is convenient sometimes to call iselInt64Expr even when we
2046 have NEON support (e.g. in do_helper_call we need 64-bit
2047 arguments as 2 x 32 regs). */
sewardjc6f970f2012-04-02 21:54:49 +00002048 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00002049 HReg tHi = newVRegI(env);
2050 HReg tLo = newVRegI(env);
2051 HReg tmp = iselNeon64Expr(env, e);
2052 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2053 *rHi = tHi;
2054 *rLo = tLo;
2055 return ;
2056 }
2057
sewardj6c299f32009-12-31 18:00:12 +00002058 ppIRExpr(e);
2059 vpanic("iselInt64Expr");
2060}
2061
2062
2063/*---------------------------------------------------------*/
sewardj6c60b322010-08-22 12:48:28 +00002064/*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2065/*---------------------------------------------------------*/
2066
2067static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2068{
2069 HReg r = iselNeon64Expr_wrk( env, e );
2070 vassert(hregClass(r) == HRcFlt64);
2071 vassert(hregIsVirtual(r));
2072 return r;
2073}
2074
2075/* DO NOT CALL THIS DIRECTLY */
2076static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2077{
2078 IRType ty = typeOfIRExpr(env->type_env, e);
2079 MatchInfo mi;
2080 vassert(e);
2081 vassert(ty == Ity_I64);
2082
2083 if (e->tag == Iex_RdTmp) {
2084 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2085 }
2086
2087 if (e->tag == Iex_Const) {
2088 HReg rLo, rHi;
2089 HReg res = newVRegD(env);
2090 iselInt64Expr(&rHi, &rLo, env, e);
2091 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2092 return res;
2093 }
2094
2095 /* 64-bit load */
2096 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2097 HReg res = newVRegD(env);
2098 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2099 vassert(ty == Ity_I64);
2100 addInstr(env, ARMInstr_NLdStD(True, res, am));
2101 return res;
2102 }
2103
2104 /* 64-bit GET */
2105 if (e->tag == Iex_Get) {
2106 HReg addr = newVRegI(env);
2107 HReg res = newVRegD(env);
2108 vassert(ty == Ity_I64);
2109 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2110 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2111 return res;
2112 }
2113
2114 /* --------- BINARY ops --------- */
2115 if (e->tag == Iex_Binop) {
2116 switch (e->Iex.Binop.op) {
2117
2118 /* 32 x 32 -> 64 multiply */
2119 case Iop_MullS32:
2120 case Iop_MullU32: {
2121 HReg rLo, rHi;
2122 HReg res = newVRegD(env);
2123 iselInt64Expr(&rHi, &rLo, env, e);
2124 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2125 return res;
2126 }
2127
2128 case Iop_And64: {
2129 HReg res = newVRegD(env);
2130 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2131 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2132 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2133 res, argL, argR, 4, False));
2134 return res;
2135 }
2136 case Iop_Or64: {
2137 HReg res = newVRegD(env);
2138 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2139 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2140 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2141 res, argL, argR, 4, False));
2142 return res;
2143 }
2144 case Iop_Xor64: {
2145 HReg res = newVRegD(env);
2146 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2147 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2148 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2149 res, argL, argR, 4, False));
2150 return res;
2151 }
2152
2153 /* 32HLto64(e1,e2) */
2154 case Iop_32HLto64: {
2155 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2156 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2157 HReg res = newVRegD(env);
2158 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2159 return res;
2160 }
2161
2162 case Iop_Add8x8:
2163 case Iop_Add16x4:
2164 case Iop_Add32x2:
2165 case Iop_Add64: {
2166 HReg res = newVRegD(env);
2167 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2168 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2169 UInt size;
2170 switch (e->Iex.Binop.op) {
2171 case Iop_Add8x8: size = 0; break;
2172 case Iop_Add16x4: size = 1; break;
2173 case Iop_Add32x2: size = 2; break;
2174 case Iop_Add64: size = 3; break;
2175 default: vassert(0);
2176 }
2177 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2178 res, argL, argR, size, False));
2179 return res;
2180 }
2181 case Iop_Add32Fx2: {
2182 HReg res = newVRegD(env);
2183 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2184 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2185 UInt size = 0;
2186 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2187 res, argL, argR, size, False));
2188 return res;
2189 }
2190 case Iop_Recps32Fx2: {
2191 HReg res = newVRegD(env);
2192 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2193 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2194 UInt size = 0;
2195 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2196 res, argL, argR, size, False));
2197 return res;
2198 }
2199 case Iop_Rsqrts32Fx2: {
2200 HReg res = newVRegD(env);
2201 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2202 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2203 UInt size = 0;
2204 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2205 res, argL, argR, size, False));
2206 return res;
2207 }
2208 case Iop_InterleaveOddLanes8x8:
2209 case Iop_InterleaveOddLanes16x4:
2210 case Iop_InterleaveLO32x2:
2211 case Iop_InterleaveEvenLanes8x8:
2212 case Iop_InterleaveEvenLanes16x4:
2213 case Iop_InterleaveHI32x2: {
2214 HReg tmp = newVRegD(env);
2215 HReg res = newVRegD(env);
2216 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2217 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2218 UInt size;
2219 UInt is_lo;
2220 switch (e->Iex.Binop.op) {
2221 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2222 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2223 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2224 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2225 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2226 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2227 default: vassert(0);
2228 }
2229 if (is_lo) {
2230 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2231 tmp, argL, 4, False));
2232 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2233 res, argR, 4, False));
2234 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2235 res, tmp, size, False));
2236 } else {
2237 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2238 tmp, argR, 4, False));
2239 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2240 res, argL, 4, False));
2241 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2242 tmp, res, size, False));
2243 }
2244 return res;
2245 }
2246 case Iop_InterleaveHI8x8:
2247 case Iop_InterleaveHI16x4:
2248 case Iop_InterleaveLO8x8:
2249 case Iop_InterleaveLO16x4: {
2250 HReg tmp = newVRegD(env);
2251 HReg res = newVRegD(env);
2252 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2253 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2254 UInt size;
2255 UInt is_lo;
2256 switch (e->Iex.Binop.op) {
2257 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2258 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2259 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2260 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2261 default: vassert(0);
2262 }
2263 if (is_lo) {
2264 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2265 tmp, argL, 4, False));
2266 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2267 res, argR, 4, False));
2268 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2269 res, tmp, size, False));
2270 } else {
2271 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2272 tmp, argR, 4, False));
2273 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2274 res, argL, 4, False));
2275 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2276 tmp, res, size, False));
2277 }
2278 return res;
2279 }
2280 case Iop_CatOddLanes8x8:
2281 case Iop_CatOddLanes16x4:
2282 case Iop_CatEvenLanes8x8:
2283 case Iop_CatEvenLanes16x4: {
2284 HReg tmp = newVRegD(env);
2285 HReg res = newVRegD(env);
2286 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2287 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2288 UInt size;
2289 UInt is_lo;
2290 switch (e->Iex.Binop.op) {
2291 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2292 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2293 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2294 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2295 default: vassert(0);
2296 }
2297 if (is_lo) {
2298 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2299 tmp, argL, 4, False));
2300 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2301 res, argR, 4, False));
2302 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2303 res, tmp, size, False));
2304 } else {
2305 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2306 tmp, argR, 4, False));
2307 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2308 res, argL, 4, False));
2309 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2310 tmp, res, size, False));
2311 }
2312 return res;
2313 }
2314 case Iop_QAdd8Ux8:
2315 case Iop_QAdd16Ux4:
2316 case Iop_QAdd32Ux2:
2317 case Iop_QAdd64Ux1: {
2318 HReg res = newVRegD(env);
2319 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2320 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2321 UInt size;
2322 switch (e->Iex.Binop.op) {
2323 case Iop_QAdd8Ux8: size = 0; break;
2324 case Iop_QAdd16Ux4: size = 1; break;
2325 case Iop_QAdd32Ux2: size = 2; break;
2326 case Iop_QAdd64Ux1: size = 3; break;
2327 default: vassert(0);
2328 }
2329 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2330 res, argL, argR, size, False));
2331 return res;
2332 }
2333 case Iop_QAdd8Sx8:
2334 case Iop_QAdd16Sx4:
2335 case Iop_QAdd32Sx2:
2336 case Iop_QAdd64Sx1: {
2337 HReg res = newVRegD(env);
2338 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2339 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2340 UInt size;
2341 switch (e->Iex.Binop.op) {
2342 case Iop_QAdd8Sx8: size = 0; break;
2343 case Iop_QAdd16Sx4: size = 1; break;
2344 case Iop_QAdd32Sx2: size = 2; break;
2345 case Iop_QAdd64Sx1: size = 3; break;
2346 default: vassert(0);
2347 }
2348 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2349 res, argL, argR, size, False));
2350 return res;
2351 }
2352 case Iop_Sub8x8:
2353 case Iop_Sub16x4:
2354 case Iop_Sub32x2:
2355 case Iop_Sub64: {
2356 HReg res = newVRegD(env);
2357 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2358 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2359 UInt size;
2360 switch (e->Iex.Binop.op) {
2361 case Iop_Sub8x8: size = 0; break;
2362 case Iop_Sub16x4: size = 1; break;
2363 case Iop_Sub32x2: size = 2; break;
2364 case Iop_Sub64: size = 3; break;
2365 default: vassert(0);
2366 }
2367 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2368 res, argL, argR, size, False));
2369 return res;
2370 }
2371 case Iop_Sub32Fx2: {
2372 HReg res = newVRegD(env);
2373 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2374 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2375 UInt size = 0;
2376 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2377 res, argL, argR, size, False));
2378 return res;
2379 }
2380 case Iop_QSub8Ux8:
2381 case Iop_QSub16Ux4:
2382 case Iop_QSub32Ux2:
2383 case Iop_QSub64Ux1: {
2384 HReg res = newVRegD(env);
2385 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2386 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2387 UInt size;
2388 switch (e->Iex.Binop.op) {
2389 case Iop_QSub8Ux8: size = 0; break;
2390 case Iop_QSub16Ux4: size = 1; break;
2391 case Iop_QSub32Ux2: size = 2; break;
2392 case Iop_QSub64Ux1: size = 3; break;
2393 default: vassert(0);
2394 }
2395 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2396 res, argL, argR, size, False));
2397 return res;
2398 }
2399 case Iop_QSub8Sx8:
2400 case Iop_QSub16Sx4:
2401 case Iop_QSub32Sx2:
2402 case Iop_QSub64Sx1: {
2403 HReg res = newVRegD(env);
2404 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2405 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2406 UInt size;
2407 switch (e->Iex.Binop.op) {
2408 case Iop_QSub8Sx8: size = 0; break;
2409 case Iop_QSub16Sx4: size = 1; break;
2410 case Iop_QSub32Sx2: size = 2; break;
2411 case Iop_QSub64Sx1: size = 3; break;
2412 default: vassert(0);
2413 }
2414 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2415 res, argL, argR, size, False));
2416 return res;
2417 }
2418 case Iop_Max8Ux8:
2419 case Iop_Max16Ux4:
2420 case Iop_Max32Ux2: {
2421 HReg res = newVRegD(env);
2422 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424 UInt size;
2425 switch (e->Iex.Binop.op) {
2426 case Iop_Max8Ux8: size = 0; break;
2427 case Iop_Max16Ux4: size = 1; break;
2428 case Iop_Max32Ux2: size = 2; break;
2429 default: vassert(0);
2430 }
2431 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2432 res, argL, argR, size, False));
2433 return res;
2434 }
2435 case Iop_Max8Sx8:
2436 case Iop_Max16Sx4:
2437 case Iop_Max32Sx2: {
2438 HReg res = newVRegD(env);
2439 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2441 UInt size;
2442 switch (e->Iex.Binop.op) {
2443 case Iop_Max8Sx8: size = 0; break;
2444 case Iop_Max16Sx4: size = 1; break;
2445 case Iop_Max32Sx2: size = 2; break;
2446 default: vassert(0);
2447 }
2448 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2449 res, argL, argR, size, False));
2450 return res;
2451 }
2452 case Iop_Min8Ux8:
2453 case Iop_Min16Ux4:
2454 case Iop_Min32Ux2: {
2455 HReg res = newVRegD(env);
2456 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458 UInt size;
2459 switch (e->Iex.Binop.op) {
2460 case Iop_Min8Ux8: size = 0; break;
2461 case Iop_Min16Ux4: size = 1; break;
2462 case Iop_Min32Ux2: size = 2; break;
2463 default: vassert(0);
2464 }
2465 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2466 res, argL, argR, size, False));
2467 return res;
2468 }
2469 case Iop_Min8Sx8:
2470 case Iop_Min16Sx4:
2471 case Iop_Min32Sx2: {
2472 HReg res = newVRegD(env);
2473 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475 UInt size;
2476 switch (e->Iex.Binop.op) {
2477 case Iop_Min8Sx8: size = 0; break;
2478 case Iop_Min16Sx4: size = 1; break;
2479 case Iop_Min32Sx2: size = 2; break;
2480 default: vassert(0);
2481 }
2482 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2483 res, argL, argR, size, False));
2484 return res;
2485 }
2486 case Iop_Sar8x8:
2487 case Iop_Sar16x4:
2488 case Iop_Sar32x2: {
2489 HReg res = newVRegD(env);
2490 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2491 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2492 HReg argR2 = newVRegD(env);
2493 HReg zero = newVRegD(env);
2494 UInt size;
2495 switch (e->Iex.Binop.op) {
2496 case Iop_Sar8x8: size = 0; break;
2497 case Iop_Sar16x4: size = 1; break;
2498 case Iop_Sar32x2: size = 2; break;
2499 case Iop_Sar64: size = 3; break;
2500 default: vassert(0);
2501 }
2502 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2503 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2504 argR2, zero, argR, size, False));
2505 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2506 res, argL, argR2, size, False));
2507 return res;
2508 }
2509 case Iop_Sal8x8:
2510 case Iop_Sal16x4:
2511 case Iop_Sal32x2:
2512 case Iop_Sal64x1: {
2513 HReg res = newVRegD(env);
2514 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2515 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2516 UInt size;
2517 switch (e->Iex.Binop.op) {
2518 case Iop_Sal8x8: size = 0; break;
2519 case Iop_Sal16x4: size = 1; break;
2520 case Iop_Sal32x2: size = 2; break;
2521 case Iop_Sal64x1: size = 3; break;
2522 default: vassert(0);
2523 }
2524 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2525 res, argL, argR, size, False));
2526 return res;
2527 }
2528 case Iop_Shr8x8:
2529 case Iop_Shr16x4:
2530 case Iop_Shr32x2: {
2531 HReg res = newVRegD(env);
2532 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2533 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2534 HReg argR2 = newVRegD(env);
2535 HReg zero = newVRegD(env);
2536 UInt size;
2537 switch (e->Iex.Binop.op) {
2538 case Iop_Shr8x8: size = 0; break;
2539 case Iop_Shr16x4: size = 1; break;
2540 case Iop_Shr32x2: size = 2; break;
2541 default: vassert(0);
2542 }
2543 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2544 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2545 argR2, zero, argR, size, False));
2546 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547 res, argL, argR2, size, False));
2548 return res;
2549 }
2550 case Iop_Shl8x8:
2551 case Iop_Shl16x4:
2552 case Iop_Shl32x2: {
2553 HReg res = newVRegD(env);
2554 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2555 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2556 UInt size;
2557 switch (e->Iex.Binop.op) {
2558 case Iop_Shl8x8: size = 0; break;
2559 case Iop_Shl16x4: size = 1; break;
2560 case Iop_Shl32x2: size = 2; break;
2561 default: vassert(0);
2562 }
2563 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2564 res, argL, argR, size, False));
2565 return res;
2566 }
2567 case Iop_QShl8x8:
2568 case Iop_QShl16x4:
2569 case Iop_QShl32x2:
2570 case Iop_QShl64x1: {
2571 HReg res = newVRegD(env);
2572 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2573 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2574 UInt size;
2575 switch (e->Iex.Binop.op) {
2576 case Iop_QShl8x8: size = 0; break;
2577 case Iop_QShl16x4: size = 1; break;
2578 case Iop_QShl32x2: size = 2; break;
2579 case Iop_QShl64x1: size = 3; break;
2580 default: vassert(0);
2581 }
2582 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2583 res, argL, argR, size, False));
2584 return res;
2585 }
2586 case Iop_QSal8x8:
2587 case Iop_QSal16x4:
2588 case Iop_QSal32x2:
2589 case Iop_QSal64x1: {
2590 HReg res = newVRegD(env);
2591 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2592 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2593 UInt size;
2594 switch (e->Iex.Binop.op) {
2595 case Iop_QSal8x8: size = 0; break;
2596 case Iop_QSal16x4: size = 1; break;
2597 case Iop_QSal32x2: size = 2; break;
2598 case Iop_QSal64x1: size = 3; break;
2599 default: vassert(0);
2600 }
2601 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2602 res, argL, argR, size, False));
2603 return res;
2604 }
2605 case Iop_QShlN8x8:
2606 case Iop_QShlN16x4:
2607 case Iop_QShlN32x2:
2608 case Iop_QShlN64x1: {
2609 HReg res = newVRegD(env);
2610 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2611 UInt size, imm;
2612 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2613 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2614 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2615 "second argument only\n");
2616 }
2617 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2618 switch (e->Iex.Binop.op) {
2619 case Iop_QShlN8x8: size = 8 | imm; break;
2620 case Iop_QShlN16x4: size = 16 | imm; break;
2621 case Iop_QShlN32x2: size = 32 | imm; break;
2622 case Iop_QShlN64x1: size = 64 | imm; break;
2623 default: vassert(0);
2624 }
2625 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2626 res, argL, size, False));
2627 return res;
2628 }
2629 case Iop_QShlN8Sx8:
2630 case Iop_QShlN16Sx4:
2631 case Iop_QShlN32Sx2:
2632 case Iop_QShlN64Sx1: {
2633 HReg res = newVRegD(env);
2634 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2635 UInt size, imm;
2636 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2637 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2638 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2639 "second argument only\n");
2640 }
2641 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2642 switch (e->Iex.Binop.op) {
2643 case Iop_QShlN8Sx8: size = 8 | imm; break;
2644 case Iop_QShlN16Sx4: size = 16 | imm; break;
2645 case Iop_QShlN32Sx2: size = 32 | imm; break;
2646 case Iop_QShlN64Sx1: size = 64 | imm; break;
2647 default: vassert(0);
2648 }
2649 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2650 res, argL, size, False));
2651 return res;
2652 }
2653 case Iop_QSalN8x8:
2654 case Iop_QSalN16x4:
2655 case Iop_QSalN32x2:
2656 case Iop_QSalN64x1: {
2657 HReg res = newVRegD(env);
2658 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2659 UInt size, imm;
2660 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2661 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2662 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2663 "second argument only\n");
2664 }
2665 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2666 switch (e->Iex.Binop.op) {
2667 case Iop_QSalN8x8: size = 8 | imm; break;
2668 case Iop_QSalN16x4: size = 16 | imm; break;
2669 case Iop_QSalN32x2: size = 32 | imm; break;
2670 case Iop_QSalN64x1: size = 64 | imm; break;
2671 default: vassert(0);
2672 }
2673 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2674 res, argL, size, False));
2675 return res;
2676 }
2677 case Iop_ShrN8x8:
2678 case Iop_ShrN16x4:
2679 case Iop_ShrN32x2:
2680 case Iop_Shr64: {
2681 HReg res = newVRegD(env);
2682 HReg tmp = newVRegD(env);
2683 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2684 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2685 HReg argR2 = newVRegI(env);
2686 UInt size;
2687 switch (e->Iex.Binop.op) {
2688 case Iop_ShrN8x8: size = 0; break;
2689 case Iop_ShrN16x4: size = 1; break;
2690 case Iop_ShrN32x2: size = 2; break;
2691 case Iop_Shr64: size = 3; break;
2692 default: vassert(0);
2693 }
2694 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2695 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2696 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2697 res, argL, tmp, size, False));
2698 return res;
2699 }
2700 case Iop_ShlN8x8:
2701 case Iop_ShlN16x4:
2702 case Iop_ShlN32x2:
2703 case Iop_Shl64: {
2704 HReg res = newVRegD(env);
2705 HReg tmp = newVRegD(env);
2706 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2707 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2708 UInt size;
2709 switch (e->Iex.Binop.op) {
2710 case Iop_ShlN8x8: size = 0; break;
2711 case Iop_ShlN16x4: size = 1; break;
2712 case Iop_ShlN32x2: size = 2; break;
2713 case Iop_Shl64: size = 3; break;
2714 default: vassert(0);
2715 }
2716 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2717 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2718 res, argL, tmp, size, False));
2719 return res;
2720 }
2721 case Iop_SarN8x8:
2722 case Iop_SarN16x4:
2723 case Iop_SarN32x2:
2724 case Iop_Sar64: {
2725 HReg res = newVRegD(env);
2726 HReg tmp = newVRegD(env);
2727 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2728 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2729 HReg argR2 = newVRegI(env);
2730 UInt size;
2731 switch (e->Iex.Binop.op) {
2732 case Iop_SarN8x8: size = 0; break;
2733 case Iop_SarN16x4: size = 1; break;
2734 case Iop_SarN32x2: size = 2; break;
2735 case Iop_Sar64: size = 3; break;
2736 default: vassert(0);
2737 }
2738 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2739 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2740 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2741 res, argL, tmp, size, False));
2742 return res;
2743 }
2744 case Iop_CmpGT8Ux8:
2745 case Iop_CmpGT16Ux4:
2746 case Iop_CmpGT32Ux2: {
2747 HReg res = newVRegD(env);
2748 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2750 UInt size;
2751 switch (e->Iex.Binop.op) {
2752 case Iop_CmpGT8Ux8: size = 0; break;
2753 case Iop_CmpGT16Ux4: size = 1; break;
2754 case Iop_CmpGT32Ux2: size = 2; break;
2755 default: vassert(0);
2756 }
2757 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2758 res, argL, argR, size, False));
2759 return res;
2760 }
2761 case Iop_CmpGT8Sx8:
2762 case Iop_CmpGT16Sx4:
2763 case Iop_CmpGT32Sx2: {
2764 HReg res = newVRegD(env);
2765 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2767 UInt size;
2768 switch (e->Iex.Binop.op) {
2769 case Iop_CmpGT8Sx8: size = 0; break;
2770 case Iop_CmpGT16Sx4: size = 1; break;
2771 case Iop_CmpGT32Sx2: size = 2; break;
2772 default: vassert(0);
2773 }
2774 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2775 res, argL, argR, size, False));
2776 return res;
2777 }
2778 case Iop_CmpEQ8x8:
2779 case Iop_CmpEQ16x4:
2780 case Iop_CmpEQ32x2: {
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 UInt size;
2785 switch (e->Iex.Binop.op) {
2786 case Iop_CmpEQ8x8: size = 0; break;
2787 case Iop_CmpEQ16x4: size = 1; break;
2788 case Iop_CmpEQ32x2: size = 2; break;
2789 default: vassert(0);
2790 }
2791 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2792 res, argL, argR, size, False));
2793 return res;
2794 }
2795 case Iop_Mul8x8:
2796 case Iop_Mul16x4:
2797 case Iop_Mul32x2: {
2798 HReg res = newVRegD(env);
2799 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2800 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2801 UInt size = 0;
2802 switch(e->Iex.Binop.op) {
2803 case Iop_Mul8x8: size = 0; break;
2804 case Iop_Mul16x4: size = 1; break;
2805 case Iop_Mul32x2: size = 2; break;
2806 default: vassert(0);
2807 }
2808 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2809 res, argL, argR, size, False));
2810 return res;
2811 }
2812 case Iop_Mul32Fx2: {
2813 HReg res = newVRegD(env);
2814 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2815 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2816 UInt size = 0;
2817 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2818 res, argL, argR, size, False));
2819 return res;
2820 }
2821 case Iop_QDMulHi16Sx4:
2822 case Iop_QDMulHi32Sx2: {
2823 HReg res = newVRegD(env);
2824 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2825 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2826 UInt size = 0;
2827 switch(e->Iex.Binop.op) {
2828 case Iop_QDMulHi16Sx4: size = 1; break;
2829 case Iop_QDMulHi32Sx2: size = 2; break;
2830 default: vassert(0);
2831 }
2832 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2833 res, argL, argR, size, False));
2834 return res;
2835 }
2836
2837 case Iop_QRDMulHi16Sx4:
2838 case Iop_QRDMulHi32Sx2: {
2839 HReg res = newVRegD(env);
2840 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2842 UInt size = 0;
2843 switch(e->Iex.Binop.op) {
2844 case Iop_QRDMulHi16Sx4: size = 1; break;
2845 case Iop_QRDMulHi32Sx2: size = 2; break;
2846 default: vassert(0);
2847 }
2848 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2849 res, argL, argR, size, False));
2850 return res;
2851 }
2852
2853 case Iop_PwAdd8x8:
2854 case Iop_PwAdd16x4:
2855 case Iop_PwAdd32x2: {
2856 HReg res = newVRegD(env);
2857 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2858 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2859 UInt size = 0;
2860 switch(e->Iex.Binop.op) {
2861 case Iop_PwAdd8x8: size = 0; break;
2862 case Iop_PwAdd16x4: size = 1; break;
2863 case Iop_PwAdd32x2: size = 2; break;
2864 default: vassert(0);
2865 }
2866 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2867 res, argL, argR, size, False));
2868 return res;
2869 }
2870 case Iop_PwAdd32Fx2: {
2871 HReg res = newVRegD(env);
2872 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2873 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2874 UInt size = 0;
2875 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2876 res, argL, argR, size, False));
2877 return res;
2878 }
2879 case Iop_PwMin8Ux8:
2880 case Iop_PwMin16Ux4:
2881 case Iop_PwMin32Ux2: {
2882 HReg res = newVRegD(env);
2883 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2885 UInt size = 0;
2886 switch(e->Iex.Binop.op) {
2887 case Iop_PwMin8Ux8: size = 0; break;
2888 case Iop_PwMin16Ux4: size = 1; break;
2889 case Iop_PwMin32Ux2: size = 2; break;
2890 default: vassert(0);
2891 }
2892 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2893 res, argL, argR, size, False));
2894 return res;
2895 }
2896 case Iop_PwMin8Sx8:
2897 case Iop_PwMin16Sx4:
2898 case Iop_PwMin32Sx2: {
2899 HReg res = newVRegD(env);
2900 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902 UInt size = 0;
2903 switch(e->Iex.Binop.op) {
2904 case Iop_PwMin8Sx8: size = 0; break;
2905 case Iop_PwMin16Sx4: size = 1; break;
2906 case Iop_PwMin32Sx2: size = 2; break;
2907 default: vassert(0);
2908 }
2909 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2910 res, argL, argR, size, False));
2911 return res;
2912 }
2913 case Iop_PwMax8Ux8:
2914 case Iop_PwMax16Ux4:
2915 case Iop_PwMax32Ux2: {
2916 HReg res = newVRegD(env);
2917 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919 UInt size = 0;
2920 switch(e->Iex.Binop.op) {
2921 case Iop_PwMax8Ux8: size = 0; break;
2922 case Iop_PwMax16Ux4: size = 1; break;
2923 case Iop_PwMax32Ux2: size = 2; break;
2924 default: vassert(0);
2925 }
2926 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2927 res, argL, argR, size, False));
2928 return res;
2929 }
2930 case Iop_PwMax8Sx8:
2931 case Iop_PwMax16Sx4:
2932 case Iop_PwMax32Sx2: {
2933 HReg res = newVRegD(env);
2934 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2935 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2936 UInt size = 0;
2937 switch(e->Iex.Binop.op) {
2938 case Iop_PwMax8Sx8: size = 0; break;
2939 case Iop_PwMax16Sx4: size = 1; break;
2940 case Iop_PwMax32Sx2: size = 2; break;
2941 default: vassert(0);
2942 }
2943 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2944 res, argL, argR, size, False));
2945 return res;
2946 }
2947 case Iop_Perm8x8: {
2948 HReg res = newVRegD(env);
2949 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2952 res, argL, argR, 0, False));
2953 return res;
2954 }
2955 case Iop_PolynomialMul8x8: {
2956 HReg res = newVRegD(env);
2957 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959 UInt size = 0;
2960 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2961 res, argL, argR, size, False));
2962 return res;
2963 }
2964 case Iop_Max32Fx2: {
2965 HReg res = newVRegD(env);
2966 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2967 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2968 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2969 res, argL, argR, 2, False));
2970 return res;
2971 }
2972 case Iop_Min32Fx2: {
2973 HReg res = newVRegD(env);
2974 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2975 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2976 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2977 res, argL, argR, 2, False));
2978 return res;
2979 }
2980 case Iop_PwMax32Fx2: {
2981 HReg res = newVRegD(env);
2982 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2983 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2984 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2985 res, argL, argR, 2, False));
2986 return res;
2987 }
2988 case Iop_PwMin32Fx2: {
2989 HReg res = newVRegD(env);
2990 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2991 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2992 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2993 res, argL, argR, 2, False));
2994 return res;
2995 }
2996 case Iop_CmpGT32Fx2: {
2997 HReg res = newVRegD(env);
2998 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2999 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3000 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3001 res, argL, argR, 2, False));
3002 return res;
3003 }
3004 case Iop_CmpGE32Fx2: {
3005 HReg res = newVRegD(env);
3006 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3007 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3008 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3009 res, argL, argR, 2, False));
3010 return res;
3011 }
3012 case Iop_CmpEQ32Fx2: {
3013 HReg res = newVRegD(env);
3014 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3015 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3016 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3017 res, argL, argR, 2, False));
3018 return res;
3019 }
3020 case Iop_F32ToFixed32Ux2_RZ:
3021 case Iop_F32ToFixed32Sx2_RZ:
3022 case Iop_Fixed32UToF32x2_RN:
3023 case Iop_Fixed32SToF32x2_RN: {
3024 HReg res = newVRegD(env);
3025 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3026 ARMNeonUnOp op;
3027 UInt imm6;
3028 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3029 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3030 vpanic("ARM supports FP <-> Fixed conversion with constant "
3031 "second argument less than 33 only\n");
3032 }
3033 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3034 vassert(imm6 <= 32 && imm6 > 0);
3035 imm6 = 64 - imm6;
3036 switch(e->Iex.Binop.op) {
3037 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3038 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3039 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3040 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3041 default: vassert(0);
3042 }
3043 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3044 return res;
3045 }
3046 /*
3047 FIXME: is this here or not?
3048 case Iop_VDup8x8:
3049 case Iop_VDup16x4:
3050 case Iop_VDup32x2: {
3051 HReg res = newVRegD(env);
3052 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3053 UInt index;
3054 UInt imm4;
3055 UInt size = 0;
3056 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3057 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3058 vpanic("ARM supports Iop_VDup with constant "
3059 "second argument less than 16 only\n");
3060 }
3061 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3062 switch(e->Iex.Binop.op) {
3063 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3064 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3065 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3066 default: vassert(0);
3067 }
3068 if (imm4 >= 16) {
3069 vpanic("ARM supports Iop_VDup with constant "
3070 "second argument less than 16 only\n");
3071 }
3072 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3073 res, argL, imm4, False));
3074 return res;
3075 }
3076 */
3077 default:
3078 break;
3079 }
3080 }
3081
3082 /* --------- UNARY ops --------- */
3083 if (e->tag == Iex_Unop) {
3084 switch (e->Iex.Unop.op) {
3085
sewardjabf39452012-12-12 00:16:41 +00003086 /* 32Uto64 */
3087 case Iop_32Uto64: {
3088 HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3089 HReg rHi = newVRegI(env);
3090 HReg res = newVRegD(env);
3091 addInstr(env, ARMInstr_Imm32(rHi, 0));
3092 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3093 return res;
3094 }
3095
sewardj6c60b322010-08-22 12:48:28 +00003096 /* ReinterpF64asI64 */
3097 case Iop_ReinterpF64asI64:
3098 /* Left64(e) */
3099 case Iop_Left64:
3100 /* CmpwNEZ64(e) */
3101 //case Iop_CmpwNEZ64:
3102 case Iop_1Sto64: {
3103 HReg rLo, rHi;
3104 HReg res = newVRegD(env);
3105 iselInt64Expr(&rHi, &rLo, env, e);
3106 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3107 return res;
3108 }
3109 case Iop_Not64: {
3110 DECLARE_PATTERN(p_veqz_8x8);
3111 DECLARE_PATTERN(p_veqz_16x4);
3112 DECLARE_PATTERN(p_veqz_32x2);
3113 DECLARE_PATTERN(p_vcge_8sx8);
3114 DECLARE_PATTERN(p_vcge_16sx4);
3115 DECLARE_PATTERN(p_vcge_32sx2);
3116 DECLARE_PATTERN(p_vcge_8ux8);
3117 DECLARE_PATTERN(p_vcge_16ux4);
3118 DECLARE_PATTERN(p_vcge_32ux2);
3119 DEFINE_PATTERN(p_veqz_8x8,
3120 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3121 DEFINE_PATTERN(p_veqz_16x4,
3122 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3123 DEFINE_PATTERN(p_veqz_32x2,
3124 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3125 DEFINE_PATTERN(p_vcge_8sx8,
3126 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3127 DEFINE_PATTERN(p_vcge_16sx4,
3128 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3129 DEFINE_PATTERN(p_vcge_32sx2,
3130 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3131 DEFINE_PATTERN(p_vcge_8ux8,
3132 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3133 DEFINE_PATTERN(p_vcge_16ux4,
3134 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3135 DEFINE_PATTERN(p_vcge_32ux2,
3136 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3137 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3138 HReg res = newVRegD(env);
3139 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3140 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3141 return res;
3142 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3143 HReg res = newVRegD(env);
3144 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3145 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3146 return res;
3147 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3148 HReg res = newVRegD(env);
3149 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3150 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3151 return res;
3152 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3153 HReg res = newVRegD(env);
3154 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3155 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3156 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3157 res, argL, argR, 0, False));
3158 return res;
3159 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3160 HReg res = newVRegD(env);
3161 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3162 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3163 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3164 res, argL, argR, 1, False));
3165 return res;
3166 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3167 HReg res = newVRegD(env);
3168 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3169 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3170 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3171 res, argL, argR, 2, False));
3172 return res;
3173 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3174 HReg res = newVRegD(env);
3175 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3176 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3177 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3178 res, argL, argR, 0, False));
3179 return res;
3180 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3181 HReg res = newVRegD(env);
3182 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3183 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3184 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3185 res, argL, argR, 1, False));
3186 return res;
3187 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3188 HReg res = newVRegD(env);
3189 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3190 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3191 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3192 res, argL, argR, 2, False));
3193 return res;
3194 } else {
3195 HReg res = newVRegD(env);
3196 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3197 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3198 return res;
3199 }
3200 }
3201 case Iop_Dup8x8:
3202 case Iop_Dup16x4:
3203 case Iop_Dup32x2: {
3204 HReg res, arg;
3205 UInt size;
3206 DECLARE_PATTERN(p_vdup_8x8);
3207 DECLARE_PATTERN(p_vdup_16x4);
3208 DECLARE_PATTERN(p_vdup_32x2);
3209 DEFINE_PATTERN(p_vdup_8x8,
3210 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3211 DEFINE_PATTERN(p_vdup_16x4,
3212 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3213 DEFINE_PATTERN(p_vdup_32x2,
3214 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3215 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3216 UInt index;
3217 UInt imm4;
3218 if (mi.bindee[1]->tag == Iex_Const &&
3219 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3220 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3221 imm4 = (index << 1) + 1;
3222 if (index < 8) {
3223 res = newVRegD(env);
3224 arg = iselNeon64Expr(env, mi.bindee[0]);
3225 addInstr(env, ARMInstr_NUnaryS(
3226 ARMneon_VDUP,
3227 mkARMNRS(ARMNRS_Reg, res, 0),
3228 mkARMNRS(ARMNRS_Scalar, arg, index),
3229 imm4, False
3230 ));
3231 return res;
3232 }
3233 }
3234 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3235 UInt index;
3236 UInt imm4;
3237 if (mi.bindee[1]->tag == Iex_Const &&
3238 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3239 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3240 imm4 = (index << 2) + 2;
3241 if (index < 4) {
3242 res = newVRegD(env);
3243 arg = iselNeon64Expr(env, mi.bindee[0]);
3244 addInstr(env, ARMInstr_NUnaryS(
3245 ARMneon_VDUP,
3246 mkARMNRS(ARMNRS_Reg, res, 0),
3247 mkARMNRS(ARMNRS_Scalar, arg, index),
3248 imm4, False
3249 ));
3250 return res;
3251 }
3252 }
3253 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3254 UInt index;
3255 UInt imm4;
3256 if (mi.bindee[1]->tag == Iex_Const &&
3257 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3258 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3259 imm4 = (index << 3) + 4;
3260 if (index < 2) {
3261 res = newVRegD(env);
3262 arg = iselNeon64Expr(env, mi.bindee[0]);
3263 addInstr(env, ARMInstr_NUnaryS(
3264 ARMneon_VDUP,
3265 mkARMNRS(ARMNRS_Reg, res, 0),
3266 mkARMNRS(ARMNRS_Scalar, arg, index),
3267 imm4, False
3268 ));
3269 return res;
3270 }
3271 }
3272 }
3273 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3274 res = newVRegD(env);
3275 switch (e->Iex.Unop.op) {
3276 case Iop_Dup8x8: size = 0; break;
3277 case Iop_Dup16x4: size = 1; break;
3278 case Iop_Dup32x2: size = 2; break;
3279 default: vassert(0);
3280 }
3281 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3282 return res;
3283 }
3284 case Iop_Abs8x8:
3285 case Iop_Abs16x4:
3286 case Iop_Abs32x2: {
3287 HReg res = newVRegD(env);
3288 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3289 UInt size = 0;
3290 switch(e->Iex.Binop.op) {
3291 case Iop_Abs8x8: size = 0; break;
3292 case Iop_Abs16x4: size = 1; break;
3293 case Iop_Abs32x2: size = 2; break;
3294 default: vassert(0);
3295 }
3296 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3297 return res;
3298 }
3299 case Iop_Reverse64_8x8:
3300 case Iop_Reverse64_16x4:
3301 case Iop_Reverse64_32x2: {
3302 HReg res = newVRegD(env);
3303 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3304 UInt size = 0;
3305 switch(e->Iex.Binop.op) {
3306 case Iop_Reverse64_8x8: size = 0; break;
3307 case Iop_Reverse64_16x4: size = 1; break;
3308 case Iop_Reverse64_32x2: size = 2; break;
3309 default: vassert(0);
3310 }
3311 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3312 res, arg, size, False));
3313 return res;
3314 }
3315 case Iop_Reverse32_8x8:
3316 case Iop_Reverse32_16x4: {
3317 HReg res = newVRegD(env);
3318 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3319 UInt size = 0;
3320 switch(e->Iex.Binop.op) {
3321 case Iop_Reverse32_8x8: size = 0; break;
3322 case Iop_Reverse32_16x4: size = 1; break;
3323 default: vassert(0);
3324 }
3325 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3326 res, arg, size, False));
3327 return res;
3328 }
3329 case Iop_Reverse16_8x8: {
3330 HReg res = newVRegD(env);
3331 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3332 UInt size = 0;
3333 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3334 res, arg, size, False));
3335 return res;
3336 }
3337 case Iop_CmpwNEZ64: {
3338 HReg x_lsh = newVRegD(env);
3339 HReg x_rsh = newVRegD(env);
3340 HReg lsh_amt = newVRegD(env);
3341 HReg rsh_amt = newVRegD(env);
3342 HReg zero = newVRegD(env);
3343 HReg tmp = newVRegD(env);
3344 HReg tmp2 = newVRegD(env);
3345 HReg res = newVRegD(env);
3346 HReg x = newVRegD(env);
3347 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3348 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3349 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3350 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3351 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3352 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3353 rsh_amt, zero, lsh_amt, 2, False));
3354 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3355 x_lsh, x, lsh_amt, 3, False));
3356 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3357 x_rsh, x, rsh_amt, 3, False));
3358 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3359 tmp, x_lsh, x_rsh, 0, False));
3360 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3361 res, tmp, x, 0, False));
3362 return res;
3363 }
3364 case Iop_CmpNEZ8x8:
3365 case Iop_CmpNEZ16x4:
3366 case Iop_CmpNEZ32x2: {
3367 HReg res = newVRegD(env);
3368 HReg tmp = newVRegD(env);
3369 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3370 UInt size;
3371 switch (e->Iex.Unop.op) {
3372 case Iop_CmpNEZ8x8: size = 0; break;
3373 case Iop_CmpNEZ16x4: size = 1; break;
3374 case Iop_CmpNEZ32x2: size = 2; break;
3375 default: vassert(0);
3376 }
3377 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3378 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3379 return res;
3380 }
sewardj5f438dd2011-06-16 11:36:23 +00003381 case Iop_NarrowUn16to8x8:
3382 case Iop_NarrowUn32to16x4:
3383 case Iop_NarrowUn64to32x2: {
sewardj6c60b322010-08-22 12:48:28 +00003384 HReg res = newVRegD(env);
3385 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3386 UInt size = 0;
3387 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003388 case Iop_NarrowUn16to8x8: size = 0; break;
3389 case Iop_NarrowUn32to16x4: size = 1; break;
3390 case Iop_NarrowUn64to32x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003391 default: vassert(0);
3392 }
3393 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3394 res, arg, size, False));
3395 return res;
3396 }
sewardj5f438dd2011-06-16 11:36:23 +00003397 case Iop_QNarrowUn16Sto8Sx8:
3398 case Iop_QNarrowUn32Sto16Sx4:
3399 case Iop_QNarrowUn64Sto32Sx2: {
sewardj6c60b322010-08-22 12:48:28 +00003400 HReg res = newVRegD(env);
3401 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3402 UInt size = 0;
3403 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003404 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3405 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3406 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003407 default: vassert(0);
3408 }
3409 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3410 res, arg, size, False));
3411 return res;
3412 }
sewardj5f438dd2011-06-16 11:36:23 +00003413 case Iop_QNarrowUn16Sto8Ux8:
3414 case Iop_QNarrowUn32Sto16Ux4:
3415 case Iop_QNarrowUn64Sto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003416 HReg res = newVRegD(env);
3417 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3418 UInt size = 0;
3419 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003420 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3421 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3422 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003423 default: vassert(0);
3424 }
3425 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3426 res, arg, size, False));
3427 return res;
3428 }
sewardj5f438dd2011-06-16 11:36:23 +00003429 case Iop_QNarrowUn16Uto8Ux8:
3430 case Iop_QNarrowUn32Uto16Ux4:
3431 case Iop_QNarrowUn64Uto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003432 HReg res = newVRegD(env);
3433 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3434 UInt size = 0;
3435 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003436 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3437 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3438 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003439 default: vassert(0);
3440 }
3441 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3442 res, arg, size, False));
3443 return res;
3444 }
3445 case Iop_PwAddL8Sx8:
3446 case Iop_PwAddL16Sx4:
3447 case Iop_PwAddL32Sx2: {
3448 HReg res = newVRegD(env);
3449 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3450 UInt size = 0;
3451 switch(e->Iex.Binop.op) {
3452 case Iop_PwAddL8Sx8: size = 0; break;
3453 case Iop_PwAddL16Sx4: size = 1; break;
3454 case Iop_PwAddL32Sx2: size = 2; break;
3455 default: vassert(0);
3456 }
3457 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3458 res, arg, size, False));
3459 return res;
3460 }
3461 case Iop_PwAddL8Ux8:
3462 case Iop_PwAddL16Ux4:
3463 case Iop_PwAddL32Ux2: {
3464 HReg res = newVRegD(env);
3465 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3466 UInt size = 0;
3467 switch(e->Iex.Binop.op) {
3468 case Iop_PwAddL8Ux8: size = 0; break;
3469 case Iop_PwAddL16Ux4: size = 1; break;
3470 case Iop_PwAddL32Ux2: size = 2; break;
3471 default: vassert(0);
3472 }
3473 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3474 res, arg, size, False));
3475 return res;
3476 }
3477 case Iop_Cnt8x8: {
3478 HReg res = newVRegD(env);
3479 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3480 UInt size = 0;
3481 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3482 res, arg, size, False));
3483 return res;
3484 }
3485 case Iop_Clz8Sx8:
3486 case Iop_Clz16Sx4:
3487 case Iop_Clz32Sx2: {
3488 HReg res = newVRegD(env);
3489 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3490 UInt size = 0;
3491 switch(e->Iex.Binop.op) {
3492 case Iop_Clz8Sx8: size = 0; break;
3493 case Iop_Clz16Sx4: size = 1; break;
3494 case Iop_Clz32Sx2: size = 2; break;
3495 default: vassert(0);
3496 }
3497 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3498 res, arg, size, False));
3499 return res;
3500 }
3501 case Iop_Cls8Sx8:
3502 case Iop_Cls16Sx4:
3503 case Iop_Cls32Sx2: {
3504 HReg res = newVRegD(env);
3505 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3506 UInt size = 0;
3507 switch(e->Iex.Binop.op) {
3508 case Iop_Cls8Sx8: size = 0; break;
3509 case Iop_Cls16Sx4: size = 1; break;
3510 case Iop_Cls32Sx2: size = 2; break;
3511 default: vassert(0);
3512 }
3513 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3514 res, arg, size, False));
3515 return res;
3516 }
3517 case Iop_FtoI32Sx2_RZ: {
3518 HReg res = newVRegD(env);
3519 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3520 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3521 res, arg, 2, False));
3522 return res;
3523 }
3524 case Iop_FtoI32Ux2_RZ: {
3525 HReg res = newVRegD(env);
3526 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3527 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3528 res, arg, 2, False));
3529 return res;
3530 }
3531 case Iop_I32StoFx2: {
3532 HReg res = newVRegD(env);
3533 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3534 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3535 res, arg, 2, False));
3536 return res;
3537 }
3538 case Iop_I32UtoFx2: {
3539 HReg res = newVRegD(env);
3540 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3541 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3542 res, arg, 2, False));
3543 return res;
3544 }
3545 case Iop_F32toF16x4: {
3546 HReg res = newVRegD(env);
3547 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3548 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3549 res, arg, 2, False));
3550 return res;
3551 }
3552 case Iop_Recip32Fx2: {
3553 HReg res = newVRegD(env);
3554 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3555 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3556 res, argL, 0, False));
3557 return res;
3558 }
3559 case Iop_Recip32x2: {
3560 HReg res = newVRegD(env);
3561 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3562 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3563 res, argL, 0, False));
3564 return res;
3565 }
3566 case Iop_Abs32Fx2: {
3567 DECLARE_PATTERN(p_vabd_32fx2);
3568 DEFINE_PATTERN(p_vabd_32fx2,
3569 unop(Iop_Abs32Fx2,
3570 binop(Iop_Sub32Fx2,
3571 bind(0),
3572 bind(1))));
3573 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3574 HReg res = newVRegD(env);
3575 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3576 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3577 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3578 res, argL, argR, 0, False));
3579 return res;
3580 } else {
3581 HReg res = newVRegD(env);
3582 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3583 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3584 res, arg, 0, False));
3585 return res;
3586 }
3587 }
3588 case Iop_Rsqrte32Fx2: {
3589 HReg res = newVRegD(env);
3590 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3591 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3592 res, arg, 0, False));
3593 return res;
3594 }
3595 case Iop_Rsqrte32x2: {
3596 HReg res = newVRegD(env);
3597 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3598 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3599 res, arg, 0, False));
3600 return res;
3601 }
3602 case Iop_Neg32Fx2: {
3603 HReg res = newVRegD(env);
3604 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3605 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3606 res, arg, 0, False));
3607 return res;
3608 }
3609 default:
3610 break;
3611 }
3612 } /* if (e->tag == Iex_Unop) */
3613
3614 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00003615 IRTriop *triop = e->Iex.Triop.details;
3616
3617 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00003618 case Iop_Extract64: {
3619 HReg res = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00003620 HReg argL = iselNeon64Expr(env, triop->arg1);
3621 HReg argR = iselNeon64Expr(env, triop->arg2);
sewardj6c60b322010-08-22 12:48:28 +00003622 UInt imm4;
florian420bfa92012-06-02 20:29:22 +00003623 if (triop->arg3->tag != Iex_Const ||
3624 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00003625 vpanic("ARM target supports Iop_Extract64 with constant "
3626 "third argument less than 16 only\n");
3627 }
florian420bfa92012-06-02 20:29:22 +00003628 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
sewardj6c60b322010-08-22 12:48:28 +00003629 if (imm4 >= 8) {
3630 vpanic("ARM target supports Iop_Extract64 with constant "
3631 "third argument less than 16 only\n");
3632 }
3633 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3634 res, argL, argR, imm4, False));
3635 return res;
3636 }
3637 case Iop_SetElem8x8:
3638 case Iop_SetElem16x4:
3639 case Iop_SetElem32x2: {
3640 HReg res = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00003641 HReg dreg = iselNeon64Expr(env, triop->arg1);
3642 HReg arg = iselIntExpr_R(env, triop->arg3);
sewardj6c60b322010-08-22 12:48:28 +00003643 UInt index, size;
florian420bfa92012-06-02 20:29:22 +00003644 if (triop->arg2->tag != Iex_Const ||
3645 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00003646 vpanic("ARM target supports SetElem with constant "
3647 "second argument only\n");
3648 }
florian420bfa92012-06-02 20:29:22 +00003649 index = triop->arg2->Iex.Const.con->Ico.U8;
3650 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00003651 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3652 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3653 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3654 default: vassert(0);
3655 }
3656 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3657 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3658 mkARMNRS(ARMNRS_Scalar, res, index),
3659 mkARMNRS(ARMNRS_Reg, arg, 0),
3660 size, False));
3661 return res;
3662 }
3663 default:
3664 break;
3665 }
3666 }
3667
3668 /* --------- MULTIPLEX --------- */
3669 if (e->tag == Iex_Mux0X) {
3670 HReg rLo, rHi;
3671 HReg res = newVRegD(env);
3672 iselInt64Expr(&rHi, &rLo, env, e);
3673 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3674 return res;
3675 }
3676
3677 ppIRExpr(e);
3678 vpanic("iselNeon64Expr");
3679}
3680
3681static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3682{
3683 HReg r = iselNeonExpr_wrk( env, e );
3684 vassert(hregClass(r) == HRcVec128);
3685 vassert(hregIsVirtual(r));
3686 return r;
3687}
3688
3689/* DO NOT CALL THIS DIRECTLY */
3690static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3691{
3692 IRType ty = typeOfIRExpr(env->type_env, e);
3693 MatchInfo mi;
3694 vassert(e);
3695 vassert(ty == Ity_V128);
3696
3697 if (e->tag == Iex_RdTmp) {
3698 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3699 }
3700
3701 if (e->tag == Iex_Const) {
      /* At the moment, the IR generated when disassembling ARM code
         should contain no 128-bit constants: they are represented as
         an Iop_64HLtoV128 binary operation and are handled among the
         binary ops. */
      /* However, a zero constant can be created by Valgrind's
         internal optimizer. */
3706 if (e->Iex.Const.con->Ico.V128 == 0) {
3707 HReg res = newVRegV(env);
3708 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3709 return res;
3710 }
3711 ppIRExpr(e);
3712 vpanic("128-bit constant is not implemented");
3713 }
3714
3715 if (e->tag == Iex_Load) {
3716 HReg res = newVRegV(env);
3717 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3718 vassert(ty == Ity_V128);
3719 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3720 return res;
3721 }
3722
3723 if (e->tag == Iex_Get) {
3724 HReg addr = newVRegI(env);
3725 HReg res = newVRegV(env);
3726 vassert(ty == Ity_V128);
3727 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3728 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3729 return res;
3730 }
3731
3732 if (e->tag == Iex_Unop) {
3733 switch (e->Iex.Unop.op) {
3734 case Iop_NotV128: {
3735 DECLARE_PATTERN(p_veqz_8x16);
3736 DECLARE_PATTERN(p_veqz_16x8);
3737 DECLARE_PATTERN(p_veqz_32x4);
3738 DECLARE_PATTERN(p_vcge_8sx16);
3739 DECLARE_PATTERN(p_vcge_16sx8);
3740 DECLARE_PATTERN(p_vcge_32sx4);
3741 DECLARE_PATTERN(p_vcge_8ux16);
3742 DECLARE_PATTERN(p_vcge_16ux8);
3743 DECLARE_PATTERN(p_vcge_32ux4);
3744 DEFINE_PATTERN(p_veqz_8x16,
3745 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3746 DEFINE_PATTERN(p_veqz_16x8,
3747 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3748 DEFINE_PATTERN(p_veqz_32x4,
3749 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3750 DEFINE_PATTERN(p_vcge_8sx16,
3751 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3752 DEFINE_PATTERN(p_vcge_16sx8,
3753 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3754 DEFINE_PATTERN(p_vcge_32sx4,
3755 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3756 DEFINE_PATTERN(p_vcge_8ux16,
3757 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3758 DEFINE_PATTERN(p_vcge_16ux8,
3759 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3760 DEFINE_PATTERN(p_vcge_32ux4,
3761 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3762 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3763 HReg res = newVRegV(env);
3764 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3765 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3766 return res;
3767 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3768 HReg res = newVRegV(env);
3769 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3770 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3771 return res;
3772 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3773 HReg res = newVRegV(env);
3774 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3775 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3776 return res;
3777 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3778 HReg res = newVRegV(env);
3779 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3780 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3781 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3782 res, argL, argR, 0, True));
3783 return res;
3784 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3785 HReg res = newVRegV(env);
3786 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3787 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3788 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3789 res, argL, argR, 1, True));
3790 return res;
3791 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3792 HReg res = newVRegV(env);
3793 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3794 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3795 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3796 res, argL, argR, 2, True));
3797 return res;
3798 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3799 HReg res = newVRegV(env);
3800 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3801 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3802 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3803 res, argL, argR, 0, True));
3804 return res;
3805 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3806 HReg res = newVRegV(env);
3807 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3808 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3809 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3810 res, argL, argR, 1, True));
3811 return res;
3812 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3813 HReg res = newVRegV(env);
3814 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3815 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3816 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3817 res, argL, argR, 2, True));
3818 return res;
3819 } else {
3820 HReg res = newVRegV(env);
3821 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3822 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3823 return res;
3824 }
3825 }
3826 case Iop_Dup8x16:
3827 case Iop_Dup16x8:
3828 case Iop_Dup32x4: {
3829 HReg res, arg;
3830 UInt size;
3831 DECLARE_PATTERN(p_vdup_8x16);
3832 DECLARE_PATTERN(p_vdup_16x8);
3833 DECLARE_PATTERN(p_vdup_32x4);
3834 DEFINE_PATTERN(p_vdup_8x16,
3835 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3836 DEFINE_PATTERN(p_vdup_16x8,
3837 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3838 DEFINE_PATTERN(p_vdup_32x4,
3839 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3840 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3841 UInt index;
3842 UInt imm4;
3843 if (mi.bindee[1]->tag == Iex_Const &&
3844 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3845 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3846 imm4 = (index << 1) + 1;
3847 if (index < 8) {
3848 res = newVRegV(env);
3849 arg = iselNeon64Expr(env, mi.bindee[0]);
3850 addInstr(env, ARMInstr_NUnaryS(
3851 ARMneon_VDUP,
3852 mkARMNRS(ARMNRS_Reg, res, 0),
3853 mkARMNRS(ARMNRS_Scalar, arg, index),
3854 imm4, True
3855 ));
3856 return res;
3857 }
3858 }
3859 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3860 UInt index;
3861 UInt imm4;
3862 if (mi.bindee[1]->tag == Iex_Const &&
3863 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3864 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3865 imm4 = (index << 2) + 2;
3866 if (index < 4) {
3867 res = newVRegV(env);
3868 arg = iselNeon64Expr(env, mi.bindee[0]);
3869 addInstr(env, ARMInstr_NUnaryS(
3870 ARMneon_VDUP,
3871 mkARMNRS(ARMNRS_Reg, res, 0),
3872 mkARMNRS(ARMNRS_Scalar, arg, index),
3873 imm4, True
3874 ));
3875 return res;
3876 }
3877 }
3878 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3879 UInt index;
3880 UInt imm4;
3881 if (mi.bindee[1]->tag == Iex_Const &&
3882 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3883 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3884 imm4 = (index << 3) + 4;
3885 if (index < 2) {
3886 res = newVRegV(env);
3887 arg = iselNeon64Expr(env, mi.bindee[0]);
3888 addInstr(env, ARMInstr_NUnaryS(
3889 ARMneon_VDUP,
3890 mkARMNRS(ARMNRS_Reg, res, 0),
3891 mkARMNRS(ARMNRS_Scalar, arg, index),
3892 imm4, True
3893 ));
3894 return res;
3895 }
3896 }
3897 }
3898 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3899 res = newVRegV(env);
3900 switch (e->Iex.Unop.op) {
3901 case Iop_Dup8x16: size = 0; break;
3902 case Iop_Dup16x8: size = 1; break;
3903 case Iop_Dup32x4: size = 2; break;
3904 default: vassert(0);
3905 }
3906 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3907 return res;
3908 }
3909 case Iop_Abs8x16:
3910 case Iop_Abs16x8:
3911 case Iop_Abs32x4: {
3912 HReg res = newVRegV(env);
3913 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3914 UInt size = 0;
3915 switch(e->Iex.Binop.op) {
3916 case Iop_Abs8x16: size = 0; break;
3917 case Iop_Abs16x8: size = 1; break;
3918 case Iop_Abs32x4: size = 2; break;
3919 default: vassert(0);
3920 }
3921 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3922 return res;
3923 }
3924 case Iop_Reverse64_8x16:
3925 case Iop_Reverse64_16x8:
3926 case Iop_Reverse64_32x4: {
3927 HReg res = newVRegV(env);
3928 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3929 UInt size = 0;
3930 switch(e->Iex.Binop.op) {
3931 case Iop_Reverse64_8x16: size = 0; break;
3932 case Iop_Reverse64_16x8: size = 1; break;
3933 case Iop_Reverse64_32x4: size = 2; break;
3934 default: vassert(0);
3935 }
3936 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3937 res, arg, size, True));
3938 return res;
3939 }
3940 case Iop_Reverse32_8x16:
3941 case Iop_Reverse32_16x8: {
3942 HReg res = newVRegV(env);
3943 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3944 UInt size = 0;
3945 switch(e->Iex.Binop.op) {
3946 case Iop_Reverse32_8x16: size = 0; break;
3947 case Iop_Reverse32_16x8: size = 1; break;
3948 default: vassert(0);
3949 }
3950 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3951 res, arg, size, True));
3952 return res;
3953 }
3954 case Iop_Reverse16_8x16: {
3955 HReg res = newVRegV(env);
3956 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3957 UInt size = 0;
3958 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3959 res, arg, size, True));
3960 return res;
3961 }
3962 case Iop_CmpNEZ64x2: {
3963 HReg x_lsh = newVRegV(env);
3964 HReg x_rsh = newVRegV(env);
3965 HReg lsh_amt = newVRegV(env);
3966 HReg rsh_amt = newVRegV(env);
3967 HReg zero = newVRegV(env);
3968 HReg tmp = newVRegV(env);
3969 HReg tmp2 = newVRegV(env);
3970 HReg res = newVRegV(env);
3971 HReg x = newVRegV(env);
3972 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3973 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3974 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3975 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3976 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3977 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3978 rsh_amt, zero, lsh_amt, 2, True));
3979 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3980 x_lsh, x, lsh_amt, 3, True));
3981 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3982 x_rsh, x, rsh_amt, 3, True));
3983 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3984 tmp, x_lsh, x_rsh, 0, True));
3985 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3986 res, tmp, x, 0, True));
3987 return res;
3988 }
3989 case Iop_CmpNEZ8x16:
3990 case Iop_CmpNEZ16x8:
3991 case Iop_CmpNEZ32x4: {
3992 HReg res = newVRegV(env);
3993 HReg tmp = newVRegV(env);
3994 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3995 UInt size;
3996 switch (e->Iex.Unop.op) {
3997 case Iop_CmpNEZ8x16: size = 0; break;
3998 case Iop_CmpNEZ16x8: size = 1; break;
3999 case Iop_CmpNEZ32x4: size = 2; break;
4000 default: vassert(0);
4001 }
4002 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4003 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4004 return res;
4005 }
sewardj5f438dd2011-06-16 11:36:23 +00004006 case Iop_Widen8Uto16x8:
4007 case Iop_Widen16Uto32x4:
4008 case Iop_Widen32Uto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00004009 HReg res = newVRegV(env);
4010 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4011 UInt size;
4012 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004013 case Iop_Widen8Uto16x8: size = 0; break;
4014 case Iop_Widen16Uto32x4: size = 1; break;
4015 case Iop_Widen32Uto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004016 default: vassert(0);
4017 }
4018 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4019 res, arg, size, True));
4020 return res;
4021 }
sewardj5f438dd2011-06-16 11:36:23 +00004022 case Iop_Widen8Sto16x8:
4023 case Iop_Widen16Sto32x4:
4024 case Iop_Widen32Sto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00004025 HReg res = newVRegV(env);
4026 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4027 UInt size;
4028 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004029 case Iop_Widen8Sto16x8: size = 0; break;
4030 case Iop_Widen16Sto32x4: size = 1; break;
4031 case Iop_Widen32Sto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004032 default: vassert(0);
4033 }
4034 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4035 res, arg, size, True));
4036 return res;
4037 }
4038 case Iop_PwAddL8Sx16:
4039 case Iop_PwAddL16Sx8:
4040 case Iop_PwAddL32Sx4: {
4041 HReg res = newVRegV(env);
4042 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4043 UInt size = 0;
4044 switch(e->Iex.Binop.op) {
4045 case Iop_PwAddL8Sx16: size = 0; break;
4046 case Iop_PwAddL16Sx8: size = 1; break;
4047 case Iop_PwAddL32Sx4: size = 2; break;
4048 default: vassert(0);
4049 }
4050 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4051 res, arg, size, True));
4052 return res;
4053 }
4054 case Iop_PwAddL8Ux16:
4055 case Iop_PwAddL16Ux8:
4056 case Iop_PwAddL32Ux4: {
4057 HReg res = newVRegV(env);
4058 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4059 UInt size = 0;
4060 switch(e->Iex.Binop.op) {
4061 case Iop_PwAddL8Ux16: size = 0; break;
4062 case Iop_PwAddL16Ux8: size = 1; break;
4063 case Iop_PwAddL32Ux4: size = 2; break;
4064 default: vassert(0);
4065 }
4066 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4067 res, arg, size, True));
4068 return res;
4069 }
4070 case Iop_Cnt8x16: {
4071 HReg res = newVRegV(env);
4072 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4073 UInt size = 0;
4074 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4075 return res;
4076 }
4077 case Iop_Clz8Sx16:
4078 case Iop_Clz16Sx8:
4079 case Iop_Clz32Sx4: {
4080 HReg res = newVRegV(env);
4081 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4082 UInt size = 0;
4083 switch(e->Iex.Binop.op) {
4084 case Iop_Clz8Sx16: size = 0; break;
4085 case Iop_Clz16Sx8: size = 1; break;
4086 case Iop_Clz32Sx4: size = 2; break;
4087 default: vassert(0);
4088 }
4089 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4090 return res;
4091 }
4092 case Iop_Cls8Sx16:
4093 case Iop_Cls16Sx8:
4094 case Iop_Cls32Sx4: {
4095 HReg res = newVRegV(env);
4096 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4097 UInt size = 0;
4098 switch(e->Iex.Binop.op) {
4099 case Iop_Cls8Sx16: size = 0; break;
4100 case Iop_Cls16Sx8: size = 1; break;
4101 case Iop_Cls32Sx4: size = 2; break;
4102 default: vassert(0);
4103 }
4104 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4105 return res;
4106 }
4107 case Iop_FtoI32Sx4_RZ: {
4108 HReg res = newVRegV(env);
4109 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4110 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4111 res, arg, 2, True));
4112 return res;
4113 }
4114 case Iop_FtoI32Ux4_RZ: {
4115 HReg res = newVRegV(env);
4116 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4117 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4118 res, arg, 2, True));
4119 return res;
4120 }
4121 case Iop_I32StoFx4: {
4122 HReg res = newVRegV(env);
4123 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4124 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4125 res, arg, 2, True));
4126 return res;
4127 }
4128 case Iop_I32UtoFx4: {
4129 HReg res = newVRegV(env);
4130 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4131 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4132 res, arg, 2, True));
4133 return res;
4134 }
4135 case Iop_F16toF32x4: {
4136 HReg res = newVRegV(env);
4137 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4138 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4139 res, arg, 2, True));
4140 return res;
4141 }
4142 case Iop_Recip32Fx4: {
4143 HReg res = newVRegV(env);
4144 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4145 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4146 res, argL, 0, True));
4147 return res;
4148 }
4149 case Iop_Recip32x4: {
4150 HReg res = newVRegV(env);
4151 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4152 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4153 res, argL, 0, True));
4154 return res;
4155 }
4156 case Iop_Abs32Fx4: {
4157 DECLARE_PATTERN(p_vabd_32fx4);
4158 DEFINE_PATTERN(p_vabd_32fx4,
4159 unop(Iop_Abs32Fx4,
4160 binop(Iop_Sub32Fx4,
4161 bind(0),
4162 bind(1))));
4163 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4164 HReg res = newVRegV(env);
4165 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4166 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4167 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4168 res, argL, argR, 0, True));
4169 return res;
4170 } else {
4171 HReg res = newVRegV(env);
4172 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4173 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4174 res, argL, 0, True));
4175 return res;
4176 }
4177 }
4178 case Iop_Rsqrte32Fx4: {
4179 HReg res = newVRegV(env);
4180 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4181 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4182 res, argL, 0, True));
4183 return res;
4184 }
4185 case Iop_Rsqrte32x4: {
4186 HReg res = newVRegV(env);
4187 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4188 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4189 res, argL, 0, True));
4190 return res;
4191 }
4192 case Iop_Neg32Fx4: {
4193 HReg res = newVRegV(env);
4194 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4195 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4196 res, arg, 0, True));
4197 return res;
4198 }
4199 /* ... */
4200 default:
4201 break;
4202 }
4203 }
4204
4205 if (e->tag == Iex_Binop) {
4206 switch (e->Iex.Binop.op) {
4207 case Iop_64HLtoV128:
4208 /* Try to match into single "VMOV reg, imm" instruction */
4209 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4210 e->Iex.Binop.arg2->tag == Iex_Const &&
4211 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4212 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4213 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4214 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4215 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4216 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4217 if (imm) {
4218 HReg res = newVRegV(env);
4219 addInstr(env, ARMInstr_NeonImm(res, imm));
4220 return res;
4221 }
4222 if ((imm64 >> 32) == 0LL &&
4223 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4224 HReg tmp1 = newVRegV(env);
4225 HReg tmp2 = newVRegV(env);
4226 HReg res = newVRegV(env);
4227 if (imm->type < 10) {
4228 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4229 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4230 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4231 res, tmp1, tmp2, 4, True));
4232 return res;
4233 }
4234 }
4235 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4236 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4237 HReg tmp1 = newVRegV(env);
4238 HReg tmp2 = newVRegV(env);
4239 HReg res = newVRegV(env);
4240 if (imm->type < 10) {
4241 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4242 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4243 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4244 res, tmp1, tmp2, 4, True));
4245 return res;
4246 }
4247 }
4248 }
sewardj6828dc72011-09-30 08:49:02 +00004249 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4250 it the slow way. */
4251 {
4252 /* local scope */
4253 /* Done via the stack for ease of use. */
4254 /* FIXME: assumes little endian host */
4255 HReg w3, w2, w1, w0;
4256 HReg res = newVRegV(env);
4257 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
4258 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
4259 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
4260 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4261 ARMRI84* c_16 = ARMRI84_I84(16,0);
            /* Make 16 bytes of room on the stack by lowering SP */
4263 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4264 hregARM_R13(), c_16));
4265
4266 /* Store the less significant 64 bits */
4267 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
sewardjcfe046e2013-01-17 14:23:53 +00004268 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4269 w0, sp_0));
4270 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4271 w1, sp_4));
sewardj6828dc72011-09-30 08:49:02 +00004272
4273 /* Store the more significant 64 bits */
4274 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
sewardjcfe046e2013-01-17 14:23:53 +00004275 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4276 w2, sp_8));
4277 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4278 w3, sp_12));
sewardj6828dc72011-09-30 08:49:02 +00004279
4280 /* Load result back from stack. */
4281 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4282 mkARMAModeN_R(hregARM_R13())));
4283
4284 /* Restore SP */
4285 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4286 hregARM_R13(), c_16));
4287 return res;
4288 } /* local scope */
sewardj6c60b322010-08-22 12:48:28 +00004289 goto neon_expr_bad;
4290 case Iop_AndV128: {
4291 HReg res = newVRegV(env);
4292 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4293 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4294 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4295 res, argL, argR, 4, True));
4296 return res;
4297 }
4298 case Iop_OrV128: {
4299 HReg res = newVRegV(env);
4300 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4301 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4302 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4303 res, argL, argR, 4, True));
4304 return res;
4305 }
4306 case Iop_XorV128: {
4307 HReg res = newVRegV(env);
4308 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4309 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4310 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4311 res, argL, argR, 4, True));
4312 return res;
4313 }
4314 case Iop_Add8x16:
4315 case Iop_Add16x8:
4316 case Iop_Add32x4:
4317 case Iop_Add64x2: {
4318 /*
4319 FIXME: remove this if not used
4320 DECLARE_PATTERN(p_vrhadd_32sx4);
4321 ULong one = (1LL << 32) | 1LL;
4322 DEFINE_PATTERN(p_vrhadd_32sx4,
4323 binop(Iop_Add32x4,
4324 binop(Iop_Add32x4,
4325 binop(Iop_SarN32x4,
4326 bind(0),
4327 mkU8(1)),
4328 binop(Iop_SarN32x4,
4329 bind(1),
4330 mkU8(1))),
4331 binop(Iop_SarN32x4,
4332 binop(Iop_Add32x4,
4333 binop(Iop_Add32x4,
4334 binop(Iop_AndV128,
4335 bind(0),
4336 mkU128(one)),
4337 binop(Iop_AndV128,
4338 bind(1),
4339 mkU128(one))),
4340 mkU128(one)),
4341 mkU8(1))));
4342 */
4343 HReg res = newVRegV(env);
4344 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4345 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4346 UInt size;
4347 switch (e->Iex.Binop.op) {
4348 case Iop_Add8x16: size = 0; break;
4349 case Iop_Add16x8: size = 1; break;
4350 case Iop_Add32x4: size = 2; break;
4351 case Iop_Add64x2: size = 3; break;
4352 default:
4353 ppIROp(e->Iex.Binop.op);
4354 vpanic("Illegal element size in VADD");
4355 }
4356 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4357 res, argL, argR, size, True));
4358 return res;
4359 }
4360 case Iop_Add32Fx4: {
4361 HReg res = newVRegV(env);
4362 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4363 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4364 UInt size = 0;
4365 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4366 res, argL, argR, size, True));
4367 return res;
4368 }
4369 case Iop_Recps32Fx4: {
4370 HReg res = newVRegV(env);
4371 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4372 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4373 UInt size = 0;
4374 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4375 res, argL, argR, size, True));
4376 return res;
4377 }
4378 case Iop_Rsqrts32Fx4: {
4379 HReg res = newVRegV(env);
4380 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4381 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4382 UInt size = 0;
4383 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4384 res, argL, argR, size, True));
4385 return res;
4386 }
4387 case Iop_InterleaveEvenLanes8x16:
4388 case Iop_InterleaveEvenLanes16x8:
4389 case Iop_InterleaveEvenLanes32x4:
4390 case Iop_InterleaveOddLanes8x16:
4391 case Iop_InterleaveOddLanes16x8:
4392 case Iop_InterleaveOddLanes32x4: {
4393 HReg tmp = newVRegV(env);
4394 HReg res = newVRegV(env);
4395 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4396 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4397 UInt size;
4398 UInt is_lo;
4399 switch (e->Iex.Binop.op) {
4400 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4401 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4402 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4403 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4404 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4405 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4406 default:
4407 ppIROp(e->Iex.Binop.op);
4408 vpanic("Illegal element size in VTRN");
4409 }
4410 if (is_lo) {
4411 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4412 tmp, argL, 4, True));
4413 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4414 res, argR, 4, True));
4415 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4416 res, tmp, size, True));
4417 } else {
4418 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4419 tmp, argR, 4, True));
4420 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4421 res, argL, 4, True));
4422 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4423 tmp, res, size, True));
4424 }
4425 return res;
4426 }
4427 case Iop_InterleaveHI8x16:
4428 case Iop_InterleaveHI16x8:
4429 case Iop_InterleaveHI32x4:
4430 case Iop_InterleaveLO8x16:
4431 case Iop_InterleaveLO16x8:
4432 case Iop_InterleaveLO32x4: {
4433 HReg tmp = newVRegV(env);
4434 HReg res = newVRegV(env);
4435 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4436 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4437 UInt size;
4438 UInt is_lo;
4439 switch (e->Iex.Binop.op) {
4440 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4441 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4442 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4443 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4444 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4445 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4446 default:
4447 ppIROp(e->Iex.Binop.op);
4448 vpanic("Illegal element size in VZIP");
4449 }
4450 if (is_lo) {
4451 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4452 tmp, argL, 4, True));
4453 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4454 res, argR, 4, True));
4455 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4456 res, tmp, size, True));
4457 } else {
4458 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4459 tmp, argR, 4, True));
4460 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4461 res, argL, 4, True));
4462 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4463 tmp, res, size, True));
4464 }
4465 return res;
4466 }
4467 case Iop_CatOddLanes8x16:
4468 case Iop_CatOddLanes16x8:
4469 case Iop_CatOddLanes32x4:
4470 case Iop_CatEvenLanes8x16:
4471 case Iop_CatEvenLanes16x8:
4472 case Iop_CatEvenLanes32x4: {
4473 HReg tmp = newVRegV(env);
4474 HReg res = newVRegV(env);
4475 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4476 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4477 UInt size;
4478 UInt is_lo;
4479 switch (e->Iex.Binop.op) {
4480 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4481 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4482 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4483 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4484 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4485 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4486 default:
4487 ppIROp(e->Iex.Binop.op);
4488 vpanic("Illegal element size in VUZP");
4489 }
4490 if (is_lo) {
4491 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4492 tmp, argL, 4, True));
4493 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4494 res, argR, 4, True));
4495 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4496 res, tmp, size, True));
4497 } else {
4498 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4499 tmp, argR, 4, True));
4500 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4501 res, argL, 4, True));
4502 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4503 tmp, res, size, True));
4504 }
4505 return res;
4506 }
4507 case Iop_QAdd8Ux16:
4508 case Iop_QAdd16Ux8:
4509 case Iop_QAdd32Ux4:
4510 case Iop_QAdd64Ux2: {
4511 HReg res = newVRegV(env);
4512 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4513 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4514 UInt size;
4515 switch (e->Iex.Binop.op) {
4516 case Iop_QAdd8Ux16: size = 0; break;
4517 case Iop_QAdd16Ux8: size = 1; break;
4518 case Iop_QAdd32Ux4: size = 2; break;
4519 case Iop_QAdd64Ux2: size = 3; break;
4520 default:
4521 ppIROp(e->Iex.Binop.op);
4522 vpanic("Illegal element size in VQADDU");
4523 }
4524 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4525 res, argL, argR, size, True));
4526 return res;
4527 }
4528 case Iop_QAdd8Sx16:
4529 case Iop_QAdd16Sx8:
4530 case Iop_QAdd32Sx4:
4531 case Iop_QAdd64Sx2: {
4532 HReg res = newVRegV(env);
4533 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4534 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4535 UInt size;
4536 switch (e->Iex.Binop.op) {
4537 case Iop_QAdd8Sx16: size = 0; break;
4538 case Iop_QAdd16Sx8: size = 1; break;
4539 case Iop_QAdd32Sx4: size = 2; break;
4540 case Iop_QAdd64Sx2: size = 3; break;
4541 default:
4542 ppIROp(e->Iex.Binop.op);
4543 vpanic("Illegal element size in VQADDS");
4544 }
4545 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4546 res, argL, argR, size, True));
4547 return res;
4548 }
4549 case Iop_Sub8x16:
4550 case Iop_Sub16x8:
4551 case Iop_Sub32x4:
4552 case Iop_Sub64x2: {
4553 HReg res = newVRegV(env);
4554 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4555 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4556 UInt size;
4557 switch (e->Iex.Binop.op) {
4558 case Iop_Sub8x16: size = 0; break;
4559 case Iop_Sub16x8: size = 1; break;
4560 case Iop_Sub32x4: size = 2; break;
4561 case Iop_Sub64x2: size = 3; break;
4562 default:
4563 ppIROp(e->Iex.Binop.op);
4564 vpanic("Illegal element size in VSUB");
4565 }
4566 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4567 res, argL, argR, size, True));
4568 return res;
4569 }
4570 case Iop_Sub32Fx4: {
4571 HReg res = newVRegV(env);
4572 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4573 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4574 UInt size = 0;
4575 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4576 res, argL, argR, size, True));
4577 return res;
4578 }
4579 case Iop_QSub8Ux16:
4580 case Iop_QSub16Ux8:
4581 case Iop_QSub32Ux4:
4582 case Iop_QSub64Ux2: {
4583 HReg res = newVRegV(env);
4584 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4585 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4586 UInt size;
4587 switch (e->Iex.Binop.op) {
4588 case Iop_QSub8Ux16: size = 0; break;
4589 case Iop_QSub16Ux8: size = 1; break;
4590 case Iop_QSub32Ux4: size = 2; break;
4591 case Iop_QSub64Ux2: size = 3; break;
4592 default:
4593 ppIROp(e->Iex.Binop.op);
4594 vpanic("Illegal element size in VQSUBU");
4595 }
4596 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4597 res, argL, argR, size, True));
4598 return res;
4599 }
4600 case Iop_QSub8Sx16:
4601 case Iop_QSub16Sx8:
4602 case Iop_QSub32Sx4:
4603 case Iop_QSub64Sx2: {
4604 HReg res = newVRegV(env);
4605 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4606 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4607 UInt size;
4608 switch (e->Iex.Binop.op) {
4609 case Iop_QSub8Sx16: size = 0; break;
4610 case Iop_QSub16Sx8: size = 1; break;
4611 case Iop_QSub32Sx4: size = 2; break;
4612 case Iop_QSub64Sx2: size = 3; break;
4613 default:
4614 ppIROp(e->Iex.Binop.op);
4615 vpanic("Illegal element size in VQSUBS");
4616 }
4617 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4618 res, argL, argR, size, True));
4619 return res;
4620 }
4621 case Iop_Max8Ux16:
4622 case Iop_Max16Ux8:
4623 case Iop_Max32Ux4: {
4624 HReg res = newVRegV(env);
4625 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4626 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4627 UInt size;
4628 switch (e->Iex.Binop.op) {
4629 case Iop_Max8Ux16: size = 0; break;
4630 case Iop_Max16Ux8: size = 1; break;
4631 case Iop_Max32Ux4: size = 2; break;
4632 default: vpanic("Illegal element size in VMAXU");
4633 }
4634 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4635 res, argL, argR, size, True));
4636 return res;
4637 }
4638 case Iop_Max8Sx16:
4639 case Iop_Max16Sx8:
4640 case Iop_Max32Sx4: {
4641 HReg res = newVRegV(env);
4642 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4643 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4644 UInt size;
4645 switch (e->Iex.Binop.op) {
4646 case Iop_Max8Sx16: size = 0; break;
4647 case Iop_Max16Sx8: size = 1; break;
4648 case Iop_Max32Sx4: size = 2; break;
4649 default: vpanic("Illegal element size in VMAXU");
4650 }
4651 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4652 res, argL, argR, size, True));
4653 return res;
4654 }
4655 case Iop_Min8Ux16:
4656 case Iop_Min16Ux8:
4657 case Iop_Min32Ux4: {
4658 HReg res = newVRegV(env);
4659 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4660 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4661 UInt size;
4662 switch (e->Iex.Binop.op) {
4663 case Iop_Min8Ux16: size = 0; break;
4664 case Iop_Min16Ux8: size = 1; break;
4665 case Iop_Min32Ux4: size = 2; break;
4666 default: vpanic("Illegal element size in VMAXU");
4667 }
4668 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4669 res, argL, argR, size, True));
4670 return res;
4671 }
4672 case Iop_Min8Sx16:
4673 case Iop_Min16Sx8:
4674 case Iop_Min32Sx4: {
4675 HReg res = newVRegV(env);
4676 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4677 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4678 UInt size;
4679 switch (e->Iex.Binop.op) {
4680 case Iop_Min8Sx16: size = 0; break;
4681 case Iop_Min16Sx8: size = 1; break;
4682 case Iop_Min32Sx4: size = 2; break;
4683 default: vpanic("Illegal element size in VMAXU");
4684 }
4685 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4686 res, argL, argR, size, True));
4687 return res;
4688 }
4689 case Iop_Sar8x16:
4690 case Iop_Sar16x8:
4691 case Iop_Sar32x4:
4692 case Iop_Sar64x2: {
4693 HReg res = newVRegV(env);
4694 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4695 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4696 HReg argR2 = newVRegV(env);
4697 HReg zero = newVRegV(env);
4698 UInt size;
4699 switch (e->Iex.Binop.op) {
4700 case Iop_Sar8x16: size = 0; break;
4701 case Iop_Sar16x8: size = 1; break;
4702 case Iop_Sar32x4: size = 2; break;
4703 case Iop_Sar64x2: size = 3; break;
4704 default: vassert(0);
4705 }
4706 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4707 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4708 argR2, zero, argR, size, True));
4709 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4710 res, argL, argR2, size, True));
4711 return res;
4712 }
4713 case Iop_Sal8x16:
4714 case Iop_Sal16x8:
4715 case Iop_Sal32x4:
4716 case Iop_Sal64x2: {
4717 HReg res = newVRegV(env);
4718 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4719 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4720 UInt size;
4721 switch (e->Iex.Binop.op) {
4722 case Iop_Sal8x16: size = 0; break;
4723 case Iop_Sal16x8: size = 1; break;
4724 case Iop_Sal32x4: size = 2; break;
4725 case Iop_Sal64x2: size = 3; break;
4726 default: vassert(0);
4727 }
4728 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4729 res, argL, argR, size, True));
4730 return res;
4731 }
4732 case Iop_Shr8x16:
4733 case Iop_Shr16x8:
4734 case Iop_Shr32x4:
4735 case Iop_Shr64x2: {
4736 HReg res = newVRegV(env);
4737 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4738 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4739 HReg argR2 = newVRegV(env);
4740 HReg zero = newVRegV(env);
4741 UInt size;
4742 switch (e->Iex.Binop.op) {
4743 case Iop_Shr8x16: size = 0; break;
4744 case Iop_Shr16x8: size = 1; break;
4745 case Iop_Shr32x4: size = 2; break;
4746 case Iop_Shr64x2: size = 3; break;
4747 default: vassert(0);
4748 }
4749 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4750 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4751 argR2, zero, argR, size, True));
4752 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4753 res, argL, argR2, size, True));
4754 return res;
4755 }
4756 case Iop_Shl8x16:
4757 case Iop_Shl16x8:
4758 case Iop_Shl32x4:
4759 case Iop_Shl64x2: {
4760 HReg res = newVRegV(env);
4761 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4762 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4763 UInt size;
4764 switch (e->Iex.Binop.op) {
4765 case Iop_Shl8x16: size = 0; break;
4766 case Iop_Shl16x8: size = 1; break;
4767 case Iop_Shl32x4: size = 2; break;
4768 case Iop_Shl64x2: size = 3; break;
4769 default: vassert(0);
4770 }
4771 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4772 res, argL, argR, size, True));
4773 return res;
4774 }
4775 case Iop_QShl8x16:
4776 case Iop_QShl16x8:
4777 case Iop_QShl32x4:
4778 case Iop_QShl64x2: {
4779 HReg res = newVRegV(env);
4780 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4781 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4782 UInt size;
4783 switch (e->Iex.Binop.op) {
4784 case Iop_QShl8x16: size = 0; break;
4785 case Iop_QShl16x8: size = 1; break;
4786 case Iop_QShl32x4: size = 2; break;
4787 case Iop_QShl64x2: size = 3; break;
4788 default: vassert(0);
4789 }
4790 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4791 res, argL, argR, size, True));
4792 return res;
4793 }
4794 case Iop_QSal8x16:
4795 case Iop_QSal16x8:
4796 case Iop_QSal32x4:
4797 case Iop_QSal64x2: {
4798 HReg res = newVRegV(env);
4799 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4800 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4801 UInt size;
4802 switch (e->Iex.Binop.op) {
4803 case Iop_QSal8x16: size = 0; break;
4804 case Iop_QSal16x8: size = 1; break;
4805 case Iop_QSal32x4: size = 2; break;
4806 case Iop_QSal64x2: size = 3; break;
4807 default: vassert(0);
4808 }
4809 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4810 res, argL, argR, size, True));
4811 return res;
4812 }
4813 case Iop_QShlN8x16:
4814 case Iop_QShlN16x8:
4815 case Iop_QShlN32x4:
4816 case Iop_QShlN64x2: {
4817 HReg res = newVRegV(env);
4818 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4819 UInt size, imm;
4820 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4821 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4822 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4823 "second argument only\n");
4824 }
4825 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4826 switch (e->Iex.Binop.op) {
4827 case Iop_QShlN8x16: size = 8 | imm; break;
4828 case Iop_QShlN16x8: size = 16 | imm; break;
4829 case Iop_QShlN32x4: size = 32 | imm; break;
4830 case Iop_QShlN64x2: size = 64 | imm; break;
4831 default: vassert(0);
4832 }
4833 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4834 res, argL, size, True));
4835 return res;
4836 }
4837 case Iop_QShlN8Sx16:
4838 case Iop_QShlN16Sx8:
4839 case Iop_QShlN32Sx4:
4840 case Iop_QShlN64Sx2: {
4841 HReg res = newVRegV(env);
4842 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4843 UInt size, imm;
4844 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4845 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4846 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4847 "second argument only\n");
4848 }
4849 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4850 switch (e->Iex.Binop.op) {
4851 case Iop_QShlN8Sx16: size = 8 | imm; break;
4852 case Iop_QShlN16Sx8: size = 16 | imm; break;
4853 case Iop_QShlN32Sx4: size = 32 | imm; break;
4854 case Iop_QShlN64Sx2: size = 64 | imm; break;
4855 default: vassert(0);
4856 }
4857 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4858 res, argL, size, True));
4859 return res;
4860 }
4861 case Iop_QSalN8x16:
4862 case Iop_QSalN16x8:
4863 case Iop_QSalN32x4:
4864 case Iop_QSalN64x2: {
4865 HReg res = newVRegV(env);
4866 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4867 UInt size, imm;
4868 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4869 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4870 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4871 "second argument only\n");
4872 }
4873 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4874 switch (e->Iex.Binop.op) {
4875 case Iop_QSalN8x16: size = 8 | imm; break;
4876 case Iop_QSalN16x8: size = 16 | imm; break;
4877 case Iop_QSalN32x4: size = 32 | imm; break;
4878 case Iop_QSalN64x2: size = 64 | imm; break;
4879 default: vassert(0);
4880 }
4881 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4882 res, argL, size, True));
4883 return res;
4884 }
4885 case Iop_ShrN8x16:
4886 case Iop_ShrN16x8:
4887 case Iop_ShrN32x4:
4888 case Iop_ShrN64x2: {
4889 HReg res = newVRegV(env);
4890 HReg tmp = newVRegV(env);
4891 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4892 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4893 HReg argR2 = newVRegI(env);
4894 UInt size;
4895 switch (e->Iex.Binop.op) {
4896 case Iop_ShrN8x16: size = 0; break;
4897 case Iop_ShrN16x8: size = 1; break;
4898 case Iop_ShrN32x4: size = 2; break;
4899 case Iop_ShrN64x2: size = 3; break;
4900 default: vassert(0);
4901 }
4902 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4903 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4904 tmp, argR2, 0, True));
4905 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4906 res, argL, tmp, size, True));
4907 return res;
4908 }
4909 case Iop_ShlN8x16:
4910 case Iop_ShlN16x8:
4911 case Iop_ShlN32x4:
4912 case Iop_ShlN64x2: {
4913 HReg res = newVRegV(env);
4914 HReg tmp = newVRegV(env);
4915 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4916 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4917 UInt size;
4918 switch (e->Iex.Binop.op) {
4919 case Iop_ShlN8x16: size = 0; break;
4920 case Iop_ShlN16x8: size = 1; break;
4921 case Iop_ShlN32x4: size = 2; break;
4922 case Iop_ShlN64x2: size = 3; break;
4923 default: vassert(0);
4924 }
4925 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4926 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4927 res, argL, tmp, size, True));
4928 return res;
4929 }
4930 case Iop_SarN8x16:
4931 case Iop_SarN16x8:
4932 case Iop_SarN32x4:
4933 case Iop_SarN64x2: {
4934 HReg res = newVRegV(env);
4935 HReg tmp = newVRegV(env);
4936 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4937 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4938 HReg argR2 = newVRegI(env);
4939 UInt size;
4940 switch (e->Iex.Binop.op) {
4941 case Iop_SarN8x16: size = 0; break;
4942 case Iop_SarN16x8: size = 1; break;
4943 case Iop_SarN32x4: size = 2; break;
4944 case Iop_SarN64x2: size = 3; break;
4945 default: vassert(0);
4946 }
4947 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4948 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4949 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4950 res, argL, tmp, size, True));
4951 return res;
4952 }
4953 case Iop_CmpGT8Ux16:
4954 case Iop_CmpGT16Ux8:
4955 case Iop_CmpGT32Ux4: {
4956 HReg res = newVRegV(env);
4957 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4958 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4959 UInt size;
4960 switch (e->Iex.Binop.op) {
4961 case Iop_CmpGT8Ux16: size = 0; break;
4962 case Iop_CmpGT16Ux8: size = 1; break;
4963 case Iop_CmpGT32Ux4: size = 2; break;
4964 default: vassert(0);
4965 }
4966 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4967 res, argL, argR, size, True));
4968 return res;
4969 }
4970 case Iop_CmpGT8Sx16:
4971 case Iop_CmpGT16Sx8:
4972 case Iop_CmpGT32Sx4: {
4973 HReg res = newVRegV(env);
4974 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4975 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4976 UInt size;
4977 switch (e->Iex.Binop.op) {
4978 case Iop_CmpGT8Sx16: size = 0; break;
4979 case Iop_CmpGT16Sx8: size = 1; break;
4980 case Iop_CmpGT32Sx4: size = 2; break;
4981 default: vassert(0);
4982 }
4983 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4984 res, argL, argR, size, True));
4985 return res;
4986 }
4987 case Iop_CmpEQ8x16:
4988 case Iop_CmpEQ16x8:
4989 case Iop_CmpEQ32x4: {
4990 HReg res = newVRegV(env);
4991 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4992 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4993 UInt size;
4994 switch (e->Iex.Binop.op) {
4995 case Iop_CmpEQ8x16: size = 0; break;
4996 case Iop_CmpEQ16x8: size = 1; break;
4997 case Iop_CmpEQ32x4: size = 2; break;
4998 default: vassert(0);
4999 }
5000 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5001 res, argL, argR, size, True));
5002 return res;
5003 }
5004 case Iop_Mul8x16:
5005 case Iop_Mul16x8:
5006 case Iop_Mul32x4: {
5007 HReg res = newVRegV(env);
5008 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5009 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5010 UInt size = 0;
5011 switch(e->Iex.Binop.op) {
5012 case Iop_Mul8x16: size = 0; break;
5013 case Iop_Mul16x8: size = 1; break;
5014 case Iop_Mul32x4: size = 2; break;
5015 default: vassert(0);
5016 }
5017 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5018 res, argL, argR, size, True));
5019 return res;
5020 }
5021 case Iop_Mul32Fx4: {
5022 HReg res = newVRegV(env);
5023 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5024 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5025 UInt size = 0;
5026 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5027 res, argL, argR, size, True));
5028 return res;
5029 }
5030 case Iop_Mull8Ux8:
5031 case Iop_Mull16Ux4:
5032 case Iop_Mull32Ux2: {
5033 HReg res = newVRegV(env);
5034 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5035 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5036 UInt size = 0;
5037 switch(e->Iex.Binop.op) {
5038 case Iop_Mull8Ux8: size = 0; break;
5039 case Iop_Mull16Ux4: size = 1; break;
5040 case Iop_Mull32Ux2: size = 2; break;
5041 default: vassert(0);
5042 }
5043 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5044 res, argL, argR, size, True));
5045 return res;
5046 }
5047
5048 case Iop_Mull8Sx8:
5049 case Iop_Mull16Sx4:
5050 case Iop_Mull32Sx2: {
5051 HReg res = newVRegV(env);
5052 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5053 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5054 UInt size = 0;
5055 switch(e->Iex.Binop.op) {
5056 case Iop_Mull8Sx8: size = 0; break;
5057 case Iop_Mull16Sx4: size = 1; break;
5058 case Iop_Mull32Sx2: size = 2; break;
5059 default: vassert(0);
5060 }
5061 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5062 res, argL, argR, size, True));
5063 return res;
5064 }
5065
5066 case Iop_QDMulHi16Sx8:
5067 case Iop_QDMulHi32Sx4: {
5068 HReg res = newVRegV(env);
5069 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5070 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5071 UInt size = 0;
5072 switch(e->Iex.Binop.op) {
5073 case Iop_QDMulHi16Sx8: size = 1; break;
5074 case Iop_QDMulHi32Sx4: size = 2; break;
5075 default: vassert(0);
5076 }
5077 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5078 res, argL, argR, size, True));
5079 return res;
5080 }
5081
5082 case Iop_QRDMulHi16Sx8:
5083 case Iop_QRDMulHi32Sx4: {
5084 HReg res = newVRegV(env);
5085 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5086 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5087 UInt size = 0;
5088 switch(e->Iex.Binop.op) {
5089 case Iop_QRDMulHi16Sx8: size = 1; break;
5090 case Iop_QRDMulHi32Sx4: size = 2; break;
5091 default: vassert(0);
5092 }
5093 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5094 res, argL, argR, size, True));
5095 return res;
5096 }
5097
5098 case Iop_QDMulLong16Sx4:
5099 case Iop_QDMulLong32Sx2: {
5100 HReg res = newVRegV(env);
5101 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5102 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5103 UInt size = 0;
5104 switch(e->Iex.Binop.op) {
5105 case Iop_QDMulLong16Sx4: size = 1; break;
5106 case Iop_QDMulLong32Sx2: size = 2; break;
5107 default: vassert(0);
5108 }
5109 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5110 res, argL, argR, size, True));
5111 return res;
5112 }
5113 case Iop_PolynomialMul8x16: {
5114 HReg res = newVRegV(env);
5115 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5116 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5117 UInt size = 0;
5118 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5119 res, argL, argR, size, True));
5120 return res;
5121 }
5122 case Iop_Max32Fx4: {
5123 HReg res = newVRegV(env);
5124 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5125 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5126 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5127 res, argL, argR, 2, True));
5128 return res;
5129 }
5130 case Iop_Min32Fx4: {
5131 HReg res = newVRegV(env);
5132 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5133 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5134 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5135 res, argL, argR, 2, True));
5136 return res;
5137 }
5138 case Iop_PwMax32Fx4: {
5139 HReg res = newVRegV(env);
5140 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5141 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5142 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5143 res, argL, argR, 2, True));
5144 return res;
5145 }
5146 case Iop_PwMin32Fx4: {
5147 HReg res = newVRegV(env);
5148 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5149 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5150 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5151 res, argL, argR, 2, True));
5152 return res;
5153 }
5154 case Iop_CmpGT32Fx4: {
5155 HReg res = newVRegV(env);
5156 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5157 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5158 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5159 res, argL, argR, 2, True));
5160 return res;
5161 }
5162 case Iop_CmpGE32Fx4: {
5163 HReg res = newVRegV(env);
5164 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5165 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5166 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5167 res, argL, argR, 2, True));
5168 return res;
5169 }
5170 case Iop_CmpEQ32Fx4: {
5171 HReg res = newVRegV(env);
5172 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5173 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5174 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5175 res, argL, argR, 2, True));
5176 return res;
5177 }
5178
5179 case Iop_PolynomialMull8x8: {
5180 HReg res = newVRegV(env);
5181 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5182 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5183 UInt size = 0;
5184 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5185 res, argL, argR, size, True));
5186 return res;
5187 }
5188 case Iop_F32ToFixed32Ux4_RZ:
5189 case Iop_F32ToFixed32Sx4_RZ:
5190 case Iop_Fixed32UToF32x4_RN:
5191 case Iop_Fixed32SToF32x4_RN: {
5192 HReg res = newVRegV(env);
5193 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5194 ARMNeonUnOp op;
5195 UInt imm6;
5196 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5197 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5198 vpanic("ARM supports FP <-> Fixed conversion with constant "
5199 "second argument less than 33 only\n");
5200 }
5201 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5202 vassert(imm6 <= 32 && imm6 > 0);
5203 imm6 = 64 - imm6;
5204 switch(e->Iex.Binop.op) {
5205 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5206 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5207 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5208 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5209 default: vassert(0);
5210 }
5211 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5212 return res;
5213 }
5214 /*
5215 FIXME remove if not used
5216 case Iop_VDup8x16:
5217 case Iop_VDup16x8:
5218 case Iop_VDup32x4: {
5219 HReg res = newVRegV(env);
5220 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5221 UInt imm4;
5222 UInt index;
5223 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5224 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5225 vpanic("ARM supports Iop_VDup with constant "
5226 "second argument less than 16 only\n");
5227 }
5228 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5229 switch(e->Iex.Binop.op) {
5230 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5231 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5232 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5233 default: vassert(0);
5234 }
5235 if (imm4 >= 16) {
5236 vpanic("ARM supports Iop_VDup with constant "
5237 "second argument less than 16 only\n");
5238 }
5239 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5240 res, argL, imm4, True));
5241 return res;
5242 }
5243 */
5244 case Iop_PwAdd8x16:
5245 case Iop_PwAdd16x8:
5246 case Iop_PwAdd32x4: {
5247 HReg res = newVRegV(env);
5248 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5249 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5250 UInt size = 0;
5251 switch(e->Iex.Binop.op) {
5252 case Iop_PwAdd8x16: size = 0; break;
5253 case Iop_PwAdd16x8: size = 1; break;
5254 case Iop_PwAdd32x4: size = 2; break;
5255 default: vassert(0);
5256 }
5257 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5258 res, argL, argR, size, True));
5259 return res;
5260 }
5261 /* ... */
5262 default:
5263 break;
5264 }
5265 }
5266
5267 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005268 IRTriop *triop = e->Iex.Triop.details;
5269
5270 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00005271 case Iop_ExtractV128: {
5272 HReg res = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00005273 HReg argL = iselNeonExpr(env, triop->arg1);
5274 HReg argR = iselNeonExpr(env, triop->arg2);
sewardj6c60b322010-08-22 12:48:28 +00005275 UInt imm4;
florian420bfa92012-06-02 20:29:22 +00005276 if (triop->arg3->tag != Iex_Const ||
5277 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00005278 vpanic("ARM target supports Iop_ExtractV128 with constant "
5279 "third argument less than 16 only\n");
5280 }
florian420bfa92012-06-02 20:29:22 +00005281 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
sewardj6c60b322010-08-22 12:48:28 +00005282 if (imm4 >= 16) {
5283 vpanic("ARM target supports Iop_ExtractV128 with constant "
5284 "third argument less than 16 only\n");
5285 }
5286 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5287 res, argL, argR, imm4, True));
5288 return res;
5289 }
5290 default:
5291 break;
5292 }
5293 }
5294
5295 if (e->tag == Iex_Mux0X) {
5296 HReg r8;
5297 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5298 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5299 HReg dst = newVRegV(env);
5300 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5301 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5302 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5303 ARMRI84_I84(0xFF,0)));
5304 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5305 return dst;
5306 }
5307
5308 neon_expr_bad:
5309 ppIRExpr(e);
5310 vpanic("iselNeonExpr_wrk");
5311}
5312
5313/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +00005314/*--- ISEL: Floating point expressions (64 bit) ---*/
5315/*---------------------------------------------------------*/
5316
5317/* Compute a 64-bit floating point value into a register, the identity
5318 of which is returned. As with iselIntExpr_R, the reg may be either
5319 real or virtual; in any case it must not be changed by subsequent
5320 code emitted by the caller. */
5321
5322static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5323{
5324 HReg r = iselDblExpr_wrk( env, e );
5325# if 0
5326 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5327# endif
5328 vassert(hregClass(r) == HRcFlt64);
5329 vassert(hregIsVirtual(r));
5330 return r;
5331}
5332
5333/* DO NOT CALL THIS DIRECTLY */
5334static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5335{
5336 IRType ty = typeOfIRExpr(env->type_env,e);
5337 vassert(e);
5338 vassert(ty == Ity_F64);
5339
5340 if (e->tag == Iex_RdTmp) {
5341 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5342 }
5343
5344 if (e->tag == Iex_Const) {
5345 /* Just handle the zero case. */
5346 IRConst* con = e->Iex.Const.con;
5347 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5348 HReg z32 = newVRegI(env);
5349 HReg dst = newVRegD(env);
5350 addInstr(env, ARMInstr_Imm32(z32, 0));
5351 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5352 return dst;
5353 }
5354 }
5355
5356 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5357 ARMAModeV* am;
5358 HReg res = newVRegD(env);
5359 vassert(e->Iex.Load.ty == Ity_F64);
5360 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5361 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5362 return res;
5363 }
5364
5365 if (e->tag == Iex_Get) {
5366 // XXX This won't work if offset > 1020 or is not 0 % 4.
5367 // In which case we'll have to generate more longwinded code.
5368 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5369 HReg res = newVRegD(env);
5370 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5371 return res;
5372 }
5373
5374 if (e->tag == Iex_Unop) {
5375 switch (e->Iex.Unop.op) {
5376 case Iop_ReinterpI64asF64: {
sewardjc6f970f2012-04-02 21:54:49 +00005377 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005378 return iselNeon64Expr(env, e->Iex.Unop.arg);
5379 } else {
5380 HReg srcHi, srcLo;
5381 HReg dst = newVRegD(env);
5382 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5383 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5384 return dst;
5385 }
sewardj6c299f32009-12-31 18:00:12 +00005386 }
5387 case Iop_NegF64: {
5388 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5389 HReg dst = newVRegD(env);
5390 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5391 return dst;
5392 }
5393 case Iop_AbsF64: {
5394 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5395 HReg dst = newVRegD(env);
5396 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5397 return dst;
5398 }
5399 case Iop_F32toF64: {
5400 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5401 HReg dst = newVRegD(env);
5402 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5403 return dst;
5404 }
5405 case Iop_I32UtoF64:
5406 case Iop_I32StoF64: {
5407 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5408 HReg f32 = newVRegF(env);
5409 HReg dst = newVRegD(env);
5410 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5411 /* VMOV f32, src */
5412 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5413 /* FSITOD dst, f32 */
5414 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5415 dst, f32));
5416 return dst;
5417 }
5418 default:
5419 break;
5420 }
5421 }
5422
5423 if (e->tag == Iex_Binop) {
5424 switch (e->Iex.Binop.op) {
5425 case Iop_SqrtF64: {
5426 /* first arg is rounding mode; we ignore it. */
5427 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5428 HReg dst = newVRegD(env);
5429 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5430 return dst;
5431 }
5432 default:
5433 break;
5434 }
5435 }
5436
5437 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005438 IRTriop *triop = e->Iex.Triop.details;
5439
5440 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005441 case Iop_DivF64:
5442 case Iop_MulF64:
5443 case Iop_AddF64:
5444 case Iop_SubF64: {
5445 ARMVfpOp op = 0; /*INVALID*/
florian420bfa92012-06-02 20:29:22 +00005446 HReg argL = iselDblExpr(env, triop->arg2);
5447 HReg argR = iselDblExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00005448 HReg dst = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00005449 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005450 case Iop_DivF64: op = ARMvfp_DIV; break;
5451 case Iop_MulF64: op = ARMvfp_MUL; break;
5452 case Iop_AddF64: op = ARMvfp_ADD; break;
5453 case Iop_SubF64: op = ARMvfp_SUB; break;
5454 default: vassert(0);
5455 }
5456 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5457 return dst;
5458 }
5459 default:
5460 break;
5461 }
5462 }
5463
5464 if (e->tag == Iex_Mux0X) {
5465 if (ty == Ity_F64
5466 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5467 HReg r8;
5468 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5469 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5470 HReg dst = newVRegD(env);
5471 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5472 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5473 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5474 ARMRI84_I84(0xFF,0)));
5475 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5476 return dst;
5477 }
5478 }
5479
5480 ppIRExpr(e);
5481 vpanic("iselDblExpr_wrk");
5482}
5483
5484
5485/*---------------------------------------------------------*/
5486/*--- ISEL: Floating point expressions (32 bit) ---*/
5487/*---------------------------------------------------------*/
5488
5489/* Compute a 64-bit floating point value into a register, the identity
5490 of which is returned. As with iselIntExpr_R, the reg may be either
5491 real or virtual; in any case it must not be changed by subsequent
5492 code emitted by the caller. */
5493
5494static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5495{
5496 HReg r = iselFltExpr_wrk( env, e );
5497# if 0
5498 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5499# endif
5500 vassert(hregClass(r) == HRcFlt32);
5501 vassert(hregIsVirtual(r));
5502 return r;
5503}
5504
5505/* DO NOT CALL THIS DIRECTLY */
5506static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5507{
5508 IRType ty = typeOfIRExpr(env->type_env,e);
5509 vassert(e);
5510 vassert(ty == Ity_F32);
5511
5512 if (e->tag == Iex_RdTmp) {
5513 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5514 }
5515
5516 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5517 ARMAModeV* am;
5518 HReg res = newVRegF(env);
5519 vassert(e->Iex.Load.ty == Ity_F32);
5520 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5521 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5522 return res;
5523 }
5524
5525 if (e->tag == Iex_Get) {
5526 // XXX This won't work if offset > 1020 or is not 0 % 4.
5527 // In which case we'll have to generate more longwinded code.
5528 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5529 HReg res = newVRegF(env);
5530 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5531 return res;
5532 }
5533
5534 if (e->tag == Iex_Unop) {
5535 switch (e->Iex.Unop.op) {
5536 case Iop_ReinterpI32asF32: {
5537 HReg dst = newVRegF(env);
5538 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5539 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5540 return dst;
5541 }
5542 case Iop_NegF32: {
5543 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5544 HReg dst = newVRegF(env);
5545 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5546 return dst;
5547 }
5548 case Iop_AbsF32: {
5549 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5550 HReg dst = newVRegF(env);
5551 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5552 return dst;
5553 }
5554 default:
5555 break;
5556 }
5557 }
5558
5559 if (e->tag == Iex_Binop) {
5560 switch (e->Iex.Binop.op) {
5561 case Iop_SqrtF32: {
5562 /* first arg is rounding mode; we ignore it. */
5563 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5564 HReg dst = newVRegF(env);
5565 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5566 return dst;
5567 }
5568 case Iop_F64toF32: {
5569 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5570 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5571 HReg valS = newVRegF(env);
5572 /* FCVTSD valS, valD */
5573 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5574 set_VFP_rounding_default(env);
5575 return valS;
5576 }
5577 default:
5578 break;
5579 }
5580 }
5581
5582 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005583 IRTriop *triop = e->Iex.Triop.details;
5584
5585 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005586 case Iop_DivF32:
5587 case Iop_MulF32:
5588 case Iop_AddF32:
5589 case Iop_SubF32: {
5590 ARMVfpOp op = 0; /*INVALID*/
florian420bfa92012-06-02 20:29:22 +00005591 HReg argL = iselFltExpr(env, triop->arg2);
5592 HReg argR = iselFltExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00005593 HReg dst = newVRegF(env);
florian420bfa92012-06-02 20:29:22 +00005594 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005595 case Iop_DivF32: op = ARMvfp_DIV; break;
5596 case Iop_MulF32: op = ARMvfp_MUL; break;
5597 case Iop_AddF32: op = ARMvfp_ADD; break;
5598 case Iop_SubF32: op = ARMvfp_SUB; break;
5599 default: vassert(0);
5600 }
5601 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5602 return dst;
5603 }
5604 default:
5605 break;
5606 }
5607 }
5608
5609 if (e->tag == Iex_Mux0X) {
5610 if (ty == Ity_F32
5611 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5612 HReg r8;
5613 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5614 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5615 HReg dst = newVRegF(env);
5616 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5617 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5618 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5619 ARMRI84_I84(0xFF,0)));
5620 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5621 return dst;
5622 }
5623 }
5624
5625 ppIRExpr(e);
5626 vpanic("iselFltExpr_wrk");
5627}
5628
cerioncee30312004-12-17 20:30:21 +00005629
5630/*---------------------------------------------------------*/
5631/*--- ISEL: Statements ---*/
5632/*---------------------------------------------------------*/
5633
5634static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5635{
5636 if (vex_traceflags & VEX_TRACE_VCODE) {
5637 vex_printf("\n-- ");
5638 ppIRStmt(stmt);
5639 vex_printf("\n");
5640 }
5641 switch (stmt->tag) {
5642
5643 /* --------- STORE --------- */
5644 /* little-endian write to memory */
sewardjaf1ceca2005-06-30 23:31:27 +00005645 case Ist_Store: {
sewardj6c299f32009-12-31 18:00:12 +00005646 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5647 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5648 IREndness end = stmt->Ist.Store.end;
sewardjaf1ceca2005-06-30 23:31:27 +00005649
sewardj6c299f32009-12-31 18:00:12 +00005650 if (tya != Ity_I32 || end != Iend_LE)
5651 goto stmt_fail;
sewardjaf1ceca2005-06-30 23:31:27 +00005652
sewardj6c299f32009-12-31 18:00:12 +00005653 if (tyd == Ity_I32) {
5654 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5655 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
sewardjcfe046e2013-01-17 14:23:53 +00005656 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
sewardj6c299f32009-12-31 18:00:12 +00005657 return;
5658 }
5659 if (tyd == Ity_I16) {
5660 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5661 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
sewardjcfe046e2013-01-17 14:23:53 +00005662 addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5663 False/*!isLoad*/,
sewardj6c299f32009-12-31 18:00:12 +00005664 False/*!isSignedLoad*/, rD, am));
5665 return;
5666 }
5667 if (tyd == Ity_I8) {
5668 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5669 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
sewardjcfe046e2013-01-17 14:23:53 +00005670 addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
sewardj6c299f32009-12-31 18:00:12 +00005671 return;
5672 }
5673 if (tyd == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005674 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005675 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5676 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5677 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5678 } else {
5679 HReg rDhi, rDlo, rA;
5680 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5681 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
sewardjcfe046e2013-01-17 14:23:53 +00005682 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
sewardj6c60b322010-08-22 12:48:28 +00005683 ARMAMode1_RI(rA,4)));
sewardjcfe046e2013-01-17 14:23:53 +00005684 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
sewardj6c60b322010-08-22 12:48:28 +00005685 ARMAMode1_RI(rA,0)));
5686 }
sewardj6c299f32009-12-31 18:00:12 +00005687 return;
5688 }
5689 if (tyd == Ity_F64) {
5690 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5691 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5692 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5693 return;
5694 }
5695 if (tyd == Ity_F32) {
5696 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5697 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5698 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5699 return;
5700 }
sewardj6c60b322010-08-22 12:48:28 +00005701 if (tyd == Ity_V128) {
5702 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5703 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5704 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5705 return;
5706 }
cerioncee30312004-12-17 20:30:21 +00005707
sewardj6c299f32009-12-31 18:00:12 +00005708 break;
cerioncee30312004-12-17 20:30:21 +00005709 }
5710
sewardjcfe046e2013-01-17 14:23:53 +00005711 /* --------- CONDITIONAL STORE --------- */
5712 /* conditional little-endian write to memory */
5713 case Ist_StoreG: {
5714 IRStoreG* sg = stmt->Ist.StoreG.details;
5715 IRType tya = typeOfIRExpr(env->type_env, sg->addr);
5716 IRType tyd = typeOfIRExpr(env->type_env, sg->data);
5717 IREndness end = sg->end;
5718
5719 if (tya != Ity_I32 || end != Iend_LE)
5720 goto stmt_fail;
5721
5722 switch (tyd) {
5723 case Ity_I8:
5724 case Ity_I32: {
5725 HReg rD = iselIntExpr_R(env, sg->data);
5726 ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
5727 ARMCondCode cc = iselCondCode(env, sg->guard);
5728 addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5729 (cc, False/*!isLoad*/, rD, am));
5730 return;
5731 }
5732 case Ity_I16: {
5733 HReg rD = iselIntExpr_R(env, sg->data);
5734 ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
5735 ARMCondCode cc = iselCondCode(env, sg->guard);
5736 addInstr(env, ARMInstr_LdSt16(cc,
5737 False/*!isLoad*/,
5738 False/*!isSignedLoad*/, rD, am));
5739 return;
5740 }
5741 default:
5742 break;
5743 }
5744 break;
5745 }
5746
5747 /* --------- CONDITIONAL LOAD --------- */
5748 /* conditional little-endian load from memory */
5749 case Ist_LoadG: {
5750 IRLoadG* lg = stmt->Ist.LoadG.details;
5751 IRType tya = typeOfIRExpr(env->type_env, lg->addr);
5752 IREndness end = lg->end;
5753
5754 if (tya != Ity_I32 || end != Iend_LE)
5755 goto stmt_fail;
5756
5757 switch (lg->cvt) {
5758 case ILGop_8Uto32:
5759 case ILGop_Ident32: {
5760 HReg rAlt = iselIntExpr_R(env, lg->alt);
5761 ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
5762 HReg rD = lookupIRTemp(env, lg->dst);
5763 addInstr(env, mk_iMOVds_RR(rD, rAlt));
5764 ARMCondCode cc = iselCondCode(env, lg->guard);
5765 addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5766 : ARMInstr_LdSt8U)
5767 (cc, True/*isLoad*/, rD, am));
5768 return;
5769 }
5770 case ILGop_16Sto32:
5771 case ILGop_16Uto32:
5772 case ILGop_8Sto32: {
5773 HReg rAlt = iselIntExpr_R(env, lg->alt);
5774 ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
5775 HReg rD = lookupIRTemp(env, lg->dst);
5776 addInstr(env, mk_iMOVds_RR(rD, rAlt));
5777 ARMCondCode cc = iselCondCode(env, lg->guard);
5778 if (lg->cvt == ILGop_8Sto32) {
5779 addInstr(env, ARMInstr_Ld8S(cc, rD, am));
5780 } else {
5781 vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
5782 Bool sx = lg->cvt == ILGop_16Sto32;
5783 addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
5784 }
5785 return;
5786 }
5787 default:
5788 break;
5789 }
5790 break;
5791 }
5792
cerioncee30312004-12-17 20:30:21 +00005793 /* --------- PUT --------- */
5794 /* write guest state, fixed offset */
5795 case Ist_Put: {
5796 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
cerioncee30312004-12-17 20:30:21 +00005797
cerioncee30312004-12-17 20:30:21 +00005798 if (tyd == Ity_I32) {
sewardj6c299f32009-12-31 18:00:12 +00005799 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5800 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
sewardjcfe046e2013-01-17 14:23:53 +00005801 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
sewardj6c299f32009-12-31 18:00:12 +00005802 return;
cerioncee30312004-12-17 20:30:21 +00005803 }
sewardj6c299f32009-12-31 18:00:12 +00005804 if (tyd == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005805 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005806 HReg addr = newVRegI(env);
5807 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5808 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5809 stmt->Ist.Put.offset));
5810 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5811 } else {
5812 HReg rDhi, rDlo;
5813 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5814 stmt->Ist.Put.offset + 0);
5815 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5816 stmt->Ist.Put.offset + 4);
5817 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
sewardjcfe046e2013-01-17 14:23:53 +00005818 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5819 rDhi, am4));
5820 addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5821 rDlo, am0));
sewardj6c60b322010-08-22 12:48:28 +00005822 }
sewardj6c299f32009-12-31 18:00:12 +00005823 return;
cerioncee30312004-12-17 20:30:21 +00005824 }
sewardj6c299f32009-12-31 18:00:12 +00005825 if (tyd == Ity_F64) {
5826 // XXX This won't work if offset > 1020 or is not 0 % 4.
5827 // In which case we'll have to generate more longwinded code.
5828 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5829 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5830 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5831 return;
cerioncee30312004-12-17 20:30:21 +00005832 }
sewardj6c299f32009-12-31 18:00:12 +00005833 if (tyd == Ity_F32) {
5834 // XXX This won't work if offset > 1020 or is not 0 % 4.
5835 // In which case we'll have to generate more longwinded code.
5836 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5837 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5838 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5839 return;
5840 }
sewardj6c60b322010-08-22 12:48:28 +00005841 if (tyd == Ity_V128) {
5842 HReg addr = newVRegI(env);
5843 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5844 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5845 stmt->Ist.Put.offset));
5846 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5847 return;
5848 }
cerioncee30312004-12-17 20:30:21 +00005849 break;
5850 }
5851
cerioncee30312004-12-17 20:30:21 +00005852 /* --------- TMP --------- */
5853 /* assign value to temporary */
sewardjdd40fdf2006-12-24 02:20:24 +00005854 case Ist_WrTmp: {
5855 IRTemp tmp = stmt->Ist.WrTmp.tmp;
cerioncee30312004-12-17 20:30:21 +00005856 IRType ty = typeOfIRTemp(env->type_env, tmp);
5857
5858 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005859 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5860 env, stmt->Ist.WrTmp.data);
5861 HReg dst = lookupIRTemp(env, tmp);
5862 addInstr(env, ARMInstr_Mov(dst,ri84));
cerioncee30312004-12-17 20:30:21 +00005863 return;
5864 }
sewardj6c299f32009-12-31 18:00:12 +00005865 if (ty == Ity_I1) {
5866 HReg dst = lookupIRTemp(env, tmp);
5867 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5868 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5869 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5870 return;
5871 }
5872 if (ty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005873 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005874 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5875 HReg dst = lookupIRTemp(env, tmp);
5876 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5877 } else {
5878 HReg rHi, rLo, dstHi, dstLo;
5879 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5880 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5881 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5882 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5883 }
sewardj6c299f32009-12-31 18:00:12 +00005884 return;
5885 }
5886 if (ty == Ity_F64) {
5887 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5888 HReg dst = lookupIRTemp(env, tmp);
5889 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5890 return;
5891 }
5892 if (ty == Ity_F32) {
5893 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5894 HReg dst = lookupIRTemp(env, tmp);
5895 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5896 return;
5897 }
sewardj6c60b322010-08-22 12:48:28 +00005898 if (ty == Ity_V128) {
5899 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5900 HReg dst = lookupIRTemp(env, tmp);
5901 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5902 return;
5903 }
cerioncee30312004-12-17 20:30:21 +00005904 break;
5905 }
5906
5907 /* --------- Call to DIRTY helper --------- */
5908 /* call complex ("dirty") helper function */
5909 case Ist_Dirty: {
sewardj6c299f32009-12-31 18:00:12 +00005910 IRDirty* d = stmt->Ist.Dirty.details;
5911 Bool passBBP = False;
cerioncee30312004-12-17 20:30:21 +00005912
5913 if (d->nFxState == 0)
5914 vassert(!d->needsBBP);
sewardj428fabd2005-03-21 03:11:17 +00005915
5916 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
cerioncee30312004-12-17 20:30:21 +00005917
sewardjcfe046e2013-01-17 14:23:53 +00005918 /* Figure out the return type, if any. */
5919 IRType retty = Ity_INVALID;
5920 if (d->tmp != IRTemp_INVALID)
5921 retty = typeOfIRTemp(env->type_env, d->tmp);
5922
5923 /* Marshal args, do the call, clear stack, set the return value
5924 to 0x555..555 if this is a conditional call that returns a
5925 value and the call is skipped. We need to set the ret-loc
5926 correctly in order to implement the IRDirty semantics that
5927 the return value is 0x555..555 if the call doesn't happen. */
5928 RetLoc rloc = RetLocINVALID;
5929 switch (retty) {
5930 case Ity_INVALID: /* function doesn't return anything */
5931 rloc = RetLocNone; break;
5932 case Ity_I64:
5933 rloc = RetLoc2Int; break;
5934 case Ity_I32: case Ity_I16: case Ity_I8:
5935 rloc = RetLocInt; break;
5936 default:
5937 break;
5938 }
5939 if (rloc == RetLocINVALID)
5940 break; /* will go to stmt_fail: */
5941
5942 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args, rloc );
sewardj6c299f32009-12-31 18:00:12 +00005943 if (!ok)
5944 break; /* will go to stmt_fail: */
cerioncee30312004-12-17 20:30:21 +00005945
5946 /* Now figure out what to do with the returned value, if any. */
5947 if (d->tmp == IRTemp_INVALID)
sewardj6c299f32009-12-31 18:00:12 +00005948 /* No return value. Nothing to do. */
5949 return;
cerioncee30312004-12-17 20:30:21 +00005950
sewardj6c299f32009-12-31 18:00:12 +00005951 if (retty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005952 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005953 HReg tmp = lookupIRTemp(env, d->tmp);
5954 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5955 hregARM_R0()));
5956 } else {
5957 HReg dstHi, dstLo;
5958 /* The returned value is in r1:r0. Park it in the
5959 register-pair associated with tmp. */
5960 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5961 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5962 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5963 }
cerioncee30312004-12-17 20:30:21 +00005964 return;
5965 }
sewardj6c299f32009-12-31 18:00:12 +00005966 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5967 /* The returned value is in r0. Park it in the register
5968 associated with tmp. */
5969 HReg dst = lookupIRTemp(env, d->tmp);
5970 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5971 return;
5972 }
5973
cerioncee30312004-12-17 20:30:21 +00005974 break;
5975 }
5976
sewardj6c299f32009-12-31 18:00:12 +00005977 /* --------- Load Linked and Store Conditional --------- */
5978 case Ist_LLSC: {
5979 if (stmt->Ist.LLSC.storedata == NULL) {
5980 /* LL */
5981 IRTemp res = stmt->Ist.LLSC.result;
5982 IRType ty = typeOfIRTemp(env->type_env, res);
sewardjff7f5b72011-07-11 11:43:38 +00005983 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005984 Int szB = 0;
5985 HReg r_dst = lookupIRTemp(env, res);
5986 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5987 switch (ty) {
5988 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005989 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005990 case Ity_I32: szB = 4; break;
5991 default: vassert(0);
5992 }
sewardjff7f5b72011-07-11 11:43:38 +00005993 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
sewardj6c299f32009-12-31 18:00:12 +00005994 addInstr(env, ARMInstr_LdrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005995 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
sewardj6c299f32009-12-31 18:00:12 +00005996 return;
5997 }
sewardjff7f5b72011-07-11 11:43:38 +00005998 if (ty == Ity_I64) {
5999 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6000 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6001 addInstr(env, ARMInstr_LdrEX(8));
6002 /* Result is in r3:r2. On a non-NEON capable CPU, we must
6003 move it into a result register pair. On a NEON capable
6004 CPU, the result register will be a 64 bit NEON
6005 register, so we must move it there instead. */
sewardjc6f970f2012-04-02 21:54:49 +00006006 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardjff7f5b72011-07-11 11:43:38 +00006007 HReg dst = lookupIRTemp(env, res);
6008 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6009 hregARM_R2()));
6010 } else {
6011 HReg r_dst_hi, r_dst_lo;
6012 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6013 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6014 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6015 }
6016 return;
6017 }
6018 /*NOTREACHED*/
6019 vassert(0);
sewardj6c299f32009-12-31 18:00:12 +00006020 } else {
6021 /* SC */
sewardj6c299f32009-12-31 18:00:12 +00006022 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
sewardjff7f5b72011-07-11 11:43:38 +00006023 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6024 Int szB = 0;
6025 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6026 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
sewardj6c299f32009-12-31 18:00:12 +00006027 switch (tyd) {
6028 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00006029 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00006030 case Ity_I32: szB = 4; break;
6031 default: vassert(0);
6032 }
sewardjff7f5b72011-07-11 11:43:38 +00006033 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6034 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
sewardj6c299f32009-12-31 18:00:12 +00006035 addInstr(env, ARMInstr_StrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00006036 } else {
6037 vassert(tyd == Ity_I64);
6038 /* This is really ugly. There is no is/is-not NEON
6039 decision akin to the case for LL, because iselInt64Expr
6040 fudges this for us, and always gets the result into two
6041 GPRs even if this means moving it from a NEON
6042 register. */
6043 HReg rDhi, rDlo;
6044 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6045 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6046 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6047 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6048 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6049 addInstr(env, ARMInstr_StrEX(8));
sewardj6c299f32009-12-31 18:00:12 +00006050 }
sewardjff7f5b72011-07-11 11:43:38 +00006051 /* now r0 is 1 if failed, 0 if success. Change to IR
6052 conventions (0 is fail, 1 is success). Also transfer
6053 result to r_res. */
6054 IRTemp res = stmt->Ist.LLSC.result;
6055 IRType ty = typeOfIRTemp(env->type_env, res);
6056 HReg r_res = lookupIRTemp(env, res);
6057 ARMRI84* one = ARMRI84_I84(1,0);
6058 vassert(ty == Ity_I1);
6059 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6060 /* And be conservative -- mask off all but the lowest bit */
6061 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6062 return;
sewardj6c299f32009-12-31 18:00:12 +00006063 }
6064 break;
6065 }
6066
sewardj412098c2010-05-04 08:48:43 +00006067 /* --------- MEM FENCE --------- */
6068 case Ist_MBE:
6069 switch (stmt->Ist.MBE.event) {
6070 case Imbe_Fence:
sewardj6d615ba2011-09-26 16:19:43 +00006071 addInstr(env, ARMInstr_MFence());
6072 return;
6073 case Imbe_CancelReservation:
6074 addInstr(env, ARMInstr_CLREX());
sewardj412098c2010-05-04 08:48:43 +00006075 return;
6076 default:
6077 break;
6078 }
6079 break;
6080
sewardj6c299f32009-12-31 18:00:12 +00006081 /* --------- INSTR MARK --------- */
6082 /* Doesn't generate any executable code ... */
6083 case Ist_IMark:
6084 return;
6085
6086 /* --------- NO-OP --------- */
6087 case Ist_NoOp:
6088 return;
6089
cerioncee30312004-12-17 20:30:21 +00006090 /* --------- EXIT --------- */
cerioncee30312004-12-17 20:30:21 +00006091 case Ist_Exit: {
cerioncee30312004-12-17 20:30:21 +00006092 if (stmt->Ist.Exit.dst->tag != Ico_U32)
6093 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
sewardjc6f970f2012-04-02 21:54:49 +00006094
6095 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
6096 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
6097 stmt->Ist.Exit.offsIP);
6098
6099 /* Case: boring transfer to known address */
6100 if (stmt->Ist.Exit.jk == Ijk_Boring
6101 || stmt->Ist.Exit.jk == Ijk_Call
6102 || stmt->Ist.Exit.jk == Ijk_Ret) {
6103 if (env->chainingAllowed) {
6104 /* .. almost always true .. */
6105 /* Skip the event check at the dst if this is a forwards
6106 edge. */
6107 Bool toFastEP
6108 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
6109 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6110 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6111 amR15T, cc, toFastEP));
6112 } else {
6113 /* .. very occasionally .. */
6114 /* We can't use chaining, so ask for an assisted transfer,
6115 as that's the only alternative that is allowable. */
6116 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6117 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6118 }
6119 return;
6120 }
6121
6122 /* Case: assisted transfer to arbitrary address */
6123 switch (stmt->Ist.Exit.jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006124 /* Keep this list in sync with that in iselNext below */
6125 case Ijk_ClientReq:
sewardjc6f970f2012-04-02 21:54:49 +00006126 case Ijk_NoDecode:
sewardj2f6902b2012-04-23 09:48:14 +00006127 case Ijk_NoRedir:
6128 case Ijk_Sys_syscall:
sewardjbc9b8722012-08-08 21:02:20 +00006129 case Ijk_TInval:
sewardjc6f970f2012-04-02 21:54:49 +00006130 {
6131 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6132 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6133 stmt->Ist.Exit.jk));
6134 return;
6135 }
6136 default:
6137 break;
6138 }
6139
6140 /* Do we ever expect to see any other kind? */
6141 goto stmt_fail;
cerioncee30312004-12-17 20:30:21 +00006142 }
6143
6144 default: break;
6145 }
sewardjaf1ceca2005-06-30 23:31:27 +00006146 stmt_fail:
cerioncee30312004-12-17 20:30:21 +00006147 ppIRStmt(stmt);
6148 vpanic("iselStmt");
6149}
6150
6151
6152/*---------------------------------------------------------*/
6153/*--- ISEL: Basic block terminators (Nexts) ---*/
6154/*---------------------------------------------------------*/
6155
sewardjc6f970f2012-04-02 21:54:49 +00006156static void iselNext ( ISelEnv* env,
6157 IRExpr* next, IRJumpKind jk, Int offsIP )
cerioncee30312004-12-17 20:30:21 +00006158{
sewardj6c299f32009-12-31 18:00:12 +00006159 if (vex_traceflags & VEX_TRACE_VCODE) {
sewardjc6f970f2012-04-02 21:54:49 +00006160 vex_printf( "\n-- PUT(%d) = ", offsIP);
6161 ppIRExpr( next );
6162 vex_printf( "; exit-");
sewardj6c299f32009-12-31 18:00:12 +00006163 ppIRJumpKind(jk);
sewardjc6f970f2012-04-02 21:54:49 +00006164 vex_printf( "\n");
sewardj6c299f32009-12-31 18:00:12 +00006165 }
sewardjc6f970f2012-04-02 21:54:49 +00006166
6167 /* Case: boring transfer to known address */
6168 if (next->tag == Iex_Const) {
6169 IRConst* cdst = next->Iex.Const.con;
6170 vassert(cdst->tag == Ico_U32);
6171 if (jk == Ijk_Boring || jk == Ijk_Call) {
6172 /* Boring transfer to known address */
6173 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6174 if (env->chainingAllowed) {
6175 /* .. almost always true .. */
6176 /* Skip the event check at the dst if this is a forwards
6177 edge. */
6178 Bool toFastEP
6179 = ((Addr64)cdst->Ico.U32) > env->max_ga;
6180 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6181 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6182 amR15T, ARMcc_AL,
6183 toFastEP));
6184 } else {
6185 /* .. very occasionally .. */
6186 /* We can't use chaining, so ask for an assisted transfer,
6187 as that's the only alternative that is allowable. */
6188 HReg r = iselIntExpr_R(env, next);
6189 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6190 Ijk_Boring));
6191 }
6192 return;
6193 }
6194 }
6195
6196 /* Case: call/return (==boring) transfer to any address */
6197 switch (jk) {
6198 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6199 HReg r = iselIntExpr_R(env, next);
6200 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6201 if (env->chainingAllowed) {
6202 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6203 } else {
6204 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6205 Ijk_Boring));
6206 }
6207 return;
6208 }
6209 default:
6210 break;
6211 }
6212
sewardj2f6902b2012-04-23 09:48:14 +00006213 /* Case: assisted transfer to arbitrary address */
sewardjc6f970f2012-04-02 21:54:49 +00006214 switch (jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006215 /* Keep this list in sync with that for Ist_Exit above */
6216 case Ijk_ClientReq:
6217 case Ijk_NoDecode:
sewardjc6f970f2012-04-02 21:54:49 +00006218 case Ijk_NoRedir:
sewardj2f6902b2012-04-23 09:48:14 +00006219 case Ijk_Sys_syscall:
sewardja0d8eb82012-09-03 21:48:42 +00006220 case Ijk_TInval:
sewardjc6f970f2012-04-02 21:54:49 +00006221 {
6222 HReg r = iselIntExpr_R(env, next);
6223 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6224 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6225 return;
6226 }
6227 default:
6228 break;
6229 }
6230
6231 vex_printf( "\n-- PUT(%d) = ", offsIP);
6232 ppIRExpr( next );
6233 vex_printf( "; exit-");
6234 ppIRJumpKind(jk);
6235 vex_printf( "\n");
6236 vassert(0); // are we expecting any other kind?
cerioncee30312004-12-17 20:30:21 +00006237}
6238
6239
6240/*---------------------------------------------------------*/
6241/*--- Insn selector top-level ---*/
6242/*---------------------------------------------------------*/
6243
sewardjdd40fdf2006-12-24 02:20:24 +00006244/* Translate an entire SB to arm code. */
cerioncee30312004-12-17 20:30:21 +00006245
sewardjc6f970f2012-04-02 21:54:49 +00006246HInstrArray* iselSB_ARM ( IRSB* bb,
6247 VexArch arch_host,
6248 VexArchInfo* archinfo_host,
6249 VexAbiInfo* vbi/*UNUSED*/,
6250 Int offs_Host_EvC_Counter,
6251 Int offs_Host_EvC_FailAddr,
6252 Bool chainingAllowed,
6253 Bool addProfInc,
6254 Addr64 max_ga )
cerioncee30312004-12-17 20:30:21 +00006255{
sewardjc6f970f2012-04-02 21:54:49 +00006256 Int i, j;
6257 HReg hreg, hregHI;
6258 ISelEnv* env;
6259 UInt hwcaps_host = archinfo_host->hwcaps;
6260 ARMAMode1 *amCounter, *amFailAddr;
cerioncee30312004-12-17 20:30:21 +00006261
sewardj6c299f32009-12-31 18:00:12 +00006262 /* sanity ... */
6263 vassert(arch_host == VexArchARM);
sewardj6c60b322010-08-22 12:48:28 +00006264
sewardjcfe046e2013-01-17 14:23:53 +00006265 /* guard against unexpected space regressions */
6266 vassert(sizeof(ARMInstr) <= 28);
6267
sewardj6c60b322010-08-22 12:48:28 +00006268 /* hwcaps should not change from one ISEL call to another. */
sewardjc6f970f2012-04-02 21:54:49 +00006269 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
cerioncee30312004-12-17 20:30:21 +00006270
sewardj6c299f32009-12-31 18:00:12 +00006271 /* Make up an initial environment to use. */
6272 env = LibVEX_Alloc(sizeof(ISelEnv));
6273 env->vreg_ctr = 0;
6274
6275 /* Set up output code array. */
6276 env->code = newHInstrArray();
cerioncee30312004-12-17 20:30:21 +00006277
sewardj6c299f32009-12-31 18:00:12 +00006278 /* Copy BB's type env. */
6279 env->type_env = bb->tyenv;
cerioncee30312004-12-17 20:30:21 +00006280
sewardj6c299f32009-12-31 18:00:12 +00006281 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6282 change as we go along. */
6283 env->n_vregmap = bb->tyenv->types_used;
6284 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6285 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
cerioncee30312004-12-17 20:30:21 +00006286
sewardjc6f970f2012-04-02 21:54:49 +00006287 /* and finally ... */
6288 env->chainingAllowed = chainingAllowed;
6289 env->hwcaps = hwcaps_host;
6290 env->max_ga = max_ga;
6291
sewardj6c299f32009-12-31 18:00:12 +00006292 /* For each IR temporary, allocate a suitably-kinded virtual
6293 register. */
6294 j = 0;
6295 for (i = 0; i < env->n_vregmap; i++) {
6296 hregHI = hreg = INVALID_HREG;
6297 switch (bb->tyenv->types[i]) {
6298 case Ity_I1:
6299 case Ity_I8:
6300 case Ity_I16:
6301 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
sewardj6c60b322010-08-22 12:48:28 +00006302 case Ity_I64:
sewardjc6f970f2012-04-02 21:54:49 +00006303 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00006304 hreg = mkHReg(j++, HRcFlt64, True);
sewardj6c60b322010-08-22 12:48:28 +00006305 } else {
6306 hregHI = mkHReg(j++, HRcInt32, True);
6307 hreg = mkHReg(j++, HRcInt32, True);
6308 }
6309 break;
sewardj6c299f32009-12-31 18:00:12 +00006310 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
6311 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
sewardj06122e72011-03-28 12:14:48 +00006312 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardj6c299f32009-12-31 18:00:12 +00006313 default: ppIRType(bb->tyenv->types[i]);
6314 vpanic("iselBB: IRTemp type");
6315 }
6316 env->vregmap[i] = hreg;
6317 env->vregmapHI[i] = hregHI;
6318 }
6319 env->vreg_ctr = j;
cerioncee30312004-12-17 20:30:21 +00006320
sewardjc6f970f2012-04-02 21:54:49 +00006321 /* The very first instruction must be an event check. */
6322 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6323 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6324 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6325
6326 /* Possibly a block counter increment (for profiling). At this
6327 point we don't know the address of the counter, so just pretend
6328 it is zero. It will have to be patched later, but before this
6329 translation is used, by a call to LibVEX_patchProfCtr. */
6330 if (addProfInc) {
6331 addInstr(env, ARMInstr_ProfInc());
6332 }
cerioncee30312004-12-17 20:30:21 +00006333
sewardj6c299f32009-12-31 18:00:12 +00006334 /* Ok, finally we can iterate over the statements. */
6335 for (i = 0; i < bb->stmts_used; i++)
sewardjc6f970f2012-04-02 21:54:49 +00006336 iselStmt(env, bb->stmts[i]);
sewardj6c299f32009-12-31 18:00:12 +00006337
sewardjc6f970f2012-04-02 21:54:49 +00006338 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
sewardj6c299f32009-12-31 18:00:12 +00006339
6340 /* record the number of vregs we used. */
6341 env->code->n_vregs = env->vreg_ctr;
6342 return env->code;
cerioncee30312004-12-17 20:30:21 +00006343}
6344
6345
cerioncee30312004-12-17 20:30:21 +00006346/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00006347/*--- end host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00006348/*---------------------------------------------------------------*/